mlrun 1.10.0rc16__py3-none-any.whl → 1.10.1rc4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of mlrun might be problematic. Click here for more details.
- mlrun/__init__.py +22 -2
- mlrun/artifacts/document.py +6 -1
- mlrun/artifacts/llm_prompt.py +21 -15
- mlrun/artifacts/model.py +3 -3
- mlrun/common/constants.py +9 -0
- mlrun/common/formatters/artifact.py +1 -0
- mlrun/common/model_monitoring/helpers.py +86 -0
- mlrun/common/schemas/__init__.py +2 -0
- mlrun/common/schemas/auth.py +2 -0
- mlrun/common/schemas/function.py +10 -0
- mlrun/common/schemas/hub.py +30 -18
- mlrun/common/schemas/model_monitoring/__init__.py +2 -0
- mlrun/common/schemas/model_monitoring/constants.py +30 -6
- mlrun/common/schemas/model_monitoring/functions.py +13 -4
- mlrun/common/schemas/model_monitoring/model_endpoints.py +11 -0
- mlrun/common/schemas/pipeline.py +1 -1
- mlrun/common/schemas/serving.py +3 -0
- mlrun/common/schemas/workflow.py +1 -0
- mlrun/common/secrets.py +22 -1
- mlrun/config.py +34 -21
- mlrun/datastore/__init__.py +11 -3
- mlrun/datastore/azure_blob.py +162 -47
- mlrun/datastore/base.py +265 -7
- mlrun/datastore/datastore.py +10 -5
- mlrun/datastore/datastore_profile.py +61 -5
- mlrun/datastore/model_provider/huggingface_provider.py +367 -0
- mlrun/datastore/model_provider/mock_model_provider.py +87 -0
- mlrun/datastore/model_provider/model_provider.py +211 -74
- mlrun/datastore/model_provider/openai_provider.py +243 -71
- mlrun/datastore/s3.py +24 -2
- mlrun/datastore/store_resources.py +4 -4
- mlrun/datastore/storeytargets.py +2 -3
- mlrun/datastore/utils.py +15 -3
- mlrun/db/base.py +27 -19
- mlrun/db/httpdb.py +57 -48
- mlrun/db/nopdb.py +25 -10
- mlrun/execution.py +55 -13
- mlrun/hub/__init__.py +15 -0
- mlrun/hub/module.py +181 -0
- mlrun/k8s_utils.py +105 -16
- mlrun/launcher/base.py +13 -6
- mlrun/launcher/local.py +2 -0
- mlrun/model.py +9 -3
- mlrun/model_monitoring/api.py +66 -27
- mlrun/model_monitoring/applications/__init__.py +1 -1
- mlrun/model_monitoring/applications/base.py +388 -138
- mlrun/model_monitoring/applications/context.py +2 -4
- mlrun/model_monitoring/applications/results.py +4 -7
- mlrun/model_monitoring/controller.py +239 -101
- mlrun/model_monitoring/db/_schedules.py +36 -13
- mlrun/model_monitoring/db/_stats.py +4 -3
- mlrun/model_monitoring/db/tsdb/base.py +29 -9
- mlrun/model_monitoring/db/tsdb/tdengine/schemas.py +4 -5
- mlrun/model_monitoring/db/tsdb/tdengine/tdengine_connector.py +154 -50
- mlrun/model_monitoring/db/tsdb/tdengine/writer_graph_steps.py +51 -0
- mlrun/model_monitoring/db/tsdb/v3io/stream_graph_steps.py +17 -4
- mlrun/model_monitoring/db/tsdb/v3io/v3io_connector.py +245 -51
- mlrun/model_monitoring/helpers.py +28 -5
- mlrun/model_monitoring/stream_processing.py +45 -14
- mlrun/model_monitoring/writer.py +220 -1
- mlrun/platforms/__init__.py +3 -2
- mlrun/platforms/iguazio.py +7 -3
- mlrun/projects/operations.py +16 -11
- mlrun/projects/pipelines.py +2 -2
- mlrun/projects/project.py +157 -69
- mlrun/run.py +97 -20
- mlrun/runtimes/__init__.py +18 -0
- mlrun/runtimes/base.py +14 -6
- mlrun/runtimes/daskjob.py +1 -0
- mlrun/runtimes/local.py +5 -2
- mlrun/runtimes/mounts.py +20 -2
- mlrun/runtimes/nuclio/__init__.py +1 -0
- mlrun/runtimes/nuclio/application/application.py +147 -17
- mlrun/runtimes/nuclio/function.py +72 -27
- mlrun/runtimes/nuclio/serving.py +102 -20
- mlrun/runtimes/pod.py +213 -21
- mlrun/runtimes/utils.py +49 -9
- mlrun/secrets.py +54 -13
- mlrun/serving/remote.py +79 -6
- mlrun/serving/routers.py +23 -41
- mlrun/serving/server.py +230 -40
- mlrun/serving/states.py +605 -232
- mlrun/serving/steps.py +62 -0
- mlrun/serving/system_steps.py +136 -81
- mlrun/serving/v2_serving.py +9 -10
- mlrun/utils/helpers.py +215 -83
- mlrun/utils/logger.py +3 -1
- mlrun/utils/notifications/notification/base.py +18 -0
- mlrun/utils/notifications/notification/git.py +2 -4
- mlrun/utils/notifications/notification/mail.py +38 -15
- mlrun/utils/notifications/notification/slack.py +2 -4
- mlrun/utils/notifications/notification/webhook.py +2 -5
- mlrun/utils/notifications/notification_pusher.py +1 -1
- mlrun/utils/version/version.json +2 -2
- {mlrun-1.10.0rc16.dist-info → mlrun-1.10.1rc4.dist-info}/METADATA +51 -50
- {mlrun-1.10.0rc16.dist-info → mlrun-1.10.1rc4.dist-info}/RECORD +100 -95
- mlrun/api/schemas/__init__.py +0 -259
- {mlrun-1.10.0rc16.dist-info → mlrun-1.10.1rc4.dist-info}/WHEEL +0 -0
- {mlrun-1.10.0rc16.dist-info → mlrun-1.10.1rc4.dist-info}/entry_points.txt +0 -0
- {mlrun-1.10.0rc16.dist-info → mlrun-1.10.1rc4.dist-info}/licenses/LICENSE +0 -0
- {mlrun-1.10.0rc16.dist-info → mlrun-1.10.1rc4.dist-info}/top_level.txt +0 -0
|
@@ -12,14 +12,38 @@
|
|
|
12
12
|
# See the License for the specific language governing permissions and
|
|
13
13
|
# limitations under the License.
|
|
14
14
|
from collections.abc import Awaitable
|
|
15
|
-
from typing import Any, Callable, Optional,
|
|
15
|
+
from typing import Any, Callable, Optional, Union
|
|
16
16
|
|
|
17
17
|
import mlrun.errors
|
|
18
|
+
from mlrun.common.types import StrEnum
|
|
18
19
|
from mlrun.datastore.remote_client import (
|
|
19
20
|
BaseRemoteClient,
|
|
20
21
|
)
|
|
21
22
|
|
|
22
|
-
|
|
23
|
+
|
|
24
|
+
class InvokeResponseFormat(StrEnum):
|
|
25
|
+
STRING = "string"
|
|
26
|
+
USAGE = "usage"
|
|
27
|
+
FULL = "full"
|
|
28
|
+
|
|
29
|
+
@classmethod
|
|
30
|
+
def is_str_response(cls, invoke_response_format: str) -> bool:
|
|
31
|
+
"""
|
|
32
|
+
Returns True if the response key corresponds to a string-based response (not a full generation object).
|
|
33
|
+
"""
|
|
34
|
+
return invoke_response_format in {
|
|
35
|
+
cls.USAGE,
|
|
36
|
+
cls.STRING,
|
|
37
|
+
}
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
class UsageResponseKeys(StrEnum):
|
|
41
|
+
ANSWER = "answer"
|
|
42
|
+
USAGE = "usage"
|
|
43
|
+
|
|
44
|
+
@classmethod
|
|
45
|
+
def fields(cls) -> list[str]:
|
|
46
|
+
return [cls.ANSWER, cls.USAGE]
|
|
23
47
|
|
|
24
48
|
|
|
25
49
|
class ModelProvider(BaseRemoteClient):
|
|
@@ -58,89 +82,61 @@ class ModelProvider(BaseRemoteClient):
|
|
|
58
82
|
self._client = None
|
|
59
83
|
self._async_client = None
|
|
60
84
|
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
Returns a dictionary containing credentials and configuration
|
|
64
|
-
options required for client creation.
|
|
65
|
-
|
|
66
|
-
:return: A dictionary with client-specific settings.
|
|
67
|
-
"""
|
|
68
|
-
return {}
|
|
69
|
-
|
|
70
|
-
def load_client(self) -> None:
|
|
85
|
+
@staticmethod
|
|
86
|
+
def _extract_string_output(response: Any) -> str:
|
|
71
87
|
"""
|
|
72
|
-
|
|
73
|
-
and assigns it to an instance attribute (e.g., self._client).
|
|
74
|
-
|
|
75
|
-
Subclasses should override this method to:
|
|
76
|
-
- Create and configure the provider-specific client instance.
|
|
77
|
-
- Assign the client instance to self._client.
|
|
88
|
+
Extracts string response from response object
|
|
78
89
|
"""
|
|
90
|
+
pass
|
|
79
91
|
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
def invoke(
|
|
92
|
+
def _response_handler(
|
|
83
93
|
self,
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
**
|
|
87
|
-
) ->
|
|
94
|
+
response: Any,
|
|
95
|
+
invoke_response_format: InvokeResponseFormat = InvokeResponseFormat.FULL,
|
|
96
|
+
**kwargs,
|
|
97
|
+
) -> Union[str, dict, Any]:
|
|
88
98
|
"""
|
|
89
|
-
|
|
90
|
-
This method is designed to be a flexible interface for interacting with various
|
|
91
|
-
generative AI backends (e.g., OpenAI, Hugging Face, etc.). It allows users to send
|
|
92
|
-
a list of messages (following a standardized format) and receive a response. The
|
|
93
|
-
response can be returned as plain text or in its full structured format, depending
|
|
94
|
-
on the `as_str` parameter.
|
|
99
|
+
Handles the model response according to the specified response format.
|
|
95
100
|
|
|
96
|
-
:param
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
Example:
|
|
101
|
+
:param response: The raw response returned from the model invocation.
|
|
102
|
+
:param invoke_response_format: Determines how the response should be processed and returned.
|
|
103
|
+
Options include:
|
|
100
104
|
|
|
101
|
-
|
|
105
|
+
- STRING: Return only the main generated content as a string,
|
|
106
|
+
typically for single-answer responses.
|
|
107
|
+
- USAGE: Return a dictionary combining the string response with
|
|
108
|
+
additional metadata or token usage statistics, in this format:
|
|
109
|
+
{"answer": <string>, "usage": <dict>}
|
|
102
110
|
|
|
103
|
-
|
|
104
|
-
{"role": "system", "content": "You are a helpful assistant."},
|
|
105
|
-
{"role": "user", "content": "What is the capital of France?"}
|
|
106
|
-
]
|
|
111
|
+
- FULL: Return the full raw response object.
|
|
107
112
|
|
|
108
|
-
|
|
109
|
-
are provided.
|
|
113
|
+
:param kwargs: Additional parameters that may be required by specific implementations.
|
|
110
114
|
|
|
111
|
-
:
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
which may include additional metadata or multiple response options.
|
|
116
|
-
Defaults to False.
|
|
115
|
+
:return: The processed response in the format specified by `invoke_response_format`.
|
|
116
|
+
Can be a string, dictionary, or the original response object.
|
|
117
|
+
"""
|
|
118
|
+
return None
|
|
117
119
|
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
120
|
+
def get_client_options(self) -> dict:
|
|
121
|
+
"""
|
|
122
|
+
Returns a dictionary containing credentials and configuration
|
|
123
|
+
options required for client creation.
|
|
122
124
|
|
|
123
|
-
:return:
|
|
124
|
-
|
|
125
|
-
|
|
125
|
+
:return: A dictionary with client-specific settings.
|
|
126
|
+
"""
|
|
127
|
+
return {}
|
|
126
128
|
|
|
129
|
+
def load_client(self) -> None:
|
|
127
130
|
"""
|
|
128
|
-
|
|
131
|
+
Initialize the SDK client for the model provider and assign it to an instance attribute.
|
|
129
132
|
|
|
130
|
-
|
|
131
|
-
self, operation: Optional[Callable[..., T]] = None, **invoke_kwargs
|
|
132
|
-
) -> Optional[T]:
|
|
133
|
+
Subclasses should override this method to create and configure the provider-specific client.
|
|
133
134
|
"""
|
|
134
|
-
Invokes a model operation from a provider (e.g., OpenAI, Hugging Face, etc.) with the given keyword arguments.
|
|
135
135
|
|
|
136
|
-
|
|
137
|
-
The operation must be a callable that accepts keyword arguments.
|
|
136
|
+
raise NotImplementedError("load_client method is not implemented")
|
|
138
137
|
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
:return: The full response returned by the operation.
|
|
142
|
-
"""
|
|
143
|
-
raise NotImplementedError("custom_invoke method is not implemented")
|
|
138
|
+
def load_async_client(self) -> Any:
|
|
139
|
+
raise NotImplementedError("load_async_client method is not implemented")
|
|
144
140
|
|
|
145
141
|
@property
|
|
146
142
|
def client(self) -> Any:
|
|
@@ -148,7 +144,12 @@ class ModelProvider(BaseRemoteClient):
|
|
|
148
144
|
|
|
149
145
|
@property
|
|
150
146
|
def model(self) -> Optional[str]:
|
|
151
|
-
|
|
147
|
+
"""
|
|
148
|
+
Returns the model identifier used by the underlying SDK.
|
|
149
|
+
|
|
150
|
+
:return: A string representing the model ID, or None if not set.
|
|
151
|
+
"""
|
|
152
|
+
return self.endpoint
|
|
152
153
|
|
|
153
154
|
def get_invoke_kwargs(self, invoke_kwargs) -> dict:
|
|
154
155
|
kwargs = self.default_invoke_kwargs.copy()
|
|
@@ -163,9 +164,24 @@ class ModelProvider(BaseRemoteClient):
|
|
|
163
164
|
)
|
|
164
165
|
return self._async_client
|
|
165
166
|
|
|
167
|
+
def custom_invoke(
|
|
168
|
+
self, operation: Optional[Callable] = None, **invoke_kwargs
|
|
169
|
+
) -> Any:
|
|
170
|
+
"""
|
|
171
|
+
Invokes a model operation from a provider (e.g., OpenAI, Hugging Face, etc.) with the given keyword arguments.
|
|
172
|
+
|
|
173
|
+
Useful for dynamically calling model methods like text generation, chat completions, or image generation.
|
|
174
|
+
The operation must be a callable that accepts keyword arguments.
|
|
175
|
+
|
|
176
|
+
:param operation: A callable representing the model operation (e.g., a client method).
|
|
177
|
+
:param invoke_kwargs: Keyword arguments to pass to the operation.
|
|
178
|
+
:return: The full response returned by the operation.
|
|
179
|
+
"""
|
|
180
|
+
raise NotImplementedError("custom_invoke method is not implemented")
|
|
181
|
+
|
|
166
182
|
async def async_custom_invoke(
|
|
167
|
-
self, operation: Optional[Callable[..., Awaitable[
|
|
168
|
-
) ->
|
|
183
|
+
self, operation: Optional[Callable[..., Awaitable[Any]]] = None, **invoke_kwargs
|
|
184
|
+
) -> Any:
|
|
169
185
|
"""
|
|
170
186
|
Asynchronously invokes a model operation from a provider (e.g., OpenAI, Hugging Face, etc.)
|
|
171
187
|
with the given keyword arguments.
|
|
@@ -178,11 +194,132 @@ class ModelProvider(BaseRemoteClient):
|
|
|
178
194
|
"""
|
|
179
195
|
raise NotImplementedError("async_custom_invoke is not implemented")
|
|
180
196
|
|
|
197
|
+
def invoke(
|
|
198
|
+
self,
|
|
199
|
+
messages: Union[list[dict], Any],
|
|
200
|
+
invoke_response_format: InvokeResponseFormat = InvokeResponseFormat.FULL,
|
|
201
|
+
**invoke_kwargs,
|
|
202
|
+
) -> Union[str, dict[str, Any], Any]:
|
|
203
|
+
"""
|
|
204
|
+
Invokes a generative AI model with the provided messages and additional parameters.
|
|
205
|
+
This method is designed to be a flexible interface for interacting with various
|
|
206
|
+
generative AI backends (e.g., OpenAI, Hugging Face, etc.). It allows users to send
|
|
207
|
+
a list of messages (following a standardized format) and receive a response.
|
|
208
|
+
|
|
209
|
+
:param messages: A list of dictionaries representing the conversation history or input messages.
|
|
210
|
+
Each dictionary should follow the format::
|
|
211
|
+
{"role": "system"| "user" | "assistant" ..., "content":
|
|
212
|
+
"Message content as a string"}
|
|
213
|
+
|
|
214
|
+
Example:
|
|
215
|
+
|
|
216
|
+
.. code-block:: json
|
|
217
|
+
|
|
218
|
+
[
|
|
219
|
+
{"role": "system", "content": "You are a helpful assistant."},
|
|
220
|
+
{"role": "user", "content": "What is the capital of France?"}
|
|
221
|
+
]
|
|
222
|
+
|
|
223
|
+
This format is consistent across all backends. This parameter is required;
|
|
224
|
+
it has no default value.
|
|
225
|
+
|
|
226
|
+
:param invoke_response_format: Determines how the model response is returned:
|
|
227
|
+
|
|
228
|
+
- string: Returns only the generated text content from the model output,
|
|
229
|
+
for single-answer responses only.
|
|
230
|
+
|
|
231
|
+
- usage: Combines the STRING response with additional metadata (token usage),
|
|
232
|
+
and returns the result in a dictionary.
|
|
233
|
+
|
|
234
|
+
Note: The usage dictionary may contain additional
|
|
235
|
+
keys depending on the model provider:
|
|
236
|
+
|
|
237
|
+
.. code-block:: json
|
|
238
|
+
|
|
239
|
+
{
|
|
240
|
+
"answer": "<generated_text>",
|
|
241
|
+
"usage": {
|
|
242
|
+
"prompt_tokens": <int>,
|
|
243
|
+
"completion_tokens": <int>,
|
|
244
|
+
"total_tokens": <int>
|
|
245
|
+
}
|
|
246
|
+
|
|
247
|
+
}
|
|
248
|
+
|
|
249
|
+
- full: Returns the full model output.
|
|
250
|
+
|
|
251
|
+
:param invoke_kwargs:
|
|
252
|
+
Additional keyword arguments to be passed to the underlying model API call.
|
|
253
|
+
These can include parameters such as temperature, max tokens, etc.,
|
|
254
|
+
depending on the capabilities of the specific backend being used.
|
|
255
|
+
|
|
256
|
+
:return: The invoke result formatted according to the specified
|
|
257
|
+
invoke_response_format parameter.
|
|
258
|
+
|
|
259
|
+
"""
|
|
260
|
+
raise NotImplementedError("invoke method is not implemented")
|
|
261
|
+
|
|
181
262
|
async def async_invoke(
|
|
182
263
|
self,
|
|
183
|
-
messages:
|
|
184
|
-
|
|
264
|
+
messages: list[dict],
|
|
265
|
+
invoke_response_format=InvokeResponseFormat.FULL,
|
|
185
266
|
**invoke_kwargs,
|
|
186
|
-
) ->
|
|
187
|
-
"""
|
|
267
|
+
) -> Union[str, dict[str, Any], Any]:
|
|
268
|
+
"""
|
|
269
|
+
Asynchronously invokes a generative AI model with the provided messages and additional parameters.
|
|
270
|
+
This method is designed to be a flexible interface for interacting with various
|
|
271
|
+
generative AI backends (e.g., OpenAI, Hugging Face, etc.). It allows users to send
|
|
272
|
+
a list of messages (following a standardized format) and receive a response.
|
|
273
|
+
|
|
274
|
+
:param messages: A list of dictionaries representing the conversation history or input messages.
|
|
275
|
+
Each dictionary should follow the format::
|
|
276
|
+
{"role": "system"| "user" | "assistant" ..., "content":
|
|
277
|
+
"Message content as a string"}
|
|
278
|
+
|
|
279
|
+
Example:
|
|
280
|
+
|
|
281
|
+
.. code-block:: json
|
|
282
|
+
|
|
283
|
+
[
|
|
284
|
+
{"role": "system", "content": "You are a helpful assistant."},
|
|
285
|
+
{"role": "user", "content": "What is the capital of France?"}
|
|
286
|
+
]
|
|
287
|
+
|
|
288
|
+
This format is consistent across all backends. This parameter is required;
|
|
289
|
+
it has no default value.
|
|
290
|
+
|
|
291
|
+
:param invoke_response_format: Determines how the model response is returned:
|
|
292
|
+
|
|
293
|
+
- string: Returns only the generated text content from the model output,
|
|
294
|
+
for single-answer responses only.
|
|
295
|
+
|
|
296
|
+
- usage: Combines the STRING response with additional metadata (token usage),
|
|
297
|
+
and returns the result in a dictionary.
|
|
298
|
+
|
|
299
|
+
Note: The usage dictionary may contain additional
|
|
300
|
+
keys depending on the model provider:
|
|
301
|
+
|
|
302
|
+
.. code-block:: json
|
|
303
|
+
|
|
304
|
+
{
|
|
305
|
+
"answer": "<generated_text>",
|
|
306
|
+
"usage": {
|
|
307
|
+
"prompt_tokens": <int>,
|
|
308
|
+
"completion_tokens": <int>,
|
|
309
|
+
"total_tokens": <int>
|
|
310
|
+
}
|
|
311
|
+
|
|
312
|
+
}
|
|
313
|
+
|
|
314
|
+
- full: Returns the full model output.
|
|
315
|
+
|
|
316
|
+
:param invoke_kwargs:
|
|
317
|
+
Additional keyword arguments to be passed to the underlying model API call.
|
|
318
|
+
These can include parameters such as temperature, max tokens, etc.,
|
|
319
|
+
depending on the capabilities of the specific backend being used.
|
|
320
|
+
|
|
321
|
+
:return: The invoke result formatted according to the specified
|
|
322
|
+
invoke_response_format parameter.
|
|
323
|
+
|
|
324
|
+
"""
|
|
188
325
|
raise NotImplementedError("async_invoke is not implemented")
|