mlrun 1.10.0rc13__py3-none-any.whl → 1.10.0rc42__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of mlrun might be problematic. Click here for more details.

Files changed (107)
  1. mlrun/__init__.py +22 -2
  2. mlrun/artifacts/base.py +0 -31
  3. mlrun/artifacts/document.py +6 -1
  4. mlrun/artifacts/llm_prompt.py +123 -25
  5. mlrun/artifacts/manager.py +0 -5
  6. mlrun/artifacts/model.py +3 -3
  7. mlrun/common/constants.py +10 -1
  8. mlrun/common/formatters/artifact.py +1 -0
  9. mlrun/common/model_monitoring/helpers.py +86 -0
  10. mlrun/common/schemas/__init__.py +3 -0
  11. mlrun/common/schemas/auth.py +2 -0
  12. mlrun/common/schemas/function.py +10 -0
  13. mlrun/common/schemas/hub.py +30 -18
  14. mlrun/common/schemas/model_monitoring/__init__.py +3 -0
  15. mlrun/common/schemas/model_monitoring/constants.py +30 -6
  16. mlrun/common/schemas/model_monitoring/functions.py +14 -5
  17. mlrun/common/schemas/model_monitoring/model_endpoints.py +21 -0
  18. mlrun/common/schemas/pipeline.py +1 -1
  19. mlrun/common/schemas/serving.py +3 -0
  20. mlrun/common/schemas/workflow.py +3 -1
  21. mlrun/common/secrets.py +22 -1
  22. mlrun/config.py +33 -11
  23. mlrun/datastore/__init__.py +11 -3
  24. mlrun/datastore/azure_blob.py +162 -47
  25. mlrun/datastore/datastore.py +9 -4
  26. mlrun/datastore/datastore_profile.py +61 -5
  27. mlrun/datastore/model_provider/huggingface_provider.py +363 -0
  28. mlrun/datastore/model_provider/mock_model_provider.py +87 -0
  29. mlrun/datastore/model_provider/model_provider.py +230 -65
  30. mlrun/datastore/model_provider/openai_provider.py +295 -42
  31. mlrun/datastore/s3.py +24 -2
  32. mlrun/datastore/storeytargets.py +2 -3
  33. mlrun/datastore/utils.py +15 -3
  34. mlrun/db/base.py +47 -19
  35. mlrun/db/httpdb.py +120 -56
  36. mlrun/db/nopdb.py +38 -10
  37. mlrun/execution.py +70 -19
  38. mlrun/hub/__init__.py +15 -0
  39. mlrun/hub/module.py +181 -0
  40. mlrun/k8s_utils.py +105 -16
  41. mlrun/launcher/base.py +13 -6
  42. mlrun/launcher/local.py +15 -0
  43. mlrun/model.py +24 -3
  44. mlrun/model_monitoring/__init__.py +1 -0
  45. mlrun/model_monitoring/api.py +66 -27
  46. mlrun/model_monitoring/applications/__init__.py +1 -1
  47. mlrun/model_monitoring/applications/base.py +509 -117
  48. mlrun/model_monitoring/applications/context.py +2 -4
  49. mlrun/model_monitoring/applications/results.py +4 -7
  50. mlrun/model_monitoring/controller.py +239 -101
  51. mlrun/model_monitoring/db/_schedules.py +116 -33
  52. mlrun/model_monitoring/db/_stats.py +4 -3
  53. mlrun/model_monitoring/db/tsdb/base.py +100 -9
  54. mlrun/model_monitoring/db/tsdb/tdengine/schemas.py +11 -6
  55. mlrun/model_monitoring/db/tsdb/tdengine/tdengine_connector.py +191 -50
  56. mlrun/model_monitoring/db/tsdb/tdengine/writer_graph_steps.py +51 -0
  57. mlrun/model_monitoring/db/tsdb/v3io/stream_graph_steps.py +17 -4
  58. mlrun/model_monitoring/db/tsdb/v3io/v3io_connector.py +259 -40
  59. mlrun/model_monitoring/helpers.py +54 -9
  60. mlrun/model_monitoring/stream_processing.py +45 -14
  61. mlrun/model_monitoring/writer.py +220 -1
  62. mlrun/platforms/__init__.py +3 -2
  63. mlrun/platforms/iguazio.py +7 -3
  64. mlrun/projects/operations.py +6 -1
  65. mlrun/projects/pipelines.py +46 -26
  66. mlrun/projects/project.py +166 -58
  67. mlrun/run.py +94 -17
  68. mlrun/runtimes/__init__.py +18 -0
  69. mlrun/runtimes/base.py +14 -6
  70. mlrun/runtimes/daskjob.py +7 -0
  71. mlrun/runtimes/local.py +5 -2
  72. mlrun/runtimes/mounts.py +20 -2
  73. mlrun/runtimes/mpijob/abstract.py +6 -0
  74. mlrun/runtimes/mpijob/v1.py +6 -0
  75. mlrun/runtimes/nuclio/__init__.py +1 -0
  76. mlrun/runtimes/nuclio/application/application.py +149 -17
  77. mlrun/runtimes/nuclio/function.py +76 -27
  78. mlrun/runtimes/nuclio/serving.py +97 -15
  79. mlrun/runtimes/pod.py +234 -21
  80. mlrun/runtimes/remotesparkjob.py +6 -0
  81. mlrun/runtimes/sparkjob/spark3job.py +6 -0
  82. mlrun/runtimes/utils.py +49 -11
  83. mlrun/secrets.py +54 -13
  84. mlrun/serving/__init__.py +2 -0
  85. mlrun/serving/remote.py +79 -6
  86. mlrun/serving/routers.py +23 -41
  87. mlrun/serving/server.py +320 -80
  88. mlrun/serving/states.py +725 -157
  89. mlrun/serving/steps.py +62 -0
  90. mlrun/serving/system_steps.py +200 -119
  91. mlrun/serving/v2_serving.py +9 -10
  92. mlrun/utils/helpers.py +288 -88
  93. mlrun/utils/logger.py +3 -1
  94. mlrun/utils/notifications/notification/base.py +18 -0
  95. mlrun/utils/notifications/notification/git.py +2 -4
  96. mlrun/utils/notifications/notification/slack.py +2 -4
  97. mlrun/utils/notifications/notification/webhook.py +2 -5
  98. mlrun/utils/notifications/notification_pusher.py +1 -1
  99. mlrun/utils/retryer.py +15 -2
  100. mlrun/utils/version/version.json +2 -2
  101. {mlrun-1.10.0rc13.dist-info → mlrun-1.10.0rc42.dist-info}/METADATA +45 -51
  102. {mlrun-1.10.0rc13.dist-info → mlrun-1.10.0rc42.dist-info}/RECORD +106 -101
  103. mlrun/api/schemas/__init__.py +0 -259
  104. {mlrun-1.10.0rc13.dist-info → mlrun-1.10.0rc42.dist-info}/WHEEL +0 -0
  105. {mlrun-1.10.0rc13.dist-info → mlrun-1.10.0rc42.dist-info}/entry_points.txt +0 -0
  106. {mlrun-1.10.0rc13.dist-info → mlrun-1.10.0rc42.dist-info}/licenses/LICENSE +0 -0
  107. {mlrun-1.10.0rc13.dist-info → mlrun-1.10.0rc42.dist-info}/top_level.txt +0 -0
@@ -12,14 +12,38 @@
12
12
  # See the License for the specific language governing permissions and
13
13
  # limitations under the License.
14
14
  from collections.abc import Awaitable
15
- from typing import Callable, Optional, TypeVar, Union
15
+ from typing import Any, Callable, Optional, Union
16
16
 
17
17
  import mlrun.errors
18
+ from mlrun.common.types import StrEnum
18
19
  from mlrun.datastore.remote_client import (
19
20
  BaseRemoteClient,
20
21
  )
21
22
 
22
- T = TypeVar("T")
23
+
24
+ class InvokeResponseFormat(StrEnum):
25
+ STRING = "string"
26
+ USAGE = "usage"
27
+ FULL = "full"
28
+
29
+ @classmethod
30
+ def is_str_response(cls, invoke_response_format: str) -> bool:
31
+ """
32
+ Returns True if the response format yields a string-based answer (STRING or USAGE) rather than the full generation object.
33
+ """
34
+ return invoke_response_format in {
35
+ cls.USAGE,
36
+ cls.STRING,
37
+ }
38
+
39
+
40
+ class UsageResponseKeys(StrEnum):
41
+ ANSWER = "answer"
42
+ USAGE = "usage"
43
+
44
+ @classmethod
45
+ def fields(cls) -> list[str]:
46
+ return [cls.ANSWER, cls.USAGE]
23
47
 
24
48
 
25
49
  class ModelProvider(BaseRemoteClient):
@@ -56,105 +80,246 @@ class ModelProvider(BaseRemoteClient):
56
80
  )
57
81
  self.default_invoke_kwargs = default_invoke_kwargs or {}
58
82
  self._client = None
59
- self._default_operation = None
60
83
  self._async_client = None
61
- self._default_async_operation = None
62
84
 
63
- def load_client(self) -> None:
85
+ @staticmethod
86
+ def _extract_string_output(response: Any) -> str:
64
87
  """
65
- Initializes the SDK client for the model provider with the given keyword arguments
66
- and assigns it to an instance attribute (e.g., self._client).
67
-
68
- Subclasses should override this method to:
69
- - Create and configure the provider-specific client instance.
70
- - Assign the client instance to self._client.
71
- - Define a default operation callable (e.g., a method to invoke model completions)
72
- and assign it to self._default_operation.
88
+ Extracts string response from response object
73
89
  """
90
+ pass
74
91
 
75
- raise NotImplementedError("load_client method is not implemented")
76
-
77
- def invoke(
92
+ def _response_handler(
78
93
  self,
79
- messages: Optional[list[dict]] = None,
80
- as_str: bool = False,
81
- **invoke_kwargs,
82
- ) -> Optional[Union[str, T]]:
94
+ response: Any,
95
+ invoke_response_format: InvokeResponseFormat = InvokeResponseFormat.FULL,
96
+ **kwargs,
97
+ ) -> Union[str, dict, Any]:
83
98
  """
84
- Invokes a generative AI model with the provided messages and additional parameters.
85
- This method is designed to be a flexible interface for interacting with various
86
- generative AI backends (e.g., OpenAI, Hugging Face, etc.). It allows users to send
87
- a list of messages (following a standardized format) and receive a response. The
88
- response can be returned as plain text or in its full structured format, depending
89
- on the `as_str` parameter.
99
+ Handles the model response according to the specified response format.
90
100
 
91
- :param messages: A list of dictionaries representing the conversation history or input messages.
92
- Each dictionary should follow the format::
93
- {"role": "system"| "user" | "assistant" ..., "content": "Message content as a string"}
94
- Example:
101
+ :param response: The raw response returned from the model invocation.
102
+ :param invoke_response_format: Determines how the response should be processed and returned.
103
+ Options include:
95
104
 
96
- .. code-block:: json
105
+ - STRING: Return only the main generated content as a string,
106
+ typically for single-answer responses.
107
+ - USAGE: Return a dictionary combining the string response with
108
+ additional metadata or token usage statistics, in this format:
109
+ {"answer": <string>, "usage": <dict>}
97
110
 
98
- [
99
- {"role": "system", "content": "You are a helpful assistant."},
100
- {"role": "user", "content": "What is the capital of France?"}
101
- ]
111
+ - FULL: Return the full raw response object.
102
112
 
103
- This format is consistent across all backends. Defaults to None if no messages
104
- are provided.
113
+ :param kwargs: Additional parameters that may be required by specific implementations.
105
114
 
106
- :param as_str: A boolean flag indicating whether to return the response as a plain string.
107
- - If True, the function extracts and returns the main content of the first
108
- response.
109
- - If False, the function returns the full response object,
110
- which may include additional metadata or multiple response options.
111
- Defaults to False.
115
+ :return: The processed response in the format specified by `invoke_response_format`.
116
+ Can be a string, dictionary, or the original response object.
117
+ """
118
+ return None
112
119
 
113
- :param invoke_kwargs:
114
- Additional keyword arguments to be passed to the underlying model API call.
115
- These can include parameters such as temperature, max tokens, etc.,
116
- depending on the capabilities of the specific backend being used.
120
+ def get_client_options(self) -> dict:
121
+ """
122
+ Returns a dictionary containing credentials and configuration
123
+ options required for client creation.
124
+
125
+ :return: A dictionary with client-specific settings.
126
+ """
127
+ return {}
117
128
 
118
- :return:
119
- - If `as_str` is True: Returns the main content of the first response as a string.
120
- - If `as_str` is False: Returns the full response object.
129
+ def load_client(self) -> None:
130
+ """
131
+ Initialize the SDK client for the model provider and assign it to an instance attribute.
121
132
 
133
+ Subclasses should override this method to create and configure the provider-specific client.
122
134
  """
123
- raise NotImplementedError("invoke method is not implemented")
124
135
 
125
- def customized_invoke(
126
- self, operation: Optional[Callable[..., T]] = None, **invoke_kwargs
127
- ) -> Optional[T]:
128
- raise NotImplementedError("customized_invoke method is not implemented")
136
+ raise NotImplementedError("load_client method is not implemented")
137
+
138
+ def load_async_client(self) -> Any:
139
+ raise NotImplementedError("load_async_client method is not implemented")
129
140
 
130
141
  @property
131
- def client(self):
142
+ def client(self) -> Any:
132
143
  return self._client
133
144
 
134
145
  @property
135
- def model(self):
136
- return None
146
+ def model(self) -> Optional[str]:
147
+ """
148
+ Returns the model identifier used by the underlying SDK.
149
+
150
+ :return: A string representing the model ID, or None if not set.
151
+ """
152
+ return self.endpoint
137
153
 
138
- def get_invoke_kwargs(self, invoke_kwargs):
154
+ def get_invoke_kwargs(self, invoke_kwargs) -> dict:
139
155
  kwargs = self.default_invoke_kwargs.copy()
140
156
  kwargs.update(invoke_kwargs)
141
157
  return kwargs
142
158
 
143
159
  @property
144
- def async_client(self):
160
+ def async_client(self) -> Any:
145
161
  if not self.support_async:
146
162
  raise mlrun.errors.MLRunInvalidArgumentError(
147
163
  f"{self.__class__.__name__} does not support async operations"
148
164
  )
149
165
  return self._async_client
150
166
 
151
- async def async_customized_invoke(self, **kwargs):
152
- raise NotImplementedError("async_customized_invoke is not implemented")
167
+ def custom_invoke(
168
+ self, operation: Optional[Callable] = None, **invoke_kwargs
169
+ ) -> Any:
170
+ """
171
+ Invokes a model operation from a provider (e.g., OpenAI, Hugging Face, etc.) with the given keyword arguments.
172
+
173
+ Useful for dynamically calling model methods like text generation, chat completions, or image generation.
174
+ The operation must be a callable that accepts keyword arguments.
175
+
176
+ :param operation: A callable representing the model operation (e.g., a client method).
177
+ :param invoke_kwargs: Keyword arguments to pass to the operation.
178
+ :return: The full response returned by the operation.
179
+ """
180
+ raise NotImplementedError("custom_invoke method is not implemented")
181
+
182
+ async def async_custom_invoke(
183
+ self, operation: Optional[Callable[..., Awaitable[Any]]] = None, **invoke_kwargs
184
+ ) -> Any:
185
+ """
186
+ Asynchronously invokes a model operation from a provider (e.g., OpenAI, Hugging Face, etc.)
187
+ with the given keyword arguments.
188
+
189
+ The operation must be an async callable (e.g., a method from an async client) that accepts keyword arguments.
190
+
191
+ :param operation: An async callable representing the model operation (e.g., an async_client method).
192
+ :param invoke_kwargs: Keyword arguments to pass to the operation.
193
+ :return: The full response returned by the awaited operation.
194
+ """
195
+ raise NotImplementedError("async_custom_invoke is not implemented")
196
+
197
+ def invoke(
198
+ self,
199
+ messages: Union[list[dict], Any],
200
+ invoke_response_format: InvokeResponseFormat = InvokeResponseFormat.FULL,
201
+ **invoke_kwargs,
202
+ ) -> Union[str, dict[str, Any], Any]:
203
+ """
204
+ Invokes a generative AI model with the provided messages and additional parameters.
205
+ This method is designed to be a flexible interface for interacting with various
206
+ generative AI backends (e.g., OpenAI, Hugging Face, etc.). It allows users to send
207
+ a list of messages (following a standardized format) and receive a response.
208
+
209
+ :param messages: A list of dictionaries representing the conversation history or input messages.
210
+ Each dictionary should follow the format::
211
+ {"role": "system"| "user" | "assistant" ..., "content":
212
+ "Message content as a string"}
213
+
214
+ Example:
215
+
216
+ .. code-block:: json
217
+
218
+ [
219
+ {"role": "system", "content": "You are a helpful assistant."},
220
+ {"role": "user", "content": "What is the capital of France?"}
221
+ ]
222
+
223
+ This format is consistent across all backends. This parameter is required and
224
+ has no default value.
225
+
226
+ :param invoke_response_format: Determines how the model response is returned:
227
+
228
+ - string: Returns only the generated text content from the model output,
229
+ for single-answer responses only.
230
+
231
+ - usage: Combines the STRING response with additional metadata (token usage),
232
+ and returns the result in a dictionary.
233
+
234
+ Note: The usage dictionary may contain additional
235
+ keys depending on the model provider:
236
+
237
+ .. code-block:: json
238
+
239
+ {
240
+ "answer": "<generated_text>",
241
+ "usage": {
242
+ "prompt_tokens": <int>,
243
+ "completion_tokens": <int>,
244
+ "total_tokens": <int>
245
+ }
246
+
247
+ }
248
+
249
+ - full: Returns the full model output.
250
+
251
+ :param invoke_kwargs:
252
+ Additional keyword arguments to be passed to the underlying model API call.
253
+ These can include parameters such as temperature, max tokens, etc.,
254
+ depending on the capabilities of the specific backend being used.
255
+
256
+ :return: The invoke result formatted according to the specified
257
+ invoke_response_format parameter.
258
+
259
+ """
260
+ raise NotImplementedError("invoke method is not implemented")
153
261
 
154
262
  async def async_invoke(
155
263
  self,
156
- messages: Optional[list[dict]] = None,
157
- as_str: bool = False,
264
+ messages: list[dict],
265
+ invoke_response_format=InvokeResponseFormat.FULL,
158
266
  **invoke_kwargs,
159
- ) -> Awaitable[str]:
267
+ ) -> Union[str, dict[str, Any], Any]:
268
+ """
269
+ Asynchronously invokes a generative AI model with the provided messages and additional parameters.
270
+ This method is designed to be a flexible interface for interacting with various
271
+ generative AI backends (e.g., OpenAI, Hugging Face, etc.). It allows users to send
272
+ a list of messages (following a standardized format) and receive a response.
273
+
274
+ :param messages: A list of dictionaries representing the conversation history or input messages.
275
+ Each dictionary should follow the format::
276
+ {"role": "system"| "user" | "assistant" ..., "content":
277
+ "Message content as a string"}
278
+
279
+ Example:
280
+
281
+ .. code-block:: json
282
+
283
+ [
284
+ {"role": "system", "content": "You are a helpful assistant."},
285
+ {"role": "user", "content": "What is the capital of France?"}
286
+ ]
287
+
288
+ This format is consistent across all backends. This parameter is required and
289
+ has no default value.
290
+
291
+ :param invoke_response_format: Determines how the model response is returned:
292
+
293
+ - string: Returns only the generated text content from the model output,
294
+ for single-answer responses only.
295
+
296
+ - usage: Combines the STRING response with additional metadata (token usage),
297
+ and returns the result in a dictionary.
298
+
299
+ Note: The usage dictionary may contain additional
300
+ keys depending on the model provider:
301
+
302
+ .. code-block:: json
303
+
304
+ {
305
+ "answer": "<generated_text>",
306
+ "usage": {
307
+ "prompt_tokens": <int>,
308
+ "completion_tokens": <int>,
309
+ "total_tokens": <int>
310
+ }
311
+
312
+ }
313
+
314
+ - full: Returns the full model output.
315
+
316
+ :param invoke_kwargs:
317
+ Additional keyword arguments to be passed to the underlying model API call.
318
+ These can include parameters such as temperature, max tokens, etc.,
319
+ depending on the capabilities of the specific backend being used.
320
+
321
+ :return: The invoke result formatted according to the specified
322
+ invoke_response_format parameter.
323
+
324
+ """
160
325
  raise NotImplementedError("async_invoke is not implemented")