mlrun 1.10.0rc24__py3-none-any.whl → 1.10.0rc26__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of mlrun might be problematic.
- mlrun/artifacts/llm_prompt.py +8 -1
- mlrun/common/model_monitoring/helpers.py +86 -0
- mlrun/common/schemas/hub.py +25 -18
- mlrun/common/schemas/model_monitoring/constants.py +1 -0
- mlrun/common/schemas/model_monitoring/model_endpoints.py +10 -1
- mlrun/config.py +2 -3
- mlrun/datastore/__init__.py +2 -2
- mlrun/datastore/azure_blob.py +66 -43
- mlrun/datastore/datastore_profile.py +35 -5
- mlrun/datastore/model_provider/huggingface_provider.py +122 -30
- mlrun/datastore/model_provider/model_provider.py +62 -4
- mlrun/datastore/model_provider/openai_provider.py +114 -43
- mlrun/datastore/s3.py +24 -2
- mlrun/datastore/storeytargets.py +2 -3
- mlrun/db/base.py +15 -1
- mlrun/db/httpdb.py +17 -6
- mlrun/db/nopdb.py +14 -0
- mlrun/k8s_utils.py +0 -14
- mlrun/model_monitoring/api.py +2 -2
- mlrun/model_monitoring/applications/base.py +37 -10
- mlrun/model_monitoring/applications/context.py +1 -4
- mlrun/model_monitoring/controller.py +15 -5
- mlrun/model_monitoring/db/_schedules.py +2 -4
- mlrun/model_monitoring/db/tsdb/tdengine/tdengine_connector.py +3 -1
- mlrun/model_monitoring/db/tsdb/v3io/stream_graph_steps.py +17 -4
- mlrun/model_monitoring/db/tsdb/v3io/v3io_connector.py +3 -0
- mlrun/model_monitoring/helpers.py +5 -5
- mlrun/platforms/iguazio.py +7 -3
- mlrun/projects/project.py +33 -29
- mlrun/runtimes/base.py +0 -3
- mlrun/runtimes/mounts.py +15 -2
- mlrun/runtimes/nuclio/__init__.py +1 -0
- mlrun/runtimes/nuclio/application/application.py +11 -2
- mlrun/runtimes/nuclio/function.py +10 -0
- mlrun/runtimes/nuclio/serving.py +4 -0
- mlrun/runtimes/pod.py +153 -11
- mlrun/runtimes/utils.py +22 -5
- mlrun/serving/routers.py +23 -41
- mlrun/serving/server.py +26 -14
- mlrun/serving/states.py +3 -3
- mlrun/serving/system_steps.py +52 -29
- mlrun/serving/v2_serving.py +9 -10
- mlrun/utils/helpers.py +5 -2
- mlrun/utils/version/version.json +2 -2
- {mlrun-1.10.0rc24.dist-info → mlrun-1.10.0rc26.dist-info}/METADATA +24 -23
- {mlrun-1.10.0rc24.dist-info → mlrun-1.10.0rc26.dist-info}/RECORD +50 -50
- {mlrun-1.10.0rc24.dist-info → mlrun-1.10.0rc26.dist-info}/WHEEL +0 -0
- {mlrun-1.10.0rc24.dist-info → mlrun-1.10.0rc26.dist-info}/entry_points.txt +0 -0
- {mlrun-1.10.0rc24.dist-info → mlrun-1.10.0rc26.dist-info}/licenses/LICENSE +0 -0
- {mlrun-1.10.0rc24.dist-info → mlrun-1.10.0rc26.dist-info}/top_level.txt +0 -0
mlrun/datastore/model_provider/huggingface_provider.py
CHANGED

@@ -36,6 +36,9 @@ class HuggingFaceProvider(ModelProvider):
     This class extends the ModelProvider base class and implements Hugging Face-specific
     functionality, including pipeline initialization, default text generation operations,
     and custom operations tailored to the Hugging Face Transformers pipeline API.
+
+    Note: The pipeline object will download the model (if not already cached) and load it
+    into memory for inference. Ensure you have the required CPU/GPU and memory to use this operation.
     """

     def __init__(
@@ -62,13 +65,12 @@ class HuggingFaceProvider(ModelProvider):
         )
         self.options = self.get_client_options()
         self._expected_operation_type = None
-        self.
+        self._download_model()

     @staticmethod
     def _extract_string_output(response: list[dict]) -> str:
         """
-        Extracts the first generated string from Hugging Face pipeline output
-        regardless of whether it's plain text-generation or chat-style output.
+        Extracts the first generated string from Hugging Face pipeline output
         """
         if not isinstance(response, list) or len(response) == 0:
             raise ValueError("Empty or invalid pipeline output")
@@ -86,6 +88,39 @@ class HuggingFaceProvider(ModelProvider):
             subpath = ""
         return endpoint, subpath

+    @property
+    def client(self) -> Any:
+        """
+        Lazily return the HuggingFace-pipeline client.
+
+        If the client has not been initialized yet, it will be created
+        by calling `load_client`.
+        """
+        self.load_client()
+        return self._client
+
+    def _download_model(self):
+        """
+        Pre-downloads model files locally to prevent race conditions in multiprocessing.
+
+        Uses snapshot_download with local_dir_use_symlinks=False to ensure proper
+        file copying for safe concurrent access across multiple processes.
+
+        :raises:
+            ImportError: If huggingface_hub package is not installed.
+        """
+        try:
+            from huggingface_hub import snapshot_download
+
+            # Download the model and tokenizer files directly to the cache.
+            snapshot_download(
+                repo_id=self.model,
+                local_dir_use_symlinks=False,
+                token=self._get_secret_or_env("HF_TOKEN") or None,
+            )
+        except ImportError as exc:
+            raise ImportError("huggingface_hub package is not installed") from exc
+
     def _response_handler(
         self,
         response: Union[str, list],
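The pre-download above, combined with the `local_files_only` flag added to `load_client` further down, means each worker process loads weights that are already on disk instead of racing on a Hub download. A minimal standalone sketch of the same pattern, assuming `huggingface_hub` and `transformers` are installed (the model name is illustrative):

```python
from huggingface_hub import snapshot_download
from transformers import pipeline

# Step 1: copy the model files into the local cache up front (mirrors
# _download_model above); with the files already present, concurrent
# workers never trigger a partial or duplicate download.
snapshot_download(repo_id="gpt2", local_dir_use_symlinks=False)

# Step 2: build the pipeline offline (mirrors load_client, which now
# forces model_kwargs["local_files_only"] = True).
generator = pipeline(model="gpt2", model_kwargs={"local_files_only": True})

# return_full_text=False keeps the prompt out of the generated output,
# matching the expectation documented in _response_handler below.
print(generator("Hello, world.", max_new_tokens=8, return_full_text=False))
```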
@@ -94,27 +129,46 @@ class HuggingFaceProvider(ModelProvider):
         **kwargs,
     ) -> Union[str, list, dict[str, Any]]:
         """
-
+        Processes and formats the raw response from the HuggingFace pipeline according to the specified format.
+
+        The response should exclude the user's input (no repetition in the output).
+        This can be accomplished by invoking the pipeline with `return_full_text=False`.
+
+        :param response: The raw response from the HuggingFace pipeline, typically a list of dictionaries
+                         containing generated text sequences.
+        :param invoke_response_format: Determines how the response should be processed and returned. Options:
+
+            - STRING: Return only the main generated content as a string,
+              for single-answer responses.
+            - USAGE: Return a dictionary combining the string response with
+              token usage statistics:

-
+              .. code-block:: json

-
-
-
+                  {
+                      "answer": "<generated_text>",
+                      "usage": {
+                          "prompt_tokens": <int>,
+                          "completion_tokens": <int>,
+                          "total_tokens": <int>
+                      }
+                  }

-
-
+              Note: Token counts are estimated after answer generation and
+              may differ from the actual tokens generated by the model due to
+              internal decoding behavior and implementation details.

-
-            may differ from the actual tokens generated by the model due to
-            internal decoding behavior and implementation details.
+            - FULL: Return the full raw response object.

-        :param
+        :param messages: The original input messages used for token count estimation in USAGE mode.
+                         Can be a string, list of strings, or chat format messages.
+        :param kwargs: Additional parameters for response processing.

-        :return:
+        :return: The processed response in the format specified by `invoke_response_format`.
+                 Can be a string, dictionary, or the original response object.

         :raises MLRunInvalidArgumentError: If extracting the string response fails.
-        :raises MLRunRuntimeError: If applying the chat template to the model fails.
+        :raises MLRunRuntimeError: If applying the chat template to the model fails during token usage calculation.
         """
         if InvokeResponseFormat.is_str_response(invoke_response_format.value):
             str_response = self._extract_string_output(response)
@@ -161,11 +215,15 @@ class HuggingFaceProvider(ModelProvider):
         :raises:
             ImportError: If the `transformers` package is not installed.
         """
+        if self._client:
+            return
         try:
             from transformers import pipeline, AutoModelForCausalLM  # noqa
             from transformers import AutoTokenizer  # noqa
             from transformers.pipelines.base import Pipeline  # noqa

+            self.options["model_kwargs"] = self.options.get("model_kwargs", {})
+            self.options["model_kwargs"]["local_files_only"] = True
             self._client = pipeline(model=self.model, **self.options)
             self._expected_operation_type = Pipeline
         except ImportError as exc:
@@ -186,23 +244,38 @@ class HuggingFaceProvider(ModelProvider):
         self, operation: Optional["Pipeline"] = None, **invoke_kwargs
     ) -> Union[list, dict, Any]:
         """
-        HuggingFace
-
+        Invokes a HuggingFace pipeline operation with the given keyword arguments.
+
+        This method provides flexibility to use a custom pipeline object for specific tasks
+        (e.g., image classification, sentiment analysis).
+
+        The operation must be a Pipeline object from the transformers library that accepts keyword arguments.

         Example:
-
+            ```python
+            from transformers import pipeline
+            from PIL import Image
+
+            # Using custom pipeline for image classification
             image = Image.open(image_path)
-            pipeline_object =
+            pipeline_object = pipeline("image-classification", model="microsoft/resnet-50")
             result = hf_provider.custom_invoke(
                 pipeline_object,
                 inputs=image,
             )
-
+            ```
+
+        :param operation: A Pipeline object from the transformers library.
+                          If not provided, defaults to the provider's configured pipeline.
+        :param invoke_kwargs: Keyword arguments to pass to the pipeline operation.
+                              These are merged with `default_invoke_kwargs` and may include
+                              parameters such as `inputs`, `max_length`, `temperature`, or task-specific options.

+        :return: The full response returned by the pipeline operation.
+                 Format depends on the pipeline task (list for text generation,
+                 dict for classification, etc.).

-        :
-        :param invoke_kwargs: Keyword arguments to pass to the operation.
-        :return: The full response returned by the operation.
+        :raises MLRunInvalidArgumentError: If the operation is not a valid Pipeline object.

         """
         invoke_kwargs = self.get_invoke_kwargs(invoke_kwargs)
@@ -222,12 +295,24 @@ class HuggingFaceProvider(ModelProvider):
         **invoke_kwargs,
     ) -> Union[str, list, dict[str, Any]]:
         """
-        HuggingFace-specific implementation of
-        Invokes a HuggingFace model operation
-
+        HuggingFace-specific implementation of model invocation using the synchronous pipeline client.
+        Invokes a HuggingFace model operation for text generation tasks.
+
+        Note: Ensure your environment has sufficient computational resources (CPU/GPU and memory) to run the model.

         :param messages:
-
+            Input for the text generation model. Can be provided in multiple formats:
+
+            - A single string: Direct text input for generation
+            - A list of strings: Multiple text inputs for batch processing
+            - Chat format: A list of dictionaries with "role" and "content" keys:
+
+              .. code-block:: json
+
+                  [
+                      {"role": "system", "content": "You are a helpful assistant."},
+                      {"role": "user", "content": "What is the capital of France?"}
+                  ]

         :param invoke_response_format: InvokeResponseFormat
             Specifies the format of the returned response. Options:
@@ -245,17 +330,24 @@ class HuggingFaceProvider(ModelProvider):
                       }
                   }

+            Note: For usage mode, the model tokenizer should support apply_chat_template.
+
             - "full": Returns the raw response object from the HuggingFace model,
               typically a list of generated sequences (dictionaries).
               This format does not include token usage statistics.

         :param invoke_kwargs:
-            Additional keyword arguments passed to the HuggingFace
+            Additional keyword arguments passed to the HuggingFace pipeline.

         :return:
             A string, dictionary, or list of model outputs, depending on `invoke_response_format`.
-        """

+        :raises MLRunInvalidArgumentError:
+            If the pipeline task is not "text-generation" or if the response contains multiple outputs when extracting
+            string content.
+        :raises MLRunRuntimeError:
+            If using "usage" response mode and the model tokenizer does not support chat template formatting.
+        """
         if self.client.task != "text-generation":
             raise mlrun.errors.MLRunInvalidArgumentError(
                 "HuggingFaceProvider.invoke supports text-generation task only"
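Taken together, the docstring above describes the full `invoke` contract. A hedged usage sketch; `hf_provider` (the instance name used in the diff's own `custom_invoke` example) and the import location of `InvokeResponseFormat` are assumptions, since the diff references the enum unqualified:

```python
# Assumes an already-constructed HuggingFaceProvider instance named hf_provider
# whose pipeline task is "text-generation", and that InvokeResponseFormat has
# been imported (its exact module path is not shown in this diff).
messages = [
    {"role": "system", "content": "You are a helpful assistant."},
    {"role": "user", "content": "What is the capital of France?"},
]

result = hf_provider.invoke(
    messages,
    invoke_response_format=InvokeResponseFormat.USAGE,
    return_full_text=False,  # keep the prompt out of the generated text
)
# "usage" mode returns {"answer": ..., "usage": {...}}; per the note above,
# the token counts are estimates made after generation.
print(result["answer"], result["usage"]["total_tokens"])
```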
mlrun/datastore/model_provider/model_provider.py
CHANGED

@@ -108,7 +108,7 @@ class ModelProvider(BaseRemoteClient):
             additional metadata or token usage statistics, in this format:
             {"answer": <string>, "usage": <dict>}

-            - FULL: Return the full raw response object
+            - FULL: Return the full raw response object.

         :param kwargs: Additional parameters that may be required by specific implementations.

@@ -164,7 +164,9 @@ class ModelProvider(BaseRemoteClient):
         )
         return self._async_client

-    def custom_invoke(
+    def custom_invoke(
+        self, operation: Optional[Callable] = None, **invoke_kwargs
+    ) -> Any:
         """
         Invokes a model operation from a provider (e.g., OpenAI, Hugging Face, etc.) with the given keyword arguments.

@@ -178,7 +180,7 @@ class ModelProvider(BaseRemoteClient):
         raise NotImplementedError("custom_invoke method is not implemented")

     async def async_custom_invoke(
-        self, operation: Optional[Callable[..., Awaitable[Any]]], **invoke_kwargs
+        self, operation: Optional[Callable[..., Awaitable[Any]]] = None, **invoke_kwargs
     ) -> Any:
         """
         Asynchronously invokes a model operation from a provider (e.g., OpenAI, Hugging Face, etc.)
@@ -263,5 +265,61 @@ class ModelProvider(BaseRemoteClient):
         invoke_response_format=InvokeResponseFormat.FULL,
         **invoke_kwargs,
     ) -> Union[str, dict[str, Any], Any]:
-        """
+        """
+        Asynchronously invokes a generative AI model with the provided messages and additional parameters.
+        This method is designed to be a flexible interface for interacting with various
+        generative AI backends (e.g., OpenAI, Hugging Face, etc.). It allows users to send
+        a list of messages (following a standardized format) and receive a response.
+
+        :param messages: A list of dictionaries representing the conversation history or input messages.
+            Each dictionary should follow the format::
+                {"role": "system" | "user" | "assistant" ..., "content":
+                "Message content as a string"}
+
+            Example:
+
+            .. code-block:: json
+
+                [
+                    {"role": "system", "content": "You are a helpful assistant."},
+                    {"role": "user", "content": "What is the capital of France?"}
+                ]
+
+            This format is consistent across all backends. Defaults to None if no messages
+            are provided.
+
+        :param invoke_response_format: Determines how the model response is returned:
+
+            - string: Returns only the generated text content from the model output,
+              for single-answer responses only.
+
+            - usage: Combines the STRING response with additional metadata (token usage),
+              and returns the result in a dictionary.
+
+              Note: The usage dictionary may contain additional
+              keys depending on the model provider:
+
+              .. code-block:: json
+
+                  {
+                      "answer": "<generated_text>",
+                      "usage": {
+                          "prompt_tokens": <int>,
+                          "completion_tokens": <int>,
+                          "total_tokens": <int>
+                      }
+
+                  }
+
+            - full: Returns the full model output.
+
+        :param invoke_kwargs:
+            Additional keyword arguments to be passed to the underlying model API call.
+            These can include parameters such as temperature, max tokens, etc.,
+            depending on the capabilities of the specific backend being used.
+
+        :return: The invoke result formatted according to the specified
+            invoke_response_format parameter.
+
+        """
         raise NotImplementedError("async_invoke is not implemented")
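A hedged sketch of calling this contract from async code, assuming a concrete provider instance (`provider`, e.g. an `OpenAIProvider`) and that `InvokeResponseFormat` has been imported (its module path is not shown in the diff):

```python
import asyncio

async def main():
    # STRING mode returns just the generated text; USAGE would return the
    # {"answer": ..., "usage": ...} dictionary documented above.
    answer = await provider.async_invoke(
        messages=[
            {"role": "system", "content": "You are a helpful assistant."},
            {"role": "user", "content": "What is the capital of France?"},
        ],
        invoke_response_format=InvokeResponseFormat.STRING,
    )
    print(answer)

asyncio.run(main())
```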
mlrun/datastore/model_provider/openai_provider.py
CHANGED

@@ -80,8 +80,12 @@ class OpenAIProvider(ModelProvider):
     @staticmethod
     def _extract_string_output(response: "ChatCompletion") -> str:
         """
-        Extracts the first
-
+        Extracts the text content of the first choice from an OpenAI ChatCompletion response.
+        Only supports responses with a single choice. Raises an error if multiple choices exist.
+
+        :param response: The ChatCompletion response from OpenAI.
+        :return: The text content of the first message in the response.
+        :raises MLRunInvalidArgumentError: If the response contains more than one choice.
         """
         if len(response.choices) != 1:
             raise mlrun.errors.MLRunInvalidArgumentError(
@@ -115,13 +119,14 @@ class OpenAIProvider(ModelProvider):
         The client is created only if it does not already exist.
         Raises ImportError if the openai package is not installed.
         """
-        if
-
-
+        if self._client:
+            return
+        try:
+            from openai import OpenAI  # noqa

-
-
-
+            self._client = OpenAI(**self.options)
+        except ImportError as exc:
+            raise ImportError("openai package is not installed") from exc

     def load_async_client(self) -> None:
         """
@@ -163,25 +168,37 @@ class OpenAIProvider(ModelProvider):
         self, operation: Optional[Callable] = None, **invoke_kwargs
     ) -> Union["ChatCompletion", "BaseModel"]:
         """
-        OpenAI
+        Invokes a model operation from the OpenAI client with the given keyword arguments.

-
-        `
+        This method provides flexibility to either:
+        - Call a specific OpenAI client operation (e.g., `client.images.generate`).
+        - Default to `chat.completions.create` when no operation is provided.
+
+        The operation must be a callable that accepts keyword arguments. If the callable
+        does not accept a `model` parameter, it will be omitted from the call.

         Example:
             ```python
-            result = openai_model_provider.
+            result = openai_model_provider.custom_invoke(
                 openai_model_provider.client.images.generate,
                 prompt="A futuristic cityscape at sunset",
                 n=1,
                 size="1024x1024",
             )
             ```
-        :param operation: Same as ModelProvider.custom_invoke.
-        :param invoke_kwargs: Same as ModelProvider.custom_invoke.
-        :return: Same as ModelProvider.custom_invoke.

+        :param operation: A callable representing the OpenAI operation to invoke.
+                          If not provided, defaults to `client.chat.completions.create`.
+
+        :param invoke_kwargs: Additional keyword arguments to pass to the operation.
+                              These are merged with `default_invoke_kwargs` and may
+                              include parameters such as `temperature`, `max_tokens`,
+                              or `messages`.
+
+        :return: The full response returned by the operation, typically
+                 an OpenAI `ChatCompletion` or other OpenAI SDK model.
         """
+
         invoke_kwargs = self.get_invoke_kwargs(invoke_kwargs)
         model_kwargs = {"model": invoke_kwargs.pop("model", None) or self.model}

@@ -202,24 +219,35 @@ class OpenAIProvider(ModelProvider):
         **invoke_kwargs,
     ) -> Union["ChatCompletion", "BaseModel"]:
         """
-        OpenAI
+        Asynchronously invokes a model operation from the OpenAI client with the given keyword arguments.

-
-        `
+        This method provides flexibility to either:
+        - Call a specific async OpenAI client operation (e.g., `async_client.images.generate`).
+        - Default to `chat.completions.create` when no operation is provided.
+
+        The operation must be an async callable that accepts keyword arguments.
+        If the callable does not accept a `model` parameter, it will be omitted from the call.

         Example:
-
-            result = openai_model_provider.
+            ```python
+            result = await openai_model_provider.async_custom_invoke(
                 openai_model_provider.async_client.images.generate,
                 prompt="A futuristic cityscape at sunset",
                 n=1,
                 size="1024x1024",
             )
-
+            ```
+
+        :param operation: An async callable representing the OpenAI operation to invoke.
+                          If not provided, defaults to `async_client.chat.completions.create`.

-        :param
-
-
+        :param invoke_kwargs: Additional keyword arguments to pass to the operation.
+                              These are merged with `default_invoke_kwargs` and may
+                              include parameters such as `temperature`, `max_tokens`,
+                              or `messages`.
+
+        :return: The full response returned by the awaited operation,
+                 typically an OpenAI `ChatCompletion` or other OpenAI SDK model.

         """
         invoke_kwargs = self.get_invoke_kwargs(invoke_kwargs)
@@ -248,10 +276,10 @@ class OpenAIProvider(ModelProvider):
         if invoke_response_format == InvokeResponseFormat.STRING:
             return str_response
         if invoke_response_format == InvokeResponseFormat.USAGE:
-
+            usage = response.to_dict()["usage"]
             response = {
                 UsageResponseKeys.ANSWER: str_response,
-                UsageResponseKeys.USAGE:
+                UsageResponseKeys.USAGE: usage,
             }
             return response

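The same answer-plus-usage shape can be reproduced with the OpenAI SDK directly. A minimal sketch, assuming `OPENAI_API_KEY` is set in the environment (the model name is illustrative):

```python
from openai import OpenAI

client = OpenAI()  # reads OPENAI_API_KEY from the environment
response = client.chat.completions.create(
    model="gpt-4o-mini",
    messages=[{"role": "user", "content": "What is the capital of France?"}],
)

# Mirrors the UsageResponseKeys.ANSWER / UsageResponseKeys.USAGE assembly above.
result = {
    "answer": response.choices[0].message.content,
    "usage": response.to_dict()["usage"],
}
print(result)
```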
@@ -264,27 +292,42 @@ class OpenAIProvider(ModelProvider):
         """
         OpenAI-specific implementation of `ModelProvider.invoke`.
         Invokes an OpenAI model operation using the synchronous client.
-        For full details, see `ModelProvider.invoke`.

         :param messages:
-
+            A list of dictionaries representing the conversation history or input messages.
+            Each dictionary should follow the format::
+                {
+                    "role": "system" | "user" | "assistant",
+                    "content": "Message content as a string",
+                }
+
+            Example:
+
+            .. code-block:: json

-
+                [
+                    {"role": "system", "content": "You are a helpful assistant."},
+                    {"role": "user", "content": "What is the capital of France?"}
+                ]
+
+            Defaults to None if no messages are provided.
+
+        :param invoke_response_format:
             Specifies the format of the returned response. Options:

             - "string": Returns only the generated text content, taken from a single response.
-            - "
+            - "usage": Combines the generated text with metadata (e.g., token usage), returning a dictionary::

-
-
-
-
-
+              .. code-block:: json
+                  {
+                      "answer": "<generated_text>",
+                      "usage": <ChatCompletion>.to_dict()["usage"]
+                  }

             - "full": Returns the full OpenAI `ChatCompletion` object.

         :param invoke_kwargs:
-            Additional keyword arguments passed to the OpenAI client.
+            Additional keyword arguments passed to the OpenAI client.

         :return:
             A string, dictionary, or `ChatCompletion` object, depending on `invoke_response_format`.
@@ -305,18 +348,46 @@ class OpenAIProvider(ModelProvider):
     ) -> Union[str, "ChatCompletion", dict]:
         """
         OpenAI-specific implementation of `ModelProvider.async_invoke`.
-        Invokes an OpenAI model operation using the
-
+        Invokes an OpenAI model operation using the asynchronous client.
+
+        :param messages:
+            A list of dictionaries representing the conversation history or input messages.
+            Each dictionary should follow the format::
+                {
+                    "role": "system" | "user" | "assistant",
+                    "content": "Message content as a string",
+                }
+
+            Example:
+
+            .. code-block:: json
+
+                [
+                    {"role": "system", "content": "You are a helpful assistant."},
+                    {"role": "user", "content": "What is the capital of France?"}
+                ]

-
+            Defaults to None if no messages are provided.

-        :param invoke_response_format:
-
+        :param invoke_response_format:
+            Specifies the format of the returned response. Options:
+
+            - "string": Returns only the generated text content, taken from a single response.
+            - "usage": Combines the generated text with metadata (e.g., token usage), returning a dictionary::
+
+              .. code-block:: json
+                  {
+                      "answer": "<generated_text>",
+                      "usage": <ChatCompletion>.to_dict()["usage"]
+                  }
+
+            - "full": Returns the full OpenAI `ChatCompletion` object.

         :param invoke_kwargs:
-
-        :returns Same as `ModelProvider.async_invoke`.
+            Additional keyword arguments passed to the OpenAI client.

+        :return:
+            A string, dictionary, or `ChatCompletion` object, depending on `invoke_response_format`.
         """
         response = await self.async_custom_invoke(messages=messages, **invoke_kwargs)
         return self._response_handler(
mlrun/datastore/s3.py
CHANGED
@@ -13,6 +13,7 @@
 # limitations under the License.

 import time
+import warnings
 from typing import Optional
 from urllib.parse import urlparse

@@ -28,6 +29,27 @@ from .base import DataStore, FileStats, make_datastore_schema_sanitizer
 class S3Store(DataStore):
     using_bucket = True

+    # TODO: Remove this in 1.12.0
+    def _get_endpoint_url_with_deprecation_warning(self):
+        """Get S3 endpoint URL with backward compatibility for deprecated S3_ENDPOINT_URL"""
+        # First try the new environment variable
+        endpoint_url = self._get_secret_or_env("AWS_ENDPOINT_URL_S3")
+        if endpoint_url:
+            return endpoint_url
+
+        # Check for deprecated environment variable
+        deprecated_endpoint_url = self._get_secret_or_env("S3_ENDPOINT_URL")
+        if deprecated_endpoint_url:
+            warnings.warn(
+                "S3_ENDPOINT_URL is deprecated in 1.10.0 and will be removed in 1.12.0, "
+                "use AWS_ENDPOINT_URL_S3 instead.",
+                # TODO: Remove this in 1.12.0
+                FutureWarning,
+            )
+            return deprecated_endpoint_url
+
+        return None
+
     def __init__(
         self, parent, schema, name, endpoint="", secrets: Optional[dict] = None
     ):
@@ -41,7 +63,7 @@ class S3Store(DataStore):
         access_key_id = self._get_secret_or_env("AWS_ACCESS_KEY_ID")
         secret_key = self._get_secret_or_env("AWS_SECRET_ACCESS_KEY")
         token_file = self._get_secret_or_env("AWS_CONTAINER_AUTHORIZATION_TOKEN_FILE")
-        endpoint_url = self.
+        endpoint_url = self._get_endpoint_url_with_deprecation_warning()
         force_non_anonymous = self._get_secret_or_env("S3_NON_ANONYMOUS")
         profile_name = self._get_secret_or_env("AWS_PROFILE")
         assume_role_arn = self._get_secret_or_env("MLRUN_AWS_ROLE_ARN")
@@ -159,7 +181,7 @@ class S3Store(DataStore):
     def get_storage_options(self):
         force_non_anonymous = self._get_secret_or_env("S3_NON_ANONYMOUS")
         profile = self._get_secret_or_env("AWS_PROFILE")
-        endpoint_url = self.
+        endpoint_url = self._get_endpoint_url_with_deprecation_warning()
         access_key_id = self._get_secret_or_env("AWS_ACCESS_KEY_ID")
         secret = self._get_secret_or_env("AWS_SECRET_ACCESS_KEY")
         token_file = self._get_secret_or_env("AWS_CONTAINER_AUTHORIZATION_TOKEN_FILE")
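For users of custom S3 endpoints (e.g., MinIO), the migration is a one-line environment change; the new variable is checked first, so it takes precedence when both are set. A sketch (the endpoint value is illustrative):

```python
import os

# Before: deprecated in 1.10.0, removed in 1.12.0; still works but now
# emits a FutureWarning via _get_endpoint_url_with_deprecation_warning().
os.environ["S3_ENDPOINT_URL"] = "http://minio.local:9000"

# After: the replacement variable, consulted before the deprecated one.
os.environ["AWS_ENDPOINT_URL_S3"] = "http://minio.local:9000"
```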
mlrun/datastore/storeytargets.py
CHANGED
@@ -18,10 +18,9 @@ from mergedeep import merge
 from storey import V3ioDriver

 import mlrun
-import mlrun.model_monitoring.helpers
 from mlrun.datastore.base import DataStore
 from mlrun.datastore.datastore_profile import (
-
+    DatastoreProfileKafkaStream,
     DatastoreProfileKafkaTarget,
     DatastoreProfileTDEngine,
     datastore_profile_read,
@@ -138,7 +137,7 @@ class KafkaStoreyTarget(storey.KafkaTarget):
         datastore_profile = datastore_profile_read(path)
         if not isinstance(
             datastore_profile,
-            (
+            (DatastoreProfileKafkaStream, DatastoreProfileKafkaTarget),
         ):
             raise mlrun.errors.MLRunInvalidArgumentError(
                 f"Unsupported datastore profile type: {type(datastore_profile)}"