mlrun 1.10.0rc24__py3-none-any.whl → 1.10.0rc26__py3-none-any.whl

This diff compares the contents of two publicly released versions of the package as they appear in their public registry, and is provided for informational purposes only.

Potentially problematic release.

Files changed (50)
  1. mlrun/artifacts/llm_prompt.py +8 -1
  2. mlrun/common/model_monitoring/helpers.py +86 -0
  3. mlrun/common/schemas/hub.py +25 -18
  4. mlrun/common/schemas/model_monitoring/constants.py +1 -0
  5. mlrun/common/schemas/model_monitoring/model_endpoints.py +10 -1
  6. mlrun/config.py +2 -3
  7. mlrun/datastore/__init__.py +2 -2
  8. mlrun/datastore/azure_blob.py +66 -43
  9. mlrun/datastore/datastore_profile.py +35 -5
  10. mlrun/datastore/model_provider/huggingface_provider.py +122 -30
  11. mlrun/datastore/model_provider/model_provider.py +62 -4
  12. mlrun/datastore/model_provider/openai_provider.py +114 -43
  13. mlrun/datastore/s3.py +24 -2
  14. mlrun/datastore/storeytargets.py +2 -3
  15. mlrun/db/base.py +15 -1
  16. mlrun/db/httpdb.py +17 -6
  17. mlrun/db/nopdb.py +14 -0
  18. mlrun/k8s_utils.py +0 -14
  19. mlrun/model_monitoring/api.py +2 -2
  20. mlrun/model_monitoring/applications/base.py +37 -10
  21. mlrun/model_monitoring/applications/context.py +1 -4
  22. mlrun/model_monitoring/controller.py +15 -5
  23. mlrun/model_monitoring/db/_schedules.py +2 -4
  24. mlrun/model_monitoring/db/tsdb/tdengine/tdengine_connector.py +3 -1
  25. mlrun/model_monitoring/db/tsdb/v3io/stream_graph_steps.py +17 -4
  26. mlrun/model_monitoring/db/tsdb/v3io/v3io_connector.py +3 -0
  27. mlrun/model_monitoring/helpers.py +5 -5
  28. mlrun/platforms/iguazio.py +7 -3
  29. mlrun/projects/project.py +33 -29
  30. mlrun/runtimes/base.py +0 -3
  31. mlrun/runtimes/mounts.py +15 -2
  32. mlrun/runtimes/nuclio/__init__.py +1 -0
  33. mlrun/runtimes/nuclio/application/application.py +11 -2
  34. mlrun/runtimes/nuclio/function.py +10 -0
  35. mlrun/runtimes/nuclio/serving.py +4 -0
  36. mlrun/runtimes/pod.py +153 -11
  37. mlrun/runtimes/utils.py +22 -5
  38. mlrun/serving/routers.py +23 -41
  39. mlrun/serving/server.py +26 -14
  40. mlrun/serving/states.py +3 -3
  41. mlrun/serving/system_steps.py +52 -29
  42. mlrun/serving/v2_serving.py +9 -10
  43. mlrun/utils/helpers.py +5 -2
  44. mlrun/utils/version/version.json +2 -2
  45. {mlrun-1.10.0rc24.dist-info → mlrun-1.10.0rc26.dist-info}/METADATA +24 -23
  46. {mlrun-1.10.0rc24.dist-info → mlrun-1.10.0rc26.dist-info}/RECORD +50 -50
  47. {mlrun-1.10.0rc24.dist-info → mlrun-1.10.0rc26.dist-info}/WHEEL +0 -0
  48. {mlrun-1.10.0rc24.dist-info → mlrun-1.10.0rc26.dist-info}/entry_points.txt +0 -0
  49. {mlrun-1.10.0rc24.dist-info → mlrun-1.10.0rc26.dist-info}/licenses/LICENSE +0 -0
  50. {mlrun-1.10.0rc24.dist-info → mlrun-1.10.0rc26.dist-info}/top_level.txt +0 -0
mlrun/datastore/model_provider/huggingface_provider.py CHANGED
@@ -36,6 +36,9 @@ class HuggingFaceProvider(ModelProvider):
     This class extends the ModelProvider base class and implements Hugging Face-specific
     functionality, including pipeline initialization, default text generation operations,
     and custom operations tailored to the Hugging Face Transformers pipeline API.
+
+    Note: The pipeline object will download the model (if not already cached) and load it
+    into memory for inference. Ensure you have the required CPU/GPU and memory to use this operation.
     """
 
     def __init__(
@@ -62,13 +65,12 @@ class HuggingFaceProvider(ModelProvider):
         )
         self.options = self.get_client_options()
         self._expected_operation_type = None
-        self.load_client()
+        self._download_model()
 
     @staticmethod
     def _extract_string_output(response: list[dict]) -> str:
         """
-        Extracts the first generated string from Hugging Face pipeline output,
-        regardless of whether it's plain text-generation or chat-style output.
+        Extracts the first generated string from Hugging Face pipeline output.
         """
         if not isinstance(response, list) or len(response) == 0:
             raise ValueError("Empty or invalid pipeline output")
@@ -86,6 +88,39 @@ class HuggingFaceProvider(ModelProvider):
             subpath = ""
         return endpoint, subpath
 
+    @property
+    def client(self) -> Any:
+        """
+        Lazily return the Hugging Face pipeline client.
+
+        If the client has not been initialized yet, it will be created
+        by calling `load_client`.
+        """
+        self.load_client()
+        return self._client
+
+    def _download_model(self):
+        """
+        Pre-downloads model files locally to prevent race conditions in multiprocessing.
+
+        Uses snapshot_download with local_dir_use_symlinks=False to ensure proper
+        file copying for safe concurrent access across multiple processes.
+
+        :raises:
+            ImportError: If the huggingface_hub package is not installed.
+        """
+        try:
+            from huggingface_hub import snapshot_download
+
+            # Download the model and tokenizer files directly to the cache.
+            snapshot_download(
+                repo_id=self.model,
+                local_dir_use_symlinks=False,
+                token=self._get_secret_or_env("HF_TOKEN") or None,
+            )
+        except ImportError as exc:
+            raise ImportError("huggingface_hub package is not installed") from exc
+
     def _response_handler(
         self,
         response: Union[str, list],
@@ -94,27 +129,46 @@ class HuggingFaceProvider(ModelProvider):
         **kwargs,
     ) -> Union[str, list, dict[str, Any]]:
         """
-        Same as `ModelProvider._response_handler`.
-
-        * Expected to receive the response with `return_full_text=False`.
-
-        :param messages: Same as in `ModelProvider._response_handler`.
-        :param response: Same as in `ModelProvider._response_handler`.
-        :param invoke_response_format: Same as in `ModelProvider._response_handler`, in full and string modes.
-
-        For usage mode, generate 3 statistics:
-        prompt_tokens, completion_tokens and total_tokens.
-
-        NOTE: Token counts are estimated after answer generation and
-        may differ from the actual tokens generated by the model due to
-        internal decoding behavior and implementation details.
-
-        :param kwargs: Same as in `ModelProvider._response_handler`.
-
-        :return: The result formatted according to the `invoke_response_format`.
+        Processes and formats the raw response from the HuggingFace pipeline according to the specified format.
+
+        The response should exclude the user's input (no repetition in the output).
+        This can be accomplished by invoking the pipeline with `return_full_text=False`.
+
+        :param response: The raw response from the HuggingFace pipeline, typically a list of dictionaries
+                         containing generated text sequences.
+        :param invoke_response_format: Determines how the response should be processed and returned. Options:
+
+            - STRING: Return only the main generated content as a string,
+              for single-answer responses.
+            - USAGE: Return a dictionary combining the string response with
+              token usage statistics:
+
+              .. code-block:: json
+
+                  {
+                      "answer": "<generated_text>",
+                      "usage": {
+                          "prompt_tokens": <int>,
+                          "completion_tokens": <int>,
+                          "total_tokens": <int>
+                      }
+                  }
+
+              Note: Token counts are estimated after answer generation and
+              may differ from the actual tokens generated by the model due to
+              internal decoding behavior and implementation details.
+
+            - FULL: Return the full raw response object.
+
+        :param messages: The original input messages used for token count estimation in USAGE mode.
+                         Can be a string, list of strings, or chat format messages.
+        :param kwargs: Additional parameters for response processing.
+
+        :return: The processed response in the format specified by `invoke_response_format`.
+                 Can be a string, dictionary, or the original response object.
 
         :raises MLRunInvalidArgumentError: If extracting the string response fails.
-        :raises MLRunRuntimeError: If applying the chat template to the model fails.
+        :raises MLRunRuntimeError: If applying the chat template to the model fails during token usage calculation.
         """
         if InvokeResponseFormat.is_str_response(invoke_response_format.value):
             str_response = self._extract_string_output(response)
@@ -161,11 +215,15 @@ class HuggingFaceProvider(ModelProvider):
         :raises:
             ImportError: If the `transformers` package is not installed.
         """
+        if self._client:
+            return
         try:
             from transformers import pipeline, AutoModelForCausalLM  # noqa
             from transformers import AutoTokenizer  # noqa
             from transformers.pipelines.base import Pipeline  # noqa
 
+            self.options["model_kwargs"] = self.options.get("model_kwargs", {})
+            self.options["model_kwargs"]["local_files_only"] = True
             self._client = pipeline(model=self.model, **self.options)
             self._expected_operation_type = Pipeline
         except ImportError as exc:
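Taken together, the new `_download_model` step and the `local_files_only` flag split model acquisition from model loading: the files are copied into the local cache once, eagerly, and the pipeline is later built strictly offline. A minimal standalone sketch of that pattern, using an illustrative model id and a plain env-var lookup in place of mlrun's secret handling:

```python
import os

from huggingface_hub import snapshot_download

MODEL_ID = "gpt2"  # illustrative repo id, not mlrun's default

# Step 1 (construction time): copy the model files into the local cache.
# local_dir_use_symlinks=False forces real file copies, so several worker
# processes can read the cache concurrently without racing on symlinks.
snapshot_download(
    repo_id=MODEL_ID,
    local_dir_use_symlinks=False,
    token=os.environ.get("HF_TOKEN") or None,
)

# Step 2 (first use): build the pipeline strictly from the local cache.
from transformers import pipeline

generator = pipeline(
    model=MODEL_ID,
    model_kwargs={"local_files_only": True},  # never hit the network here
)
print(generator("Hello,", max_new_tokens=5, return_full_text=False))
```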
@@ -186,23 +244,38 @@ class HuggingFaceProvider(ModelProvider):
         self, operation: Optional["Pipeline"] = None, **invoke_kwargs
     ) -> Union[list, dict, Any]:
         """
-        HuggingFace implementation of `ModelProvider.custom_invoke`.
-        Use the default config in provider client/ user defined client:
+        Invokes a HuggingFace pipeline operation with the given keyword arguments.
+
+        This method provides flexibility to use a custom pipeline object for specific tasks
+        (e.g., image classification, sentiment analysis).
+
+        The operation must be a Pipeline object from the transformers library that accepts keyword arguments.
 
         Example:
             ```python
+            from transformers import pipeline
+            from PIL import Image
+
+            # Using a custom pipeline for image classification
             image = Image.open(image_path)
             pipeline_object = pipeline("image-classification", model="microsoft/resnet-50")
             result = hf_provider.custom_invoke(
                 pipeline_object,
                 inputs=image,
             )
             ```
 
-        :param operation: A pipeline object
-        :param invoke_kwargs: Keyword arguments to pass to the operation.
-        :return: The full response returned by the operation.
+        :param operation: A Pipeline object from the transformers library.
+                          If not provided, defaults to the provider's configured pipeline.
+        :param invoke_kwargs: Keyword arguments to pass to the pipeline operation.
+                              These are merged with `default_invoke_kwargs` and may include
+                              parameters such as `inputs`, `max_length`, `temperature`, or task-specific options.
+
+        :return: The full response returned by the pipeline operation.
+                 Format depends on the pipeline task (list for text generation,
+                 dict for classification, etc.).
+
+        :raises MLRunInvalidArgumentError: If the operation is not a valid Pipeline object.
         """
         invoke_kwargs = self.get_invoke_kwargs(invoke_kwargs)
@@ -222,12 +295,24 @@ class HuggingFaceProvider(ModelProvider):
         **invoke_kwargs,
     ) -> Union[str, list, dict[str, Any]]:
         """
-        HuggingFace-specific implementation of `ModelProvider.invoke`.
-        Invokes a HuggingFace model operation using the synchronous client.
-        For full details, see `ModelProvider.invoke`.
+        HuggingFace-specific implementation of model invocation using the synchronous pipeline client.
+        Invokes a HuggingFace model operation for text generation tasks.
+
+        Note: Ensure your environment has sufficient computational resources (CPU/GPU and memory) to run the model.
 
         :param messages:
-            Same as `ModelProvider.invoke`.
+            Input for the text generation model. Can be provided in multiple formats:
+
+            - A single string: Direct text input for generation
+            - A list of strings: Multiple text inputs for batch processing
+            - Chat format: A list of dictionaries with "role" and "content" keys:
+
+              .. code-block:: json
+
+                  [
+                      {"role": "system", "content": "You are a helpful assistant."},
+                      {"role": "user", "content": "What is the capital of France?"}
+                  ]
 
         :param invoke_response_format: InvokeResponseFormat
             Specifies the format of the returned response. Options:
@@ -245,17 +330,24 @@ class HuggingFaceProvider(ModelProvider):
                     }
                 }
 
+            Note: For usage mode, the model tokenizer should support apply_chat_template.
+
             - "full": Returns the raw response object from the HuggingFace model,
               typically a list of generated sequences (dictionaries).
               This format does not include token usage statistics.
 
         :param invoke_kwargs:
-            Additional keyword arguments passed to the HuggingFace client. Same as in `ModelProvider.invoke`.
+            Additional keyword arguments passed to the HuggingFace pipeline.
 
         :return:
             A string, dictionary, or list of model outputs, depending on `invoke_response_format`.
-        """
 
+        :raises MLRunInvalidArgumentError:
+            If the pipeline task is not "text-generation" or if the response contains multiple outputs
+            when extracting string content.
+        :raises MLRunRuntimeError:
+            If using "usage" response mode and the model tokenizer does not support chat template formatting.
+        """
         if self.client.task != "text-generation":
             raise mlrun.errors.MLRunInvalidArgumentError(
                 "HuggingFaceProvider.invoke supports text-generation task only"
mlrun/datastore/model_provider/model_provider.py CHANGED
@@ -108,7 +108,7 @@ class ModelProvider(BaseRemoteClient):
             additional metadata or token usage statistics, in this format:
             {"answer": <string>, "usage": <dict>}
 
-            - FULL: Return the full raw response object unmodified.
+            - FULL: Return the full raw response object.
 
         :param kwargs: Additional parameters that may be required by specific implementations.
 
@@ -164,7 +164,9 @@ class ModelProvider(BaseRemoteClient):
         )
         return self._async_client
 
-    def custom_invoke(self, operation: Optional[Callable], **invoke_kwargs) -> Any:
+    def custom_invoke(
+        self, operation: Optional[Callable] = None, **invoke_kwargs
+    ) -> Any:
         """
         Invokes a model operation from a provider (e.g., OpenAI, Hugging Face, etc.) with the given keyword arguments.
 
@@ -178,7 +180,7 @@ class ModelProvider(BaseRemoteClient):
         raise NotImplementedError("custom_invoke method is not implemented")
 
     async def async_custom_invoke(
-        self, operation: Optional[Callable[..., Awaitable[Any]]], **invoke_kwargs
+        self, operation: Optional[Callable[..., Awaitable[Any]]] = None, **invoke_kwargs
     ) -> Any:
         """
         Asynchronously invokes a model operation from a provider (e.g., OpenAI, Hugging Face, etc.)
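With `operation` now defaulting to None in both variants, callers can rely on the provider's default operation. A minimal hedged sketch, where `provider` stands in for any concrete subclass instance and the `messages` kwarg follows the OpenAI-style default shown later in this diff:

```python
# Sync: falls back to the provider's default operation
# (e.g., chat.completions.create for OpenAIProvider).
result = provider.custom_invoke(
    messages=[{"role": "user", "content": "Hello"}],
)

# Async: the same fallback via the async client.
# result = await provider.async_custom_invoke(
#     messages=[{"role": "user", "content": "Hello"}],
# )
```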
@@ -263,5 +265,61 @@ class ModelProvider(BaseRemoteClient):
         invoke_response_format=InvokeResponseFormat.FULL,
         **invoke_kwargs,
     ) -> Union[str, dict[str, Any], Any]:
-        """Async version of `invoke`. See `invoke` for full documentation."""
+        """
+        Asynchronously invokes a generative AI model with the provided messages and additional parameters.
+        This method is designed to be a flexible interface for interacting with various
+        generative AI backends (e.g., OpenAI, Hugging Face, etc.). It allows users to send
+        a list of messages (following a standardized format) and receive a response.
+
+        :param messages: A list of dictionaries representing the conversation history or input messages.
+            Each dictionary should follow the format::
+
+                {"role": "system" | "user" | "assistant", "content": "Message content as a string"}
+
+            Example:
+
+            .. code-block:: json
+
+                [
+                    {"role": "system", "content": "You are a helpful assistant."},
+                    {"role": "user", "content": "What is the capital of France?"}
+                ]
+
+            This format is consistent across all backends. Defaults to None if no messages
+            are provided.
+
+        :param invoke_response_format: Determines how the model response is returned:
+
+            - string: Returns only the generated text content from the model output,
+              for single-answer responses only.
+
+            - usage: Combines the STRING response with additional metadata (token usage),
+              and returns the result in a dictionary.
+
+              Note: The usage dictionary may contain additional
+              keys depending on the model provider:
+
+              .. code-block:: json
+
+                  {
+                      "answer": "<generated_text>",
+                      "usage": {
+                          "prompt_tokens": <int>,
+                          "completion_tokens": <int>,
+                          "total_tokens": <int>
+                      }
+                  }
+
+            - full: Returns the full model output.
+
+        :param invoke_kwargs:
+            Additional keyword arguments to be passed to the underlying model API call.
+            These can include parameters such as temperature, max tokens, etc.,
+            depending on the capabilities of the specific backend being used.
+
+        :return: The invoke result formatted according to the specified
+            invoke_response_format parameter.
+        """
         raise NotImplementedError("async_invoke is not implemented")
mlrun/datastore/model_provider/openai_provider.py CHANGED
@@ -80,8 +80,12 @@ class OpenAIProvider(ModelProvider):
     @staticmethod
     def _extract_string_output(response: "ChatCompletion") -> str:
         """
-        Extracts the first generated string from Hugging Face pipeline output,
-        regardless of whether it's plain text-generation or chat-style output.
+        Extracts the text content of the first choice from an OpenAI ChatCompletion response.
+        Only supports responses with a single choice. Raises an error if multiple choices exist.
+
+        :param response: The ChatCompletion response from OpenAI.
+        :return: The text content of the first message in the response.
+        :raises MLRunInvalidArgumentError: If the response contains more than one choice.
         """
         if len(response.choices) != 1:
             raise mlrun.errors.MLRunInvalidArgumentError(
@@ -115,13 +119,14 @@ class OpenAIProvider(ModelProvider):
         The client is created only if it does not already exist.
         Raises ImportError if the openai package is not installed.
         """
-        if not self._client:
-            try:
-                from openai import OpenAI  # noqa
+        if self._client:
+            return
+        try:
+            from openai import OpenAI  # noqa
 
-                self._client = OpenAI(**self.options)
-            except ImportError as exc:
-                raise ImportError("openai package is not installed") from exc
+            self._client = OpenAI(**self.options)
+        except ImportError as exc:
+            raise ImportError("openai package is not installed") from exc
 
     def load_async_client(self) -> None:
         """
@@ -163,25 +168,37 @@ class OpenAIProvider(ModelProvider):
         self, operation: Optional[Callable] = None, **invoke_kwargs
     ) -> Union["ChatCompletion", "BaseModel"]:
         """
-        OpenAI-specific implementation of `ModelProvider.custom_invoke`.
-
-        Invokes an OpenAI model operation using the sync client. For full details, see
-        `ModelProvider.custom_invoke`.
+        Invokes a model operation from the OpenAI client with the given keyword arguments.
+
+        This method provides flexibility to either:
+
+        - Call a specific OpenAI client operation (e.g., `client.images.generate`).
+        - Default to `chat.completions.create` when no operation is provided.
+
+        The operation must be a callable that accepts keyword arguments. If the callable
+        does not accept a `model` parameter, it will be omitted from the call.
 
         Example:
             ```python
-            result = openai_model_provider.invoke(
+            result = openai_model_provider.custom_invoke(
                 openai_model_provider.client.images.generate,
                 prompt="A futuristic cityscape at sunset",
                 n=1,
                 size="1024x1024",
             )
             ```
 
-        :param operation: Same as ModelProvider.custom_invoke.
-        :param invoke_kwargs: Same as ModelProvider.custom_invoke.
-        :return: Same as ModelProvider.custom_invoke.
+        :param operation: A callable representing the OpenAI operation to invoke.
+                          If not provided, defaults to `client.chat.completions.create`.
+        :param invoke_kwargs: Additional keyword arguments to pass to the operation.
+                              These are merged with `default_invoke_kwargs` and may
+                              include parameters such as `temperature`, `max_tokens`,
+                              or `messages`.
+
+        :return: The full response returned by the operation, typically
+                 an OpenAI `ChatCompletion` or other OpenAI SDK model.
         """
         invoke_kwargs = self.get_invoke_kwargs(invoke_kwargs)
         model_kwargs = {"model": invoke_kwargs.pop("model", None) or self.model}
 
@@ -202,24 +219,35 @@ class OpenAIProvider(ModelProvider):
         **invoke_kwargs,
     ) -> Union["ChatCompletion", "BaseModel"]:
         """
-        OpenAI-specific implementation of `ModelProvider.async_custom_invoke`.
-
-        Invokes an OpenAI model operation using the async client. For full details, see
-        `ModelProvider.async_custom_invoke`.
+        Asynchronously invokes a model operation from the OpenAI client with the given keyword arguments.
+
+        This method provides flexibility to either:
+
+        - Call a specific async OpenAI client operation (e.g., `async_client.images.generate`).
+        - Default to `chat.completions.create` when no operation is provided.
+
+        The operation must be an async callable that accepts keyword arguments.
+        If the callable does not accept a `model` parameter, it will be omitted from the call.
 
         Example:
             ```python
-            result = openai_model_provider.invoke(
+            result = await openai_model_provider.async_custom_invoke(
                 openai_model_provider.async_client.images.generate,
                 prompt="A futuristic cityscape at sunset",
                 n=1,
                 size="1024x1024",
             )
             ```
 
-        :param operation: Same as ModelProvider.async_custom_invoke.
-        :param invoke_kwargs: Same as ModelProvider.async_custom_invoke.
-        :return: Same as ModelProvider.async_custom_invoke.
+        :param operation: An async callable representing the OpenAI operation to invoke.
+                          If not provided, defaults to `async_client.chat.completions.create`.
+        :param invoke_kwargs: Additional keyword arguments to pass to the operation.
+                              These are merged with `default_invoke_kwargs` and may
+                              include parameters such as `temperature`, `max_tokens`,
+                              or `messages`.
+
+        :return: The full response returned by the awaited operation,
+                 typically an OpenAI `ChatCompletion` or other OpenAI SDK model.
         """
         invoke_kwargs = self.get_invoke_kwargs(invoke_kwargs)
@@ -248,10 +276,10 @@ class OpenAIProvider(ModelProvider):
         if invoke_response_format == InvokeResponseFormat.STRING:
             return str_response
         if invoke_response_format == InvokeResponseFormat.USAGE:
-            stats = response.to_dict()["usage"]
+            usage = response.to_dict()["usage"]
             response = {
                 UsageResponseKeys.ANSWER: str_response,
-                UsageResponseKeys.USAGE: stats,
+                UsageResponseKeys.USAGE: usage,
             }
             return response
 
@@ -264,27 +292,42 @@ class OpenAIProvider(ModelProvider):
         """
         OpenAI-specific implementation of `ModelProvider.invoke`.
         Invokes an OpenAI model operation using the synchronous client.
-        For full details, see `ModelProvider.invoke`.
 
         :param messages:
-            Same as `ModelProvider.invoke`.
+            A list of dictionaries representing the conversation history or input messages.
+            Each dictionary should follow the format::
+
+                {
+                    "role": "system" | "user" | "assistant",
+                    "content": "Message content as a string",
+                }
+
+            Example:
+
+            .. code-block:: json
+
+                [
+                    {"role": "system", "content": "You are a helpful assistant."},
+                    {"role": "user", "content": "What is the capital of France?"}
+                ]
+
+            Defaults to None if no messages are provided.
 
-        :param invoke_response_format: InvokeResponseFormat
+        :param invoke_response_format:
             Specifies the format of the returned response. Options:
 
             - "string": Returns only the generated text content, taken from a single response.
-            - "stats": Combines the generated text with metadata (e.g., token usage), returning a dictionary:
+            - "usage": Combines the generated text with metadata (e.g., token usage), returning a dictionary::
 
-              .. code-block:: json
-                  {
-                      "answer": "<generated_text>",
-                      "stats": <ChatCompletion>.to_dict()["usage"]
-                  }
+              .. code-block:: json
+
+                  {
+                      "answer": "<generated_text>",
+                      "usage": <ChatCompletion>.to_dict()["usage"]
+                  }
 
             - "full": Returns the full OpenAI `ChatCompletion` object.
 
         :param invoke_kwargs:
-            Additional keyword arguments passed to the OpenAI client. Same as in `ModelProvider.invoke`.
+            Additional keyword arguments passed to the OpenAI client.
 
         :return:
             A string, dictionary, or `ChatCompletion` object, depending on `invoke_response_format`.
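A hedged usage sketch against the contract above; `openai_model_provider` is assumed to be an initialized `OpenAIProvider`, and the extra kwargs are illustrative:

```python
messages = [
    {"role": "system", "content": "You are a helpful assistant."},
    {"role": "user", "content": "What is the capital of France?"},
]

# "usage" mode returns {"answer": <str>, "usage": <dict>}.
result = openai_model_provider.invoke(
    messages=messages,
    invoke_response_format=InvokeResponseFormat.USAGE,
    temperature=0.2,  # forwarded to chat.completions.create
)
print(result["answer"])
print(result["usage"]["total_tokens"])
```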
@@ -305,18 +348,46 @@ class OpenAIProvider(ModelProvider):
     ) -> Union[str, "ChatCompletion", dict]:
         """
         OpenAI-specific implementation of `ModelProvider.async_invoke`.
-        Invokes an OpenAI model operation using the async client.
-        For full details, see `ModelProvider.async_invoke` and `OpenAIProvider.invoke`.
+        Invokes an OpenAI model operation using the asynchronous client.
+
+        :param messages:
+            A list of dictionaries representing the conversation history or input messages.
+            Each dictionary should follow the format::
+
+                {
+                    "role": "system" | "user" | "assistant",
+                    "content": "Message content as a string",
+                }
+
+            Example:
+
+            .. code-block:: json
+
+                [
+                    {"role": "system", "content": "You are a helpful assistant."},
+                    {"role": "user", "content": "What is the capital of France?"}
+                ]
 
-        :param messages: Same as `OpenAIProvider.invoke`.
+            Defaults to None if no messages are provided.
 
-        :param invoke_response_format: InvokeResponseFormat
-            Same as `OpenAIProvider.invoke`.
+        :param invoke_response_format:
+            Specifies the format of the returned response. Options:
+
+            - "string": Returns only the generated text content, taken from a single response.
+            - "usage": Combines the generated text with metadata (e.g., token usage), returning a dictionary::
+
+              .. code-block:: json
+
+                  {
+                      "answer": "<generated_text>",
+                      "usage": <ChatCompletion>.to_dict()["usage"]
+                  }
+
+            - "full": Returns the full OpenAI `ChatCompletion` object.
 
         :param invoke_kwargs:
-            Same as `OpenAIProvider.invoke`.
-        :returns Same as `ModelProvider.async_invoke`.
+            Additional keyword arguments passed to the OpenAI client.
 
+        :return:
+            A string, dictionary, or `ChatCompletion` object, depending on `invoke_response_format`.
         """
         response = await self.async_custom_invoke(messages=messages, **invoke_kwargs)
         return self._response_handler(
mlrun/datastore/s3.py CHANGED
@@ -13,6 +13,7 @@
 # limitations under the License.
 
 import time
+import warnings
 from typing import Optional
 from urllib.parse import urlparse
 
@@ -28,6 +29,27 @@ from .base import DataStore, FileStats, make_datastore_schema_sanitizer
 class S3Store(DataStore):
     using_bucket = True
 
+    # TODO: Remove this in 1.12.0
+    def _get_endpoint_url_with_deprecation_warning(self):
+        """Get S3 endpoint URL with backward compatibility for the deprecated S3_ENDPOINT_URL"""
+        # First, try the new environment variable
+        endpoint_url = self._get_secret_or_env("AWS_ENDPOINT_URL_S3")
+        if endpoint_url:
+            return endpoint_url
+
+        # Check for the deprecated environment variable
+        deprecated_endpoint_url = self._get_secret_or_env("S3_ENDPOINT_URL")
+        if deprecated_endpoint_url:
+            warnings.warn(
+                "S3_ENDPOINT_URL is deprecated in 1.10.0 and will be removed in 1.12.0, "
+                "use AWS_ENDPOINT_URL_S3 instead.",
+                # TODO: Remove this in 1.12.0
+                FutureWarning,
+            )
+            return deprecated_endpoint_url
+
+        return None
+
     def __init__(
         self, parent, schema, name, endpoint="", secrets: Optional[dict] = None
     ):
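For deployments pointing at custom S3-compatible endpoints, migration is a rename of the environment variable; the endpoint value below is an illustrative assumption:

```python
import os

# Deprecated name: still honored until 1.12.0, but triggers a FutureWarning.
# os.environ["S3_ENDPOINT_URL"] = "http://minio.local:9000"

# New name, checked first by S3Store:
os.environ["AWS_ENDPOINT_URL_S3"] = "http://minio.local:9000"  # illustrative
```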
@@ -41,7 +63,7 @@ class S3Store(DataStore):
         access_key_id = self._get_secret_or_env("AWS_ACCESS_KEY_ID")
         secret_key = self._get_secret_or_env("AWS_SECRET_ACCESS_KEY")
         token_file = self._get_secret_or_env("AWS_CONTAINER_AUTHORIZATION_TOKEN_FILE")
-        endpoint_url = self._get_secret_or_env("S3_ENDPOINT_URL")
+        endpoint_url = self._get_endpoint_url_with_deprecation_warning()
         force_non_anonymous = self._get_secret_or_env("S3_NON_ANONYMOUS")
         profile_name = self._get_secret_or_env("AWS_PROFILE")
         assume_role_arn = self._get_secret_or_env("MLRUN_AWS_ROLE_ARN")
@@ -159,7 +181,7 @@ class S3Store(DataStore):
     def get_storage_options(self):
         force_non_anonymous = self._get_secret_or_env("S3_NON_ANONYMOUS")
         profile = self._get_secret_or_env("AWS_PROFILE")
-        endpoint_url = self._get_secret_or_env("S3_ENDPOINT_URL")
+        endpoint_url = self._get_endpoint_url_with_deprecation_warning()
         access_key_id = self._get_secret_or_env("AWS_ACCESS_KEY_ID")
         secret = self._get_secret_or_env("AWS_SECRET_ACCESS_KEY")
         token_file = self._get_secret_or_env("AWS_CONTAINER_AUTHORIZATION_TOKEN_FILE")
mlrun/datastore/storeytargets.py CHANGED
@@ -18,10 +18,9 @@ from mergedeep import merge
 from storey import V3ioDriver
 
 import mlrun
-import mlrun.model_monitoring.helpers
 from mlrun.datastore.base import DataStore
 from mlrun.datastore.datastore_profile import (
-    DatastoreProfileKafkaSource,
+    DatastoreProfileKafkaStream,
     DatastoreProfileKafkaTarget,
     DatastoreProfileTDEngine,
     datastore_profile_read,
@@ -138,7 +137,7 @@ class KafkaStoreyTarget(storey.KafkaTarget):
         datastore_profile = datastore_profile_read(path)
         if not isinstance(
             datastore_profile,
-            (DatastoreProfileKafkaSource, DatastoreProfileKafkaTarget),
+            (DatastoreProfileKafkaStream, DatastoreProfileKafkaTarget),
         ):
             raise mlrun.errors.MLRunInvalidArgumentError(
                 f"Unsupported datastore profile type: {type(datastore_profile)}"