mlrun 1.10.0rc24__py3-none-any.whl → 1.10.0rc26__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of mlrun might be problematic.
- mlrun/artifacts/llm_prompt.py +8 -1
- mlrun/common/model_monitoring/helpers.py +86 -0
- mlrun/common/schemas/hub.py +25 -18
- mlrun/common/schemas/model_monitoring/constants.py +1 -0
- mlrun/common/schemas/model_monitoring/model_endpoints.py +10 -1
- mlrun/config.py +2 -3
- mlrun/datastore/__init__.py +2 -2
- mlrun/datastore/azure_blob.py +66 -43
- mlrun/datastore/datastore_profile.py +35 -5
- mlrun/datastore/model_provider/huggingface_provider.py +122 -30
- mlrun/datastore/model_provider/model_provider.py +62 -4
- mlrun/datastore/model_provider/openai_provider.py +114 -43
- mlrun/datastore/s3.py +24 -2
- mlrun/datastore/storeytargets.py +2 -3
- mlrun/db/base.py +15 -1
- mlrun/db/httpdb.py +17 -6
- mlrun/db/nopdb.py +14 -0
- mlrun/k8s_utils.py +0 -14
- mlrun/model_monitoring/api.py +2 -2
- mlrun/model_monitoring/applications/base.py +37 -10
- mlrun/model_monitoring/applications/context.py +1 -4
- mlrun/model_monitoring/controller.py +15 -5
- mlrun/model_monitoring/db/_schedules.py +2 -4
- mlrun/model_monitoring/db/tsdb/tdengine/tdengine_connector.py +3 -1
- mlrun/model_monitoring/db/tsdb/v3io/stream_graph_steps.py +17 -4
- mlrun/model_monitoring/db/tsdb/v3io/v3io_connector.py +3 -0
- mlrun/model_monitoring/helpers.py +5 -5
- mlrun/platforms/iguazio.py +7 -3
- mlrun/projects/project.py +33 -29
- mlrun/runtimes/base.py +0 -3
- mlrun/runtimes/mounts.py +15 -2
- mlrun/runtimes/nuclio/__init__.py +1 -0
- mlrun/runtimes/nuclio/application/application.py +11 -2
- mlrun/runtimes/nuclio/function.py +10 -0
- mlrun/runtimes/nuclio/serving.py +4 -0
- mlrun/runtimes/pod.py +153 -11
- mlrun/runtimes/utils.py +22 -5
- mlrun/serving/routers.py +23 -41
- mlrun/serving/server.py +26 -14
- mlrun/serving/states.py +3 -3
- mlrun/serving/system_steps.py +52 -29
- mlrun/serving/v2_serving.py +9 -10
- mlrun/utils/helpers.py +5 -2
- mlrun/utils/version/version.json +2 -2
- {mlrun-1.10.0rc24.dist-info → mlrun-1.10.0rc26.dist-info}/METADATA +24 -23
- {mlrun-1.10.0rc24.dist-info → mlrun-1.10.0rc26.dist-info}/RECORD +50 -50
- {mlrun-1.10.0rc24.dist-info → mlrun-1.10.0rc26.dist-info}/WHEEL +0 -0
- {mlrun-1.10.0rc24.dist-info → mlrun-1.10.0rc26.dist-info}/entry_points.txt +0 -0
- {mlrun-1.10.0rc24.dist-info → mlrun-1.10.0rc26.dist-info}/licenses/LICENSE +0 -0
- {mlrun-1.10.0rc24.dist-info → mlrun-1.10.0rc26.dist-info}/top_level.txt +0 -0
mlrun/datastore/model_provider/huggingface_provider.py
CHANGED

@@ -36,6 +36,9 @@ class HuggingFaceProvider(ModelProvider):
     This class extends the ModelProvider base class and implements Hugging Face-specific
     functionality, including pipeline initialization, default text generation operations,
     and custom operations tailored to the Hugging Face Transformers pipeline API.
+
+    Note: The pipeline object will download the model (if not already cached) and load it
+    into memory for inference. Ensure you have the required CPU/GPU and memory to use this operation.
     """

     def __init__(
@@ -62,13 +65,12 @@ class HuggingFaceProvider(ModelProvider):
         )
         self.options = self.get_client_options()
         self._expected_operation_type = None
-        self.
+        self._download_model()

     @staticmethod
     def _extract_string_output(response: list[dict]) -> str:
         """
-        Extracts the first generated string from Hugging Face pipeline output
-        regardless of whether it's plain text-generation or chat-style output.
+        Extracts the first generated string from Hugging Face pipeline output
         """
         if not isinstance(response, list) or len(response) == 0:
             raise ValueError("Empty or invalid pipeline output")
@@ -86,6 +88,39 @@ class HuggingFaceProvider(ModelProvider):
             subpath = ""
         return endpoint, subpath

+    @property
+    def client(self) -> Any:
+        """
+        Lazily return the HuggingFace-pipeline client.
+
+        If the client has not been initialized yet, it will be created
+        by calling `load_client`.
+        """
+        self.load_client()
+        return self._client
+
+    def _download_model(self):
+        """
+        Pre-downloads model files locally to prevent race conditions in multiprocessing.
+
+        Uses snapshot_download with local_dir_use_symlinks=False to ensure proper
+        file copying for safe concurrent access across multiple processes.
+
+        :raises:
+            ImportError: If huggingface_hub package is not installed.
+        """
+        try:
+            from huggingface_hub import snapshot_download
+
+            # Download the model and tokenizer files directly to the cache.
+            snapshot_download(
+                repo_id=self.model,
+                local_dir_use_symlinks=False,
+                token=self._get_secret_or_env("HF_TOKEN") or None,
+            )
+        except ImportError as exc:
+            raise ImportError("huggingface_hub package is not installed") from exc
+
     def _response_handler(
         self,
         response: Union[str, list],
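The pre-download above, combined with the `local_files_only` flag added to `load_client` further down, means each worker process loads weights that are already on disk instead of racing on a Hub download. A minimal standalone sketch of the same pattern, assuming `huggingface_hub` and `transformers` are installed (the model name is illustrative):

```python
from huggingface_hub import snapshot_download
from transformers import pipeline

# Step 1: copy the model files into the local cache up front (mirrors
# _download_model above); with the files already present, concurrent
# workers never trigger a partial or duplicate download.
snapshot_download(repo_id="gpt2", local_dir_use_symlinks=False)

# Step 2: build the pipeline offline (mirrors load_client, which now
# forces model_kwargs["local_files_only"] = True).
generator = pipeline(model="gpt2", model_kwargs={"local_files_only": True})

# return_full_text=False keeps the prompt out of the generated output,
# matching the expectation documented in _response_handler below.
print(generator("Hello, world.", max_new_tokens=8, return_full_text=False))
```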
@@ -94,27 +129,46 @@ class HuggingFaceProvider(ModelProvider):
         **kwargs,
     ) -> Union[str, list, dict[str, Any]]:
         """
-
+        Processes and formats the raw response from the HuggingFace pipeline according to the specified format.
+
+        The response should exclude the user's input (no repetition in the output).
+        This can be accomplished by invoking the pipeline with `return_full_text=False`.
+
+        :param response: The raw response from the HuggingFace pipeline, typically a list of dictionaries
+                         containing generated text sequences.
+        :param invoke_response_format: Determines how the response should be processed and returned. Options:
+
+            - STRING: Return only the main generated content as a string,
+              for single-answer responses.
+            - USAGE: Return a dictionary combining the string response with
+              token usage statistics:

-
+              .. code-block:: json

-
-
-
+                  {
+                      "answer": "<generated_text>",
+                      "usage": {
+                          "prompt_tokens": <int>,
+                          "completion_tokens": <int>,
+                          "total_tokens": <int>
+                      }
+                  }

-
-
+              Note: Token counts are estimated after answer generation and
+              may differ from the actual tokens generated by the model due to
+              internal decoding behavior and implementation details.

-
-            may differ from the actual tokens generated by the model due to
-            internal decoding behavior and implementation details.
+            - FULL: Return the full raw response object.

-        :param
+        :param messages: The original input messages used for token count estimation in USAGE mode.
+                         Can be a string, list of strings, or chat format messages.
+        :param kwargs: Additional parameters for response processing.

-        :return:
+        :return: The processed response in the format specified by `invoke_response_format`.
+                 Can be a string, dictionary, or the original response object.

         :raises MLRunInvalidArgumentError: If extracting the string response fails.
-        :raises MLRunRuntimeError: If applying the chat template to the model fails.
+        :raises MLRunRuntimeError: If applying the chat template to the model fails during token usage calculation.
         """
         if InvokeResponseFormat.is_str_response(invoke_response_format.value):
             str_response = self._extract_string_output(response)
@@ -161,11 +215,15 @@ class HuggingFaceProvider(ModelProvider):
         :raises:
             ImportError: If the `transformers` package is not installed.
         """
+        if self._client:
+            return
         try:
             from transformers import pipeline, AutoModelForCausalLM  # noqa
             from transformers import AutoTokenizer  # noqa
             from transformers.pipelines.base import Pipeline  # noqa

+            self.options["model_kwargs"] = self.options.get("model_kwargs", {})
+            self.options["model_kwargs"]["local_files_only"] = True
             self._client = pipeline(model=self.model, **self.options)
             self._expected_operation_type = Pipeline
         except ImportError as exc:
@@ -186,23 +244,38 @@ class HuggingFaceProvider(ModelProvider):
         self, operation: Optional["Pipeline"] = None, **invoke_kwargs
     ) -> Union[list, dict, Any]:
         """
-        HuggingFace
-
+        Invokes a HuggingFace pipeline operation with the given keyword arguments.
+
+        This method provides flexibility to use a custom pipeline object for specific tasks
+        (e.g., image classification, sentiment analysis).
+
+        The operation must be a Pipeline object from the transformers library that accepts keyword arguments.

         Example:
-
+            ```python
+            from transformers import pipeline
+            from PIL import Image
+
+            # Using custom pipeline for image classification
             image = Image.open(image_path)
-            pipeline_object =
+            pipeline_object = pipeline("image-classification", model="microsoft/resnet-50")
             result = hf_provider.custom_invoke(
                 pipeline_object,
                 inputs=image,
             )
-
+            ```
+
+        :param operation: A Pipeline object from the transformers library.
+                          If not provided, defaults to the provider's configured pipeline.
+        :param invoke_kwargs: Keyword arguments to pass to the pipeline operation.
+                              These are merged with `default_invoke_kwargs` and may include
+                              parameters such as `inputs`, `max_length`, `temperature`, or task-specific options.

+        :return: The full response returned by the pipeline operation.
+                 Format depends on the pipeline task (list for text generation,
+                 dict for classification, etc.).

-        :
-        :param invoke_kwargs: Keyword arguments to pass to the operation.
-        :return: The full response returned by the operation.
+        :raises MLRunInvalidArgumentError: If the operation is not a valid Pipeline object.

         """
         invoke_kwargs = self.get_invoke_kwargs(invoke_kwargs)
@@ -222,12 +295,24 @@ class HuggingFaceProvider(ModelProvider):
         **invoke_kwargs,
     ) -> Union[str, list, dict[str, Any]]:
         """
-        HuggingFace-specific implementation of
-        Invokes a HuggingFace model operation
-
+        HuggingFace-specific implementation of model invocation using the synchronous pipeline client.
+        Invokes a HuggingFace model operation for text generation tasks.
+
+        Note: Ensure your environment has sufficient computational resources (CPU/GPU and memory) to run the model.

         :param messages:
-
+            Input for the text generation model. Can be provided in multiple formats:
+
+            - A single string: Direct text input for generation
+            - A list of strings: Multiple text inputs for batch processing
+            - Chat format: A list of dictionaries with "role" and "content" keys:
+
+              .. code-block:: json
+
+                  [
+                      {"role": "system", "content": "You are a helpful assistant."},
+                      {"role": "user", "content": "What is the capital of France?"}
+                  ]

         :param invoke_response_format: InvokeResponseFormat
             Specifies the format of the returned response. Options:
@@ -245,17 +330,24 @@ class HuggingFaceProvider(ModelProvider):
                       }
                   }

+            Note: For usage mode, the model tokenizer should support apply_chat_template.
+
             - "full": Returns the raw response object from the HuggingFace model,
               typically a list of generated sequences (dictionaries).
               This format does not include token usage statistics.

         :param invoke_kwargs:
-            Additional keyword arguments passed to the HuggingFace
+            Additional keyword arguments passed to the HuggingFace pipeline.

         :return:
             A string, dictionary, or list of model outputs, depending on `invoke_response_format`.
-        """

+        :raises MLRunInvalidArgumentError:
+            If the pipeline task is not "text-generation" or if the response contains multiple outputs when extracting
+            string content.
+        :raises MLRunRuntimeError:
+            If using "usage" response mode and the model tokenizer does not support chat template formatting.
+        """
         if self.client.task != "text-generation":
             raise mlrun.errors.MLRunInvalidArgumentError(
                 "HuggingFaceProvider.invoke supports text-generation task only"
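Taken together, the docstring above describes the full `invoke` contract. A hedged usage sketch; `hf_provider` (the instance name used in the diff's own `custom_invoke` example) and the import location of `InvokeResponseFormat` are assumptions, since the diff references the enum unqualified:

```python
# Assumes an already-constructed HuggingFaceProvider instance named hf_provider
# whose pipeline task is "text-generation", and that InvokeResponseFormat has
# been imported (its exact module path is not shown in this diff).
messages = [
    {"role": "system", "content": "You are a helpful assistant."},
    {"role": "user", "content": "What is the capital of France?"},
]

result = hf_provider.invoke(
    messages,
    invoke_response_format=InvokeResponseFormat.USAGE,
    return_full_text=False,  # keep the prompt out of the generated text
)
# "usage" mode returns {"answer": ..., "usage": {...}}; per the note above,
# the token counts are estimates made after generation.
print(result["answer"], result["usage"]["total_tokens"])
```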
mlrun/datastore/model_provider/model_provider.py
CHANGED

@@ -108,7 +108,7 @@ class ModelProvider(BaseRemoteClient):
             additional metadata or token usage statistics, in this format:
             {"answer": <string>, "usage": <dict>}

-            - FULL: Return the full raw response object
+            - FULL: Return the full raw response object.

         :param kwargs: Additional parameters that may be required by specific implementations.

@@ -164,7 +164,9 @@ class ModelProvider(BaseRemoteClient):
         )
         return self._async_client

-    def custom_invoke(
+    def custom_invoke(
+        self, operation: Optional[Callable] = None, **invoke_kwargs
+    ) -> Any:
         """
         Invokes a model operation from a provider (e.g., OpenAI, Hugging Face, etc.) with the given keyword arguments.

@@ -178,7 +180,7 @@ class ModelProvider(BaseRemoteClient):
         raise NotImplementedError("custom_invoke method is not implemented")

     async def async_custom_invoke(
-        self, operation: Optional[Callable[..., Awaitable[Any]]], **invoke_kwargs
+        self, operation: Optional[Callable[..., Awaitable[Any]]] = None, **invoke_kwargs
     ) -> Any:
         """
         Asynchronously invokes a model operation from a provider (e.g., OpenAI, Hugging Face, etc.)
@@ -263,5 +265,61 @@ class ModelProvider(BaseRemoteClient):
         invoke_response_format=InvokeResponseFormat.FULL,
         **invoke_kwargs,
     ) -> Union[str, dict[str, Any], Any]:
-        """
+        """
+        Asynchronously invokes a generative AI model with the provided messages and additional parameters.
+        This method is designed to be a flexible interface for interacting with various
+        generative AI backends (e.g., OpenAI, Hugging Face, etc.). It allows users to send
+        a list of messages (following a standardized format) and receive a response.
+
+        :param messages: A list of dictionaries representing the conversation history or input messages.
+            Each dictionary should follow the format::
+                {"role": "system" | "user" | "assistant" ..., "content":
+                "Message content as a string"}
+
+            Example:
+
+            .. code-block:: json
+
+                [
+                    {"role": "system", "content": "You are a helpful assistant."},
+                    {"role": "user", "content": "What is the capital of France?"}
+                ]
+
+            This format is consistent across all backends. Defaults to None if no messages
+            are provided.
+
+        :param invoke_response_format: Determines how the model response is returned:
+
+            - string: Returns only the generated text content from the model output,
+              for single-answer responses only.
+
+            - usage: Combines the STRING response with additional metadata (token usage),
+              and returns the result in a dictionary.
+
+              Note: The usage dictionary may contain additional
+              keys depending on the model provider:
+
+              .. code-block:: json
+
+                  {
+                      "answer": "<generated_text>",
+                      "usage": {
+                          "prompt_tokens": <int>,
+                          "completion_tokens": <int>,
+                          "total_tokens": <int>
+                      }
+
+                  }
+
+            - full: Returns the full model output.
+
+        :param invoke_kwargs:
+            Additional keyword arguments to be passed to the underlying model API call.
+            These can include parameters such as temperature, max tokens, etc.,
+            depending on the capabilities of the specific backend being used.
+
+        :return: The invoke result formatted according to the specified
+            invoke_response_format parameter.
+
+        """
         raise NotImplementedError("async_invoke is not implemented")
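A hedged sketch of calling this contract from async code, assuming a concrete provider instance (`provider`, e.g. an `OpenAIProvider`) and that `InvokeResponseFormat` has been imported (its module path is not shown in the diff):

```python
import asyncio

async def main():
    # STRING mode returns just the generated text; USAGE would return the
    # {"answer": ..., "usage": ...} dictionary documented above.
    answer = await provider.async_invoke(
        messages=[
            {"role": "system", "content": "You are a helpful assistant."},
            {"role": "user", "content": "What is the capital of France?"},
        ],
        invoke_response_format=InvokeResponseFormat.STRING,
    )
    print(answer)

asyncio.run(main())
```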
mlrun/datastore/model_provider/openai_provider.py
CHANGED

@@ -80,8 +80,12 @@ class OpenAIProvider(ModelProvider):
     @staticmethod
     def _extract_string_output(response: "ChatCompletion") -> str:
         """
-        Extracts the first
-
+        Extracts the text content of the first choice from an OpenAI ChatCompletion response.
+        Only supports responses with a single choice. Raises an error if multiple choices exist.
+
+        :param response: The ChatCompletion response from OpenAI.
+        :return: The text content of the first message in the response.
+        :raises MLRunInvalidArgumentError: If the response contains more than one choice.
         """
         if len(response.choices) != 1:
             raise mlrun.errors.MLRunInvalidArgumentError(
@@ -115,13 +119,14 @@ class OpenAIProvider(ModelProvider):
         The client is created only if it does not already exist.
         Raises ImportError if the openai package is not installed.
         """
-        if
-
-
+        if self._client:
+            return
+        try:
+            from openai import OpenAI  # noqa

-
-
-
+            self._client = OpenAI(**self.options)
+        except ImportError as exc:
+            raise ImportError("openai package is not installed") from exc

     def load_async_client(self) -> None:
         """
@@ -163,25 +168,37 @@ class OpenAIProvider(ModelProvider):
         self, operation: Optional[Callable] = None, **invoke_kwargs
     ) -> Union["ChatCompletion", "BaseModel"]:
         """
-        OpenAI
+        Invokes a model operation from the OpenAI client with the given keyword arguments.

-
-        `
+        This method provides flexibility to either:
+        - Call a specific OpenAI client operation (e.g., `client.images.generate`).
+        - Default to `chat.completions.create` when no operation is provided.
+
+        The operation must be a callable that accepts keyword arguments. If the callable
+        does not accept a `model` parameter, it will be omitted from the call.

         Example:
             ```python
-            result = openai_model_provider.
+            result = openai_model_provider.custom_invoke(
                 openai_model_provider.client.images.generate,
                 prompt="A futuristic cityscape at sunset",
                 n=1,
                 size="1024x1024",
             )
             ```
-        :param operation: Same as ModelProvider.custom_invoke.
-        :param invoke_kwargs: Same as ModelProvider.custom_invoke.
-        :return: Same as ModelProvider.custom_invoke.

+        :param operation: A callable representing the OpenAI operation to invoke.
+                          If not provided, defaults to `client.chat.completions.create`.
+
+        :param invoke_kwargs: Additional keyword arguments to pass to the operation.
+                              These are merged with `default_invoke_kwargs` and may
+                              include parameters such as `temperature`, `max_tokens`,
+                              or `messages`.
+
+        :return: The full response returned by the operation, typically
+                 an OpenAI `ChatCompletion` or other OpenAI SDK model.
         """
+
         invoke_kwargs = self.get_invoke_kwargs(invoke_kwargs)
         model_kwargs = {"model": invoke_kwargs.pop("model", None) or self.model}

@@ -202,24 +219,35 @@ class OpenAIProvider(ModelProvider):
         **invoke_kwargs,
     ) -> Union["ChatCompletion", "BaseModel"]:
         """
-        OpenAI
+        Asynchronously invokes a model operation from the OpenAI client with the given keyword arguments.

-
-        `
+        This method provides flexibility to either:
+        - Call a specific async OpenAI client operation (e.g., `async_client.images.generate`).
+        - Default to `chat.completions.create` when no operation is provided.
+
+        The operation must be an async callable that accepts keyword arguments.
+        If the callable does not accept a `model` parameter, it will be omitted from the call.

         Example:
-
-            result = openai_model_provider.
+            ```python
+            result = await openai_model_provider.async_custom_invoke(
                 openai_model_provider.async_client.images.generate,
                 prompt="A futuristic cityscape at sunset",
                 n=1,
                 size="1024x1024",
             )
-
+            ```
+
+        :param operation: An async callable representing the OpenAI operation to invoke.
+                          If not provided, defaults to `async_client.chat.completions.create`.

-        :param
-
-
+        :param invoke_kwargs: Additional keyword arguments to pass to the operation.
+                              These are merged with `default_invoke_kwargs` and may
+                              include parameters such as `temperature`, `max_tokens`,
+                              or `messages`.
+
+        :return: The full response returned by the awaited operation,
+                 typically an OpenAI `ChatCompletion` or other OpenAI SDK model.

         """
         invoke_kwargs = self.get_invoke_kwargs(invoke_kwargs)
@@ -248,10 +276,10 @@ class OpenAIProvider(ModelProvider):
         if invoke_response_format == InvokeResponseFormat.STRING:
             return str_response
         if invoke_response_format == InvokeResponseFormat.USAGE:
-
+            usage = response.to_dict()["usage"]
             response = {
                 UsageResponseKeys.ANSWER: str_response,
-                UsageResponseKeys.USAGE:
+                UsageResponseKeys.USAGE: usage,
             }
             return response

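The same answer-plus-usage shape can be reproduced with the OpenAI SDK directly. A minimal sketch, assuming `OPENAI_API_KEY` is set in the environment (the model name is illustrative):

```python
from openai import OpenAI

client = OpenAI()  # reads OPENAI_API_KEY from the environment
response = client.chat.completions.create(
    model="gpt-4o-mini",
    messages=[{"role": "user", "content": "What is the capital of France?"}],
)

# Mirrors the UsageResponseKeys.ANSWER / UsageResponseKeys.USAGE assembly above.
result = {
    "answer": response.choices[0].message.content,
    "usage": response.to_dict()["usage"],
}
print(result)
```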
@@ -264,27 +292,42 @@ class OpenAIProvider(ModelProvider):
         """
         OpenAI-specific implementation of `ModelProvider.invoke`.
         Invokes an OpenAI model operation using the synchronous client.
-        For full details, see `ModelProvider.invoke`.

         :param messages:
-
+            A list of dictionaries representing the conversation history or input messages.
+            Each dictionary should follow the format::
+                {
+                    "role": "system" | "user" | "assistant",
+                    "content": "Message content as a string",
+                }
+
+            Example:
+
+            .. code-block:: json

-
+                [
+                    {"role": "system", "content": "You are a helpful assistant."},
+                    {"role": "user", "content": "What is the capital of France?"}
+                ]
+
+            Defaults to None if no messages are provided.
+
+        :param invoke_response_format:
             Specifies the format of the returned response. Options:

             - "string": Returns only the generated text content, taken from a single response.
-            - "
+            - "usage": Combines the generated text with metadata (e.g., token usage), returning a dictionary::

-
-
-
-
-
+              .. code-block:: json
+                  {
+                      "answer": "<generated_text>",
+                      "usage": <ChatCompletion>.to_dict()["usage"]
+                  }

             - "full": Returns the full OpenAI `ChatCompletion` object.

         :param invoke_kwargs:
-            Additional keyword arguments passed to the OpenAI client.
+            Additional keyword arguments passed to the OpenAI client.

         :return:
             A string, dictionary, or `ChatCompletion` object, depending on `invoke_response_format`.
@@ -305,18 +348,46 @@ class OpenAIProvider(ModelProvider):
     ) -> Union[str, "ChatCompletion", dict]:
         """
         OpenAI-specific implementation of `ModelProvider.async_invoke`.
-        Invokes an OpenAI model operation using the
-
+        Invokes an OpenAI model operation using the asynchronous client.
+
+        :param messages:
+            A list of dictionaries representing the conversation history or input messages.
+            Each dictionary should follow the format::
+                {
+                    "role": "system" | "user" | "assistant",
+                    "content": "Message content as a string",
+                }
+
+            Example:
+
+            .. code-block:: json
+
+                [
+                    {"role": "system", "content": "You are a helpful assistant."},
+                    {"role": "user", "content": "What is the capital of France?"}
+                ]

-
+            Defaults to None if no messages are provided.

-        :param invoke_response_format:
-
+        :param invoke_response_format:
+            Specifies the format of the returned response. Options:
+
+            - "string": Returns only the generated text content, taken from a single response.
+            - "usage": Combines the generated text with metadata (e.g., token usage), returning a dictionary::
+
+              .. code-block:: json
+                  {
+                      "answer": "<generated_text>",
+                      "usage": <ChatCompletion>.to_dict()["usage"]
+                  }
+
+            - "full": Returns the full OpenAI `ChatCompletion` object.

         :param invoke_kwargs:
-
-        :returns Same as `ModelProvider.async_invoke`.
+            Additional keyword arguments passed to the OpenAI client.

+        :return:
+            A string, dictionary, or `ChatCompletion` object, depending on `invoke_response_format`.
         """
         response = await self.async_custom_invoke(messages=messages, **invoke_kwargs)
         return self._response_handler(
mlrun/datastore/s3.py
CHANGED
@@ -13,6 +13,7 @@
 # limitations under the License.

 import time
+import warnings
 from typing import Optional
 from urllib.parse import urlparse

@@ -28,6 +29,27 @@ from .base import DataStore, FileStats, make_datastore_schema_sanitizer
 class S3Store(DataStore):
     using_bucket = True

+    # TODO: Remove this in 1.12.0
+    def _get_endpoint_url_with_deprecation_warning(self):
+        """Get S3 endpoint URL with backward compatibility for deprecated S3_ENDPOINT_URL"""
+        # First try the new environment variable
+        endpoint_url = self._get_secret_or_env("AWS_ENDPOINT_URL_S3")
+        if endpoint_url:
+            return endpoint_url
+
+        # Check for deprecated environment variable
+        deprecated_endpoint_url = self._get_secret_or_env("S3_ENDPOINT_URL")
+        if deprecated_endpoint_url:
+            warnings.warn(
+                "S3_ENDPOINT_URL is deprecated in 1.10.0 and will be removed in 1.12.0, "
+                "use AWS_ENDPOINT_URL_S3 instead.",
+                # TODO: Remove this in 1.12.0
+                FutureWarning,
+            )
+            return deprecated_endpoint_url
+
+        return None
+
     def __init__(
         self, parent, schema, name, endpoint="", secrets: Optional[dict] = None
     ):
@@ -41,7 +63,7 @@ class S3Store(DataStore):
         access_key_id = self._get_secret_or_env("AWS_ACCESS_KEY_ID")
         secret_key = self._get_secret_or_env("AWS_SECRET_ACCESS_KEY")
         token_file = self._get_secret_or_env("AWS_CONTAINER_AUTHORIZATION_TOKEN_FILE")
-        endpoint_url = self.
+        endpoint_url = self._get_endpoint_url_with_deprecation_warning()
         force_non_anonymous = self._get_secret_or_env("S3_NON_ANONYMOUS")
         profile_name = self._get_secret_or_env("AWS_PROFILE")
         assume_role_arn = self._get_secret_or_env("MLRUN_AWS_ROLE_ARN")
@@ -159,7 +181,7 @@ class S3Store(DataStore):
     def get_storage_options(self):
         force_non_anonymous = self._get_secret_or_env("S3_NON_ANONYMOUS")
         profile = self._get_secret_or_env("AWS_PROFILE")
-        endpoint_url = self.
+        endpoint_url = self._get_endpoint_url_with_deprecation_warning()
         access_key_id = self._get_secret_or_env("AWS_ACCESS_KEY_ID")
         secret = self._get_secret_or_env("AWS_SECRET_ACCESS_KEY")
         token_file = self._get_secret_or_env("AWS_CONTAINER_AUTHORIZATION_TOKEN_FILE")
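For users of custom S3 endpoints (e.g., MinIO), the migration is a one-line environment change; the new variable is checked first, so it takes precedence when both are set. A sketch (the endpoint value is illustrative):

```python
import os

# Before: deprecated in 1.10.0, removed in 1.12.0; still works but now
# emits a FutureWarning via _get_endpoint_url_with_deprecation_warning().
os.environ["S3_ENDPOINT_URL"] = "http://minio.local:9000"

# After: the replacement variable, consulted before the deprecated one.
os.environ["AWS_ENDPOINT_URL_S3"] = "http://minio.local:9000"
```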
mlrun/datastore/storeytargets.py
CHANGED
@@ -18,10 +18,9 @@ from mergedeep import merge
 from storey import V3ioDriver

 import mlrun
-import mlrun.model_monitoring.helpers
 from mlrun.datastore.base import DataStore
 from mlrun.datastore.datastore_profile import (
-
+    DatastoreProfileKafkaStream,
     DatastoreProfileKafkaTarget,
     DatastoreProfileTDEngine,
     datastore_profile_read,
@@ -138,7 +137,7 @@ class KafkaStoreyTarget(storey.KafkaTarget):
         datastore_profile = datastore_profile_read(path)
         if not isinstance(
             datastore_profile,
-            (
+            (DatastoreProfileKafkaStream, DatastoreProfileKafkaTarget),
         ):
             raise mlrun.errors.MLRunInvalidArgumentError(
                 f"Unsupported datastore profile type: {type(datastore_profile)}"