mlrun 1.10.0rc24__py3-none-any.whl → 1.10.0rc25__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- mlrun/common/schemas/hub.py +14 -0
- mlrun/common/schemas/model_monitoring/constants.py +1 -0
- mlrun/common/schemas/model_monitoring/model_endpoints.py +10 -1
- mlrun/datastore/azure_blob.py +66 -43
- mlrun/datastore/datastore_profile.py +8 -2
- mlrun/datastore/model_provider/huggingface_provider.py +118 -30
- mlrun/datastore/model_provider/model_provider.py +61 -3
- mlrun/datastore/model_provider/openai_provider.py +114 -43
- mlrun/db/base.py +1 -1
- mlrun/db/httpdb.py +6 -4
- mlrun/db/nopdb.py +1 -0
- mlrun/model_monitoring/api.py +2 -2
- mlrun/model_monitoring/applications/base.py +22 -10
- mlrun/model_monitoring/applications/context.py +1 -4
- mlrun/model_monitoring/controller.py +10 -2
- mlrun/model_monitoring/db/_schedules.py +2 -4
- mlrun/platforms/iguazio.py +7 -3
- mlrun/projects/project.py +28 -24
- mlrun/runtimes/nuclio/__init__.py +1 -0
- mlrun/runtimes/nuclio/application/application.py +11 -2
- mlrun/runtimes/nuclio/function.py +10 -0
- mlrun/runtimes/nuclio/serving.py +4 -0
- mlrun/runtimes/utils.py +22 -5
- mlrun/serving/server.py +25 -14
- mlrun/utils/version/version.json +2 -2
- {mlrun-1.10.0rc24.dist-info → mlrun-1.10.0rc25.dist-info}/METADATA +23 -22
- {mlrun-1.10.0rc24.dist-info → mlrun-1.10.0rc25.dist-info}/RECORD +31 -31
- {mlrun-1.10.0rc24.dist-info → mlrun-1.10.0rc25.dist-info}/WHEEL +0 -0
- {mlrun-1.10.0rc24.dist-info → mlrun-1.10.0rc25.dist-info}/entry_points.txt +0 -0
- {mlrun-1.10.0rc24.dist-info → mlrun-1.10.0rc25.dist-info}/licenses/LICENSE +0 -0
- {mlrun-1.10.0rc24.dist-info → mlrun-1.10.0rc25.dist-info}/top_level.txt +0 -0
mlrun/common/schemas/hub.py
CHANGED
@@ -15,6 +15,7 @@
 from datetime import datetime, timezone
 from typing import Optional
 
+import deepdiff
 from pydantic.v1 import BaseModel, Extra, Field
 
 import mlrun.common.types
@@ -83,6 +84,19 @@ class HubSource(BaseModel):
             status=ObjectStatus(state="created"),
         )
 
+    def diff(self, another_source: "HubSource") -> dict:
+        """
+        Compare this HubSource with another one.
+        Returns a dict of differences (metadata, spec, status).
+        """
+        exclude_paths = [
+            "root['metadata']['updated']",
+            "root['metadata']['created']",
+        ]
+        return deepdiff.DeepDiff(
+            self.dict(), another_source.dict(), exclude_paths=exclude_paths
+        )
+
 
 last_source_index = -1
 
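For reference, a minimal sketch of the comparison that the new `HubSource.diff` method delegates to `deepdiff.DeepDiff`; the sample dictionaries below are illustrative placeholders, not real hub-source records:

```python
import deepdiff

old = {
    "metadata": {"name": "default", "updated": "2024-01-01T00:00:00Z"},
    "spec": {"path": "https://example.com/catalog/v1"},
}
new = {
    "metadata": {"name": "default", "updated": "2024-06-01T00:00:00Z"},
    "spec": {"path": "https://example.com/catalog/v2"},
}

# "created"/"updated" timestamps are excluded, so only the spec change is reported.
diff = deepdiff.DeepDiff(
    old,
    new,
    exclude_paths=["root['metadata']['updated']", "root['metadata']['created']"],
)
print(diff)  # {'values_changed': {"root['spec']['path']": {...}}}
```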
mlrun/common/schemas/model_monitoring/constants.py
CHANGED
@@ -331,6 +331,7 @@ class EndpointType(IntEnum):
 class EndpointMode(IntEnum):
     REAL_TIME = 0
     BATCH = 1
+    BATCH_LEGACY = 2  # legacy batch mode, used for endpoints created through the batch inference job
 
 
 class MonitoringFunctionNames(MonitoringStrEnum):
mlrun/common/schemas/model_monitoring/model_endpoints.py
CHANGED
@@ -119,7 +119,7 @@ class ModelEndpointMetadata(ObjectMetadata, ModelEndpointParser):
     project: constr(regex=PROJECT_PATTERN)
     endpoint_type: EndpointType = EndpointType.NODE_EP
     uid: Optional[constr(regex=MODEL_ENDPOINT_ID_PATTERN)]
-    mode: EndpointMode =
+    mode: Optional[EndpointMode] = None
 
     @classmethod
     def mutable_fields(cls):
@@ -131,6 +131,15 @@ class ModelEndpointMetadata(ObjectMetadata, ModelEndpointParser):
             return str(v)
         return v
 
+    @validator("mode", pre=True, always=True)
+    def _set_mode_based_on_endpoint_type(cls, v, values):  # noqa: N805
+        if v is None:
+            if values.get("endpoint_type") == EndpointType.BATCH_EP:
+                return EndpointMode.BATCH_LEGACY
+            else:
+                return EndpointMode.REAL_TIME
+        return v
+
 
 class ModelEndpointSpec(ObjectSpec, ModelEndpointParser):
     model_class: Optional[str] = ""
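A standalone sketch of the defaulting behavior the new validator introduces: when `mode` is not supplied, batch endpoints fall back to `BATCH_LEGACY` and everything else to `REAL_TIME`. The model below is deliberately simplified and the enum members/values are illustrative; only the validator logic mirrors the diff:

```python
from enum import IntEnum
from typing import Optional

from pydantic.v1 import BaseModel, validator


class EndpointType(IntEnum):  # illustrative values, not mlrun's definitions
    NODE_EP = 1
    BATCH_EP = 4


class EndpointMode(IntEnum):
    REAL_TIME = 0
    BATCH = 1
    BATCH_LEGACY = 2


class Metadata(BaseModel):
    endpoint_type: EndpointType = EndpointType.NODE_EP
    mode: Optional[EndpointMode] = None

    @validator("mode", pre=True, always=True)
    def _default_mode(cls, v, values):  # noqa: N805
        # Mirrors the diff: only fill in a default when mode was not provided.
        if v is None:
            if values.get("endpoint_type") == EndpointType.BATCH_EP:
                return EndpointMode.BATCH_LEGACY
            return EndpointMode.REAL_TIME
        return v


assert Metadata().mode is EndpointMode.REAL_TIME
assert Metadata(endpoint_type=EndpointType.BATCH_EP).mode is EndpointMode.BATCH_LEGACY
```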
mlrun/datastore/azure_blob.py
CHANGED
@@ -229,18 +229,25 @@ class AzureBlobStore(DataStore):
         st = self.storage_options
         service = "blob"
         primary_url = None
-
+
+        # Parse connection string (fills account_name/account_key or SAS)
+        connection_string = st.get("connection_string")
+        if connection_string:
             primary_url, _, parsed_credential = parse_connection_str(
-
+                connection_string, credential=None, service=service
             )
-
-
-
+
+            if isinstance(parsed_credential, str):
+                # SharedAccessSignature as raw string
+                parsed_credential = {"sas_token": parsed_credential}
+
+            for key in ["account_name", "account_key", "sas_token"]:
+                if parsed_value := parsed_credential.get(key):
                     if key in st and st[key] != parsed_value:
                         if key == "account_name":
                             raise mlrun.errors.MLRunInvalidArgumentError(
-                                f"Storage option for '{key}' is '{st[key]}'
-
+                                f"Storage option for '{key}' is '{st[key]}', "
+                                f"which does not match corresponding connection string '{parsed_value}'"
                             )
                         else:
                             raise mlrun.errors.MLRunInvalidArgumentError(
@@ -249,6 +256,7 @@ class AzureBlobStore(DataStore):
                     st[key] = parsed_value
 
         account_name = st.get("account_name")
+        # Derive host (prefer connection string primary URL)
         if primary_url:
             if primary_url.startswith("http://"):
                 primary_url = primary_url[len("http://") :]
@@ -258,48 +266,63 @@ class AzureBlobStore(DataStore):
         elif account_name:
             host = f"{account_name}.{service}.core.windows.net"
         else:
+            # nothing to configure yet
             return res
 
-
+        host = host.rstrip("/")
+
+        # Account key (optional; WASB supports it)
+        if "account_key" in st and st["account_key"]:
             res[f"spark.hadoop.fs.azure.account.key.{host}"] = st["account_key"]
 
-
-
-
-
-
-        res[f"spark.hadoop.fs.azure.account.oauth2.client.id.{host}"] = st[
-            "client_id"
-        ]
-        if "client_secret" in st:
-            res[f"spark.hadoop.fs.azure.account.oauth2.client.secret.{host}"] = st[
-                "client_secret"
-            ]
-        if "tenant_id" in st:
-            tenant_id = st["tenant_id"]
-            res[f"spark.hadoop.fs.azure.account.oauth2.client.endpoint.{host}"] = (
-                f"https://login.microsoftonline.com/{tenant_id}/oauth2/token"
-            )
+        # --- WASB + SAS (container-scoped key; no provider classes needed) ---
+        if "sas_token" in st and st["sas_token"]:
+            sas = st["sas_token"].lstrip("?")
+            if container := getattr(self, "endpoint", None) or st.get("container"):
+                # fs.azure.sas.<container>.<account>.blob.core.windows.net = <sas>
+                res[f"spark.hadoop.fs.azure.sas.{container}.{host}"] = sas
 
-
-
-
-
-        res[f"spark.hadoop.fs.azure.sas.fixed.token.{host}"] = st["sas_token"]
+            else:
+                raise mlrun.errors.MLRunInvalidArgumentError(
+                    "Container name is required for WASB SAS. "
+                    "Set self.endpoint or storage_options['container']."
+                )
         return res
 
     @property
     def spark_url(self):
-
-
-
-
-
-
-
-
-
-
+        # Build: wasbs://<container>@<host>
+        st = self.storage_options
+        service = "blob"
+
+        container = getattr(self, "endpoint", None) or st.get("container")
+        if not container:
+            raise mlrun.errors.MLRunInvalidArgumentError(
+                "Container is required to build the WASB URL "
+                "(self.endpoint or storage_options['container'])."
+            )
+
+        # Prefer host from connection string; else synthesize from account_name
+        host = None
+        account_name = st.get("account_name")
+        connection_string = st.get("connection_string")
+
+        if connection_string:
+            primary_url, _, _ = parse_connection_str(
+                connection_string, credential=None, service=service
+            )
+            if primary_url.startswith("http://"):
+                primary_url = primary_url[len("http://") :]
+            if primary_url.startswith("https://"):
+                primary_url = primary_url[len("https://") :]
+            host = primary_url.rstrip("/")
+
+        if not host and account_name:
+            host = f"{account_name}.{service}.core.windows.net"
+
+        if not host:
+            raise mlrun.errors.MLRunInvalidArgumentError(
+                "account_name is required (or provide a connection_string) to build the WASB URL."
+            )
+
+        return f"wasbs://{container}@{host}"
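To make the new WASB/SAS wiring concrete, here is a hedged sketch of the Hadoop configuration key and URL that the reworked Spark-options code and the `spark_url` property produce for a SAS-based setup; the account, container, and token values are placeholders:

```python
# Placeholder storage options; real values come from the datastore profile / secrets.
storage_options = {
    "account_name": "myaccount",
    "container": "mycontainer",
    "sas_token": "?sv=2024-01-01&ss=b&sig=...",
}

host = f"{storage_options['account_name']}.blob.core.windows.net"
sas = storage_options["sas_token"].lstrip("?")

# Container-scoped SAS key consumed by Hadoop's WASB connector.
spark_conf = {
    f"spark.hadoop.fs.azure.sas.{storage_options['container']}.{host}": sas,
}
spark_url = f"wasbs://{storage_options['container']}@{host}"

print(spark_conf)
print(spark_url)  # wasbs://mycontainer@myaccount.blob.core.windows.net
```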
mlrun/datastore/datastore_profile.py
CHANGED
@@ -333,7 +333,9 @@ class DatastoreProfileGCS(DatastoreProfile):
             # in gcs the path after schema is starts with bucket, wherefore it should not start with "/".
             subpath = subpath[1:]
         if self.bucket:
-            return
+            return (
+                f"gcs://{self.bucket}/{subpath}" if subpath else f"gcs://{self.bucket}"
+            )
         else:
             return f"gcs://{subpath}"
 
@@ -370,7 +372,11 @@ class DatastoreProfileAzureBlob(DatastoreProfile):
             # in azure the path after schema is starts with container, wherefore it should not start with "/".
             subpath = subpath[1:]
         if self.container:
-            return
+            return (
+                f"az://{self.container}/{subpath}"
+                if subpath
+                else f"az://{self.container}"
+            )
         else:
             return f"az://{subpath}"
 
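With these branches the profiles now return an explicit URL whenever a bucket or container is configured. A tiny sketch of the resulting strings, with placeholder names:

```python
bucket, subpath = "my-bucket", "data/train.csv"
gcs_url = f"gcs://{bucket}/{subpath}" if subpath else f"gcs://{bucket}"

container = "my-container"
az_url = f"az://{container}/{subpath}" if subpath else f"az://{container}"

assert gcs_url == "gcs://my-bucket/data/train.csv"
assert az_url == "az://my-container/data/train.csv"
```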
mlrun/datastore/model_provider/huggingface_provider.py
CHANGED
@@ -36,6 +36,9 @@ class HuggingFaceProvider(ModelProvider):
     This class extends the ModelProvider base class and implements Hugging Face-specific
     functionality, including pipeline initialization, default text generation operations,
     and custom operations tailored to the Hugging Face Transformers pipeline API.
+
+    Note: The pipeline object will download the model (if not already cached) and load it
+    into memory for inference. Ensure you have the required CPU/GPU and memory to use this operation.
     """
 
     def __init__(
@@ -62,13 +65,12 @@ class HuggingFaceProvider(ModelProvider):
         )
         self.options = self.get_client_options()
         self._expected_operation_type = None
-        self.
+        self._download_model()
 
     @staticmethod
     def _extract_string_output(response: list[dict]) -> str:
         """
-        Extracts the first generated string from Hugging Face pipeline output
-        regardless of whether it's plain text-generation or chat-style output.
+        Extracts the first generated string from Hugging Face pipeline output
         """
         if not isinstance(response, list) or len(response) == 0:
             raise ValueError("Empty or invalid pipeline output")
@@ -86,6 +88,35 @@ class HuggingFaceProvider(ModelProvider):
             subpath = ""
         return endpoint, subpath
 
+    @property
+    def client(self) -> Any:
+        """
+        Lazily return the HuggingFace-pipeline client.
+
+        If the client has not been initialized yet, it will be created
+        by calling `load_client`.
+        """
+        self.load_client()
+        return self._client
+
+    def _download_model(self):
+        """
+        Pre-downloads model files locally to prevent race conditions in multiprocessing.
+
+        Uses snapshot_download with local_dir_use_symlinks=False to ensure proper
+        file copying for safe concurrent access across multiple processes.
+
+        :raises:
+            ImportError: If huggingface_hub package is not installed.
+        """
+        try:
+            from huggingface_hub import snapshot_download
+
+            # Download the model and tokenizer files directly to the cache.
+            snapshot_download(repo_id=self.model, local_dir_use_symlinks=False)
+        except ImportError as exc:
+            raise ImportError("huggingface_hub package is not installed") from exc
+
     def _response_handler(
         self,
         response: Union[str, list],
@@ -94,27 +125,46 @@ class HuggingFaceProvider(ModelProvider):
         **kwargs,
     ) -> Union[str, list, dict[str, Any]]:
         """
-
+        Processes and formats the raw response from the HuggingFace pipeline according to the specified format.
 
-
+        The response should exclude the user's input (no repetition in the output).
+        This can be accomplished by invoking the pipeline with `return_full_text=False`.
 
-        :param
-
-        :param invoke_response_format:
+        :param response: The raw response from the HuggingFace pipeline, typically a list of dictionaries
+                         containing generated text sequences.
+        :param invoke_response_format: Determines how the response should be processed and returned. Options:
 
-
-
+            - STRING: Return only the main generated content as a string,
+              for single-answer responses.
+            - USAGE: Return a dictionary combining the string response with
+              token usage statistics:
 
-
-            may differ from the actual tokens generated by the model due to
-            internal decoding behavior and implementation details.
+              .. code-block:: json
 
-
+                  {
+                      "answer": "<generated_text>",
+                      "usage": {
+                          "prompt_tokens": <int>,
+                          "completion_tokens": <int>,
+                          "total_tokens": <int>
+                      }
+                  }
 
-
+              Note: Token counts are estimated after answer generation and
+              may differ from the actual tokens generated by the model due to
+              internal decoding behavior and implementation details.
+
+            - FULL: Return the full raw response object.
+
+        :param messages: The original input messages used for token count estimation in USAGE mode.
+                         Can be a string, list of strings, or chat format messages.
+        :param kwargs: Additional parameters for response processing.
+
+        :return: The processed response in the format specified by `invoke_response_format`.
+                 Can be a string, dictionary, or the original response object.
 
         :raises MLRunInvalidArgumentError: If extracting the string response fails.
-        :raises MLRunRuntimeError: If applying the chat template to the model fails.
+        :raises MLRunRuntimeError: If applying the chat template to the model fails during token usage calculation.
         """
         if InvokeResponseFormat.is_str_response(invoke_response_format.value):
             str_response = self._extract_string_output(response)
@@ -161,11 +211,15 @@ class HuggingFaceProvider(ModelProvider):
         :raises:
             ImportError: If the `transformers` package is not installed.
         """
+        if self._client:
+            return
         try:
             from transformers import pipeline, AutoModelForCausalLM  # noqa
             from transformers import AutoTokenizer  # noqa
             from transformers.pipelines.base import Pipeline  # noqa
 
+            self.options["model_kwargs"] = self.options.get("model_kwargs", {})
+            self.options["model_kwargs"]["local_files_only"] = True
             self._client = pipeline(model=self.model, **self.options)
             self._expected_operation_type = Pipeline
         except ImportError as exc:
@@ -186,23 +240,38 @@ class HuggingFaceProvider(ModelProvider):
         self, operation: Optional["Pipeline"] = None, **invoke_kwargs
     ) -> Union[list, dict, Any]:
         """
-        HuggingFace
-
+        Invokes a HuggingFace pipeline operation with the given keyword arguments.
+
+        This method provides flexibility to use a custom pipeline object for specific tasks
+        (e.g., image classification, sentiment analysis).
+
+        The operation must be a Pipeline object from the transformers library that accepts keyword arguments.
 
         Example:
-
+            ```python
+            from transformers import pipeline
+            from PIL import Image
+
+            # Using custom pipeline for image classification
             image = Image.open(image_path)
-            pipeline_object =
+            pipeline_object = pipeline("image-classification", model="microsoft/resnet-50")
             result = hf_provider.custom_invoke(
                 pipeline_object,
                 inputs=image,
            )
-
+            ```
 
+        :param operation: A Pipeline object from the transformers library.
+                          If not provided, defaults to the provider's configured pipeline.
+        :param invoke_kwargs: Keyword arguments to pass to the pipeline operation.
+                              These are merged with `default_invoke_kwargs` and may include
+                              parameters such as `inputs`, `max_length`, `temperature`, or task-specific options.
 
-        :
-
-
+        :return: The full response returned by the pipeline operation.
+                 Format depends on the pipeline task (list for text generation,
+                 dict for classification, etc.).
+
+        :raises MLRunInvalidArgumentError: If the operation is not a valid Pipeline object.
 
         """
         invoke_kwargs = self.get_invoke_kwargs(invoke_kwargs)
@@ -222,12 +291,24 @@ class HuggingFaceProvider(ModelProvider):
         **invoke_kwargs,
     ) -> Union[str, list, dict[str, Any]]:
         """
-        HuggingFace-specific implementation of
-        Invokes a HuggingFace model operation
-
+        HuggingFace-specific implementation of model invocation using the synchronous pipeline client.
+        Invokes a HuggingFace model operation for text generation tasks.
+
+        Note: Ensure your environment has sufficient computational resources (CPU/GPU and memory) to run the model.
 
         :param messages:
-
+            Input for the text generation model. Can be provided in multiple formats:
+
+            - A single string: Direct text input for generation
+            - A list of strings: Multiple text inputs for batch processing
+            - Chat format: A list of dictionaries with "role" and "content" keys:
+
+              .. code-block:: json
+
+                  [
+                      {"role": "system", "content": "You are a helpful assistant."},
+                      {"role": "user", "content": "What is the capital of France?"}
+                  ]
 
         :param invoke_response_format: InvokeResponseFormat
             Specifies the format of the returned response. Options:
@@ -245,17 +326,24 @@ class HuggingFaceProvider(ModelProvider):
                     }
                 }
 
+            Note: For usage mode, the model tokenizer should support apply_chat_template.
+
             - "full": Returns the raw response object from the HuggingFace model,
               typically a list of generated sequences (dictionaries).
              This format does not include token usage statistics.
 
         :param invoke_kwargs:
-            Additional keyword arguments passed to the HuggingFace
+            Additional keyword arguments passed to the HuggingFace pipeline.
 
         :return:
            A string, dictionary, or list of model outputs, depending on `invoke_response_format`.
-        """
 
+        :raises MLRunInvalidArgumentError:
+            If the pipeline task is not "text-generation" or if the response contains multiple outputs when extracting
+            string content.
+        :raises MLRunRuntimeError:
+            If using "usage" response mode and the model tokenizer does not support chat template formatting.
+        """
         if self.client.task != "text-generation":
             raise mlrun.errors.MLRunInvalidArgumentError(
                 "HuggingFaceProvider.invoke supports text-generation task only"
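A hedged sketch of the download-then-load-offline pattern that `_download_model` and the updated `load_client` implement: the snapshot is materialized in the local cache first, and the pipeline is then built with `local_files_only=True`, so worker processes never race on downloads. The model id below is a placeholder chosen only because it is tiny:

```python
from huggingface_hub import snapshot_download
from transformers import pipeline

model_id = "sshleifer/tiny-gpt2"  # placeholder; any text-generation model id works

# 1. Materialize model/tokenizer files in the local HF cache (one download, no races).
snapshot_download(repo_id=model_id)

# 2. Build the pipeline strictly from the local cache.
generator = pipeline(
    "text-generation",
    model=model_id,
    model_kwargs={"local_files_only": True},
)

# 3. Generate without echoing the prompt back, as the response handler expects.
print(generator("Hello there", max_new_tokens=5, return_full_text=False))
```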
mlrun/datastore/model_provider/model_provider.py
CHANGED
@@ -108,7 +108,7 @@ class ModelProvider(BaseRemoteClient):
             additional metadata or token usage statistics, in this format:
             {"answer": <string>, "usage": <dict>}
 
-            - FULL: Return the full raw response object
+            - FULL: Return the full raw response object.
 
         :param kwargs: Additional parameters that may be required by specific implementations.
 
@@ -164,7 +164,9 @@ class ModelProvider(BaseRemoteClient):
         )
         return self._async_client
 
-    def custom_invoke(
+    def custom_invoke(
+        self, operation: Optional[Callable] = None, **invoke_kwargs
+    ) -> Any:
         """
         Invokes a model operation from a provider (e.g., OpenAI, Hugging Face, etc.) with the given keyword arguments.
 
@@ -263,5 +265,61 @@ class ModelProvider(BaseRemoteClient):
         invoke_response_format=InvokeResponseFormat.FULL,
         **invoke_kwargs,
     ) -> Union[str, dict[str, Any], Any]:
-        """
+        """
+        Asynchronously invokes a generative AI model with the provided messages and additional parameters.
+        This method is designed to be a flexible interface for interacting with various
+        generative AI backends (e.g., OpenAI, Hugging Face, etc.). It allows users to send
+        a list of messages (following a standardized format) and receive a response.
+
+        :param messages: A list of dictionaries representing the conversation history or input messages.
+                         Each dictionary should follow the format::
+                             {"role": "system"| "user" | "assistant" ..., "content":
+                             "Message content as a string"}
+
+                         Example:
+
+                         .. code-block:: json
+
+                             [
+                                 {"role": "system", "content": "You are a helpful assistant."},
+                                 {"role": "user", "content": "What is the capital of France?"}
+                             ]
+
+                         This format is consistent across all backends. Defaults to None if no messages
+                         are provided.
+
+        :param invoke_response_format: Determines how the model response is returned:
+
+            - string: Returns only the generated text content from the model output,
+              for single-answer responses only.
+
+            - usage: Combines the STRING response with additional metadata (token usage),
+              and returns the result in a dictionary.
+
+              Note: The usage dictionary may contain additional
+              keys depending on the model provider:
+
+              .. code-block:: json
+
+                  {
+                      "answer": "<generated_text>",
+                      "usage": {
+                          "prompt_tokens": <int>,
+                          "completion_tokens": <int>,
+                          "total_tokens": <int>
+                      }
+
+                  }
+
+            - full: Returns the full model output.
+
+        :param invoke_kwargs:
+            Additional keyword arguments to be passed to the underlying model API call.
+            These can include parameters such as temperature, max tokens, etc.,
+            depending on the capabilities of the specific backend being used.
+
+        :return: The invoke result formatted according to the specified
+                 invoke_response_format parameter.
+
+        """
         raise NotImplementedError("async_invoke is not implemented")
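To illustrate the interface documented above, a small sketch of the standardized message list and of the "usage" response shape described in the docstrings; the answer text and token counts are placeholders:

```python
messages = [
    {"role": "system", "content": "You are a helpful assistant."},
    {"role": "user", "content": "What is the capital of France?"},
]

# Shape returned in "usage" mode: the string answer plus token statistics.
usage_response = {
    "answer": "Paris",
    "usage": {"prompt_tokens": 24, "completion_tokens": 3, "total_tokens": 27},
}

assert usage_response["usage"]["total_tokens"] == (
    usage_response["usage"]["prompt_tokens"]
    + usage_response["usage"]["completion_tokens"]
)
```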