mlrun 1.10.0rc5__py3-none-any.whl → 1.10.0rc6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of mlrun might be problematic.

Files changed (35)
  1. mlrun/artifacts/model.py +3 -3
  2. mlrun/common/schemas/model_monitoring/__init__.py +1 -0
  3. mlrun/common/schemas/model_monitoring/constants.py +14 -2
  4. mlrun/common/schemas/model_monitoring/functions.py +66 -0
  5. mlrun/common/schemas/project.py +3 -0
  6. mlrun/config.py +3 -3
  7. mlrun/db/base.py +13 -0
  8. mlrun/db/httpdb.py +47 -0
  9. mlrun/db/nopdb.py +12 -0
  10. mlrun/launcher/client.py +23 -0
  11. mlrun/model_monitoring/db/tsdb/base.py +30 -0
  12. mlrun/model_monitoring/db/tsdb/tdengine/tdengine_connection.py +118 -50
  13. mlrun/model_monitoring/db/tsdb/tdengine/tdengine_connector.py +117 -24
  14. mlrun/model_monitoring/db/tsdb/v3io/v3io_connector.py +106 -15
  15. mlrun/projects/project.py +40 -1
  16. mlrun/runtimes/base.py +0 -27
  17. mlrun/runtimes/daskjob.py +4 -4
  18. mlrun/runtimes/databricks_job/databricks_runtime.py +0 -2
  19. mlrun/runtimes/mpijob/abstract.py +0 -2
  20. mlrun/runtimes/mpijob/v1.py +0 -2
  21. mlrun/runtimes/nuclio/application/application.py +0 -5
  22. mlrun/runtimes/nuclio/function.py +0 -11
  23. mlrun/runtimes/nuclio/serving.py +0 -6
  24. mlrun/runtimes/pod.py +1 -3
  25. mlrun/runtimes/remotesparkjob.py +0 -2
  26. mlrun/runtimes/sparkjob/spark3job.py +0 -2
  27. mlrun/serving/states.py +16 -18
  28. mlrun/utils/helpers.py +15 -0
  29. mlrun/utils/version/version.json +2 -2
  30. {mlrun-1.10.0rc5.dist-info → mlrun-1.10.0rc6.dist-info}/METADATA +2 -1
  31. {mlrun-1.10.0rc5.dist-info → mlrun-1.10.0rc6.dist-info}/RECORD +35 -34
  32. {mlrun-1.10.0rc5.dist-info → mlrun-1.10.0rc6.dist-info}/WHEEL +0 -0
  33. {mlrun-1.10.0rc5.dist-info → mlrun-1.10.0rc6.dist-info}/entry_points.txt +0 -0
  34. {mlrun-1.10.0rc5.dist-info → mlrun-1.10.0rc6.dist-info}/licenses/LICENSE +0 -0
  35. {mlrun-1.10.0rc5.dist-info → mlrun-1.10.0rc6.dist-info}/top_level.txt +0 -0
mlrun/artifacts/model.py CHANGED
@@ -187,7 +187,7 @@ class ModelArtifact(Artifact):
         :param model_url: Remote model url.
         :param default_config: Default configuration for client building
                                Saved as a sub-dictionary under the parameter.
-        :param kwargs:
+        :param kwargs: Arguments to pass to the artifact class.
         """
         if key or body or format or target_path:
             warnings.warn(
@@ -366,7 +366,7 @@ class ModelArtifact(Artifact):
     def before_log(self):
         if not self.spec.model_file and not self.spec.model_url:
             raise ValueError(
-                "ModelArtifact must have either model_file or model_url attributes"
+                "ModelArtifact must have either 'model_file' or 'model_url' attributes"
            )

        super().before_log()
@@ -479,7 +479,7 @@ def get_model(
     ] = None,
     suffix="",
 ) -> (str, ModelArtifact, dict):
-    """return model file, model spec object, and dictionary of extra data items
+    """Return model file, model spec object, and dictionary of extra data items

     this function will get the model file, metadata, and extra data
     the returned model file is always local, when using remote urls
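For context, a short sketch of the documented get_model usage; the store URI below is illustrative:

import mlrun.artifacts

# The returned model file is always local; remote URLs are fetched to a
# temporary local path (illustrative store URI below).
model_file, model_artifact, extra_data = mlrun.artifacts.get_model(
    "store://models/my-project/my-model:latest"
)
print(model_file)
print(list(extra_data.keys()))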
mlrun/common/schemas/model_monitoring/__init__.py CHANGED
@@ -43,6 +43,7 @@ from .constants import (
     WriterEvent,
     WriterEventKind,
 )
+from .functions import FunctionsType, FunctionSummary
 from .grafana import (
     GrafanaColumn,
     GrafanaColumnType,
mlrun/common/schemas/model_monitoring/constants.py CHANGED
@@ -416,14 +416,22 @@ class ResultStatusApp(IntEnum):
     detected = 2


-class ModelMonitoringAppLabel:
+class ModelMonitoringLabel:
     KEY = mlrun.common.constants.MLRunInternalLabels.mlrun_type
-    VAL = "mlrun__model-monitoring-application"
+    VAL = ""

     def __str__(self) -> str:
         return f"{self.KEY}={self.VAL}"


+class ModelMonitoringAppLabel(ModelMonitoringLabel):
+    VAL = "mlrun__model-monitoring-application"
+
+
+class ModelMonitoringInfraLabel(ModelMonitoringLabel):
+    VAL = "mlrun__model-monitoring-infra"
+
+
 class HistogramDataDriftApplicationConstants:
     NAME = "histogram-data-drift"
     GENERAL_RESULT_NAME = "general_drift"
@@ -438,6 +446,10 @@ class SpecialApps:
     MLRUN_INFRA = "mlrun-infra"


+class ModelMonitoringLabels:
+    MLRUN_MODEL_MONITORING_INFRA = "mlrun-model-monitoring-infra"
+
+
 _RESERVED_FUNCTION_NAMES = MonitoringFunctionNames.list() + [SpecialApps.MLRUN_INFRA]

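For context, a minimal standalone sketch of the refactored label hierarchy; the KEY value is an assumption about what MLRunInternalLabels.mlrun_type resolves to:

class ModelMonitoringLabel:
    KEY = "mlrun/type"  # assumption: the value of MLRunInternalLabels.mlrun_type
    VAL = ""

    def __str__(self) -> str:
        return f"{self.KEY}={self.VAL}"


class ModelMonitoringAppLabel(ModelMonitoringLabel):
    VAL = "mlrun__model-monitoring-application"


class ModelMonitoringInfraLabel(ModelMonitoringLabel):
    VAL = "mlrun__model-monitoring-infra"


print(ModelMonitoringAppLabel())    # mlrun/type=mlrun__model-monitoring-application
print(ModelMonitoringInfraLabel())  # mlrun/type=mlrun__model-monitoring-infra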
mlrun/common/schemas/model_monitoring/functions.py ADDED
@@ -0,0 +1,66 @@
+# Copyright 2025 Iguazio
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import enum
+from datetime import datetime
+from typing import Optional
+
+from pydantic.v1 import BaseModel
+
+
+class FunctionsType(enum.Enum):
+    APPLICATION = "application"
+    INFRA = "infra"
+
+
+class FunctionSummary(BaseModel):
+    """
+    Function summary model. Includes metadata about the function, such as its name, as well as statistical
+    metrics such as the number of detections and possible detections. A function summary can be from either a
+    model monitoring application (type "application") or an infrastructure function (type "infra").
+    """
+
+    type: FunctionsType
+    name: str
+    application_class: str
+    updated_time: datetime
+    status: Optional[str] = None
+    base_period: Optional[int] = None
+    stats: Optional[dict] = None
+
+    @classmethod
+    def from_function_dict(
+        cls,
+        func_dict: dict,
+        func_type=FunctionsType.APPLICATION,
+        base_period: Optional[int] = None,
+        stats: Optional[dict] = None,
+    ):
+        """
+        Create a FunctionSummary instance from a dictionary.
+        """
+
+        return cls(
+            type=func_type,
+            name=func_dict["metadata"]["name"],
+            application_class=""
+            if func_type != FunctionsType.APPLICATION
+            else func_dict["spec"]["graph"]["steps"]["PushToMonitoringWriter"]["after"][
+                0
+            ],
+            updated_time=func_dict["metadata"].get("updated"),
+            status=func_dict["status"].get("state"),
+            base_period=base_period,
+            stats=stats,
+        )
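A minimal sketch of exercising the new constructor; the function dictionary is illustrative, not a complete MLRun function spec:

from mlrun.common.schemas.model_monitoring import FunctionSummary

func_dict = {
    "metadata": {"name": "my-drift-app", "updated": "2025-01-01T00:00:00Z"},
    "spec": {"graph": {"steps": {"PushToMonitoringWriter": {"after": ["MyDriftApp"]}}}},
    "status": {"state": "ready"},
}

summary = FunctionSummary.from_function_dict(func_dict, base_period=10)
print(summary.name, summary.application_class, summary.status)
# my-drift-app MyDriftApp ready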
mlrun/common/schemas/project.py CHANGED
@@ -145,6 +145,9 @@ class ProjectSummary(pydantic.v1.BaseModel):
     endpoint_alerts_count: int = 0
     job_alerts_count: int = 0
     other_alerts_count: int = 0
+    datasets_count: int = 0
+    documents_count: int = 0
+    llm_prompts_count: int = 0


 class IguazioProject(pydantic.v1.BaseModel):
mlrun/config.py CHANGED
@@ -78,12 +78,12 @@ default_config = {
     "vendor_images_registry": "",
     # comma separated list of images that are in the specified images_registry, and therefore will be enriched with this
     # registry when used. default to mlrun/* which means any image which is of the mlrun repository (mlrun/mlrun,
-    # mlrun/ml-base, etc...)
+    # mlrun/mlrun-kfp, etc...)
     "images_to_enrich_registry": "^mlrun/*,^python:3.(9|11)$",
     "kfp_url": "",
     "kfp_ttl": "14400",  # KFP ttl in sec, after that completed PODs will be deleted
     "kfp_image": "mlrun/mlrun-kfp",  # image to use for KFP runner
-    "dask_kfp_image": "mlrun/ml-base",  # image to use for dask KFP runner
+    "dask_kfp_image": "mlrun/mlrun",  # image to use for dask KFP runner
     "igz_version": "",  # the version of the iguazio system the API is running on
     "iguazio_api_url": "",  # the url to iguazio api
     "spark_app_image": "",  # image to use for spark operator app runtime
@@ -287,7 +287,7 @@ default_config = {
             "serving": "mlrun/mlrun",
             "nuclio": "mlrun/mlrun",
             "remote": "mlrun/mlrun",
-            "dask": "mlrun/ml-base",
+            "dask": "mlrun/mlrun",
             "mpijob": "mlrun/mlrun",
             "application": "python",
         },
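The new defaults can be inspected or overridden through the usual mlrun config object; a sketch (the custom registry and tag are illustrative):

import mlrun

print(mlrun.mlconf.dask_kfp_image)  # mlrun/mlrun
print(mlrun.mlconf.function_defaults.image_by_kind.to_dict()["dask"])  # mlrun/mlrun

# Pin a custom dask KFP runner image for this process if needed:
mlrun.mlconf.dask_kfp_image = "myregistry/my-dask-runner:latest"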
mlrun/db/base.py CHANGED
@@ -1119,6 +1119,19 @@ class RunDBInterface(ABC):
     ) -> None:
         pass

+    @abstractmethod
+    def get_monitoring_function_summaries(
+        self,
+        project: str,
+        start: Optional[datetime.datetime] = None,
+        end: Optional[datetime.datetime] = None,
+        names: Optional[Union[list[str], str]] = None,
+        labels: Optional[Union[str, dict[str, Optional[str]], list[str]]] = None,
+        include_stats: bool = False,
+        include_infra: bool = True,
+    ) -> list[mlrun.common.schemas.model_monitoring.FunctionSummary]:
+        pass
+
     @abstractmethod
     def get_project_summary(self, project: str) -> mlrun.common.schemas.ProjectSummary:
         pass
mlrun/db/httpdb.py CHANGED
@@ -50,6 +50,7 @@ from mlrun_pipelines.utils import compile_pipeline

 from ..artifacts import Artifact
 from ..common.schemas import AlertActivations
+from ..common.schemas.model_monitoring import FunctionSummary
 from ..config import config
 from ..datastore.datastore_profile import DatastoreProfile2Json
 from ..feature_store import FeatureSet, FeatureVector
@@ -4118,6 +4119,52 @@ class HTTPRunDB(RunDBInterface):
             params={**credentials, "replace_creds": replace_creds},
         )

+    def get_monitoring_function_summaries(
+        self,
+        project: str,
+        start: Optional[datetime] = None,
+        end: Optional[datetime] = None,
+        names: Optional[Union[list[str], str]] = None,
+        labels: Optional[Union[str, dict[str, Optional[str]], list[str]]] = None,
+        include_stats: bool = False,
+        include_infra: bool = True,
+    ) -> list[FunctionSummary]:
+        """
+        Get monitoring function summaries for the specified project.
+
+        :param project: The name of the project.
+        :param start: Start time for filtering the results (optional).
+        :param end: End time for filtering the results (optional).
+        :param names: List of function names to filter by (optional).
+        :param labels: Labels to filter by (optional).
+        :param include_stats: Whether to include statistics in the response (default is False).
+        :param include_infra: Whether to include model monitoring infrastructure functions (default is True).
+
+        :return: A list of FunctionSummary objects containing information about the monitoring functions.
+        """
+
+        path = f"projects/{project}/model-monitoring/function-summaries"
+        labels = self._parse_labels(labels)
+        if names and isinstance(names, str):
+            names = [names]
+        response = self.api_call(
+            method=mlrun.common.types.HTTPMethod.GET,
+            path=path,
+            params={
+                "start": datetime_to_iso(start),
+                "end": datetime_to_iso(end),
+                "name": names,
+                "label": labels,
+                "include-stats": include_stats,
+                "include-infra": include_infra,
+            },
+        )
+
+        results = []
+        for item in response.json():
+            results.append(FunctionSummary(**item))
+        return results
+
     def create_hub_source(
         self, source: Union[dict, mlrun.common.schemas.IndexedHubSource]
     ):
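A sketch of calling the new client method; the project name and time window are illustrative:

import datetime
import mlrun

db = mlrun.get_run_db()
summaries = db.get_monitoring_function_summaries(
    project="my-project",
    start=datetime.datetime.now(datetime.timezone.utc) - datetime.timedelta(days=1),
    include_stats=True,
)
for summary in summaries:
    print(summary.type.value, summary.name, summary.status)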
mlrun/db/nopdb.py CHANGED
@@ -893,6 +893,18 @@ class NopDB(RunDBInterface):
     ) -> None:
         pass

+    def get_monitoring_function_summaries(
+        self,
+        project: str,
+        start: Optional[datetime.datetime] = None,
+        end: Optional[datetime.datetime] = None,
+        names: Optional[Union[list[str], str]] = None,
+        labels: Optional[Union[str, dict[str, Optional[str]], list[str]]] = None,
+        include_stats: bool = False,
+        include_infra: bool = True,
+    ) -> [mlrun.common.schemas.model_monitoring.FunctionSummary]:
+        pass
+
     def generate_event(
         self, name: str, event_data: Union[dict, mlrun.common.schemas.Event], project=""
     ):
mlrun/launcher/client.py CHANGED
@@ -12,6 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 import abc
+import warnings
 from typing import Optional

 import IPython.display
@@ -23,6 +24,7 @@ import mlrun.lists
 import mlrun.model
 import mlrun.runtimes
 import mlrun.utils
+import mlrun.utils.version


 class ClientBaseLauncher(launcher.BaseLauncher, abc.ABC):
@@ -60,6 +62,27 @@ class ClientBaseLauncher(launcher.BaseLauncher, abc.ABC):
         ):
             image = mlrun.mlconf.function_defaults.image_by_kind.to_dict()[runtime.kind]

+        # Warn if user explicitly set the deprecated mlrun/ml-base image
+        if image and "mlrun/ml-base" in image:
+            client_version = mlrun.utils.version.Version().get()["version"]
+            auto_replaced = mlrun.utils.validate_component_version_compatibility(
+                "mlrun-client", "1.10.0", mlrun_client_version=client_version
+            )
+            message = (
+                "'mlrun/ml-base' image is deprecated in 1.10.0 and will be removed in 1.12.0, "
+                "use 'mlrun/mlrun' instead."
+            )
+            if auto_replaced:
+                message += (
+                    " Since your client version is >= 1.10.0, the image will be automatically "
+                    "replaced with mlrun/mlrun."
+                )
+            warnings.warn(
+                message,
+                # TODO: Remove this in 1.12.0
+                FutureWarning,
+            )
+
         # TODO: need a better way to decide whether a function requires a build
         if require_build and image and not runtime.spec.build.base_image:
             # when the function require build use the image as the base_image for the build
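For users, the migration is simply swapping the image reference; a sketch (function name and kind are illustrative):

import mlrun

# Before (deprecated; emits a FutureWarning, and 1.10+ clients replace it automatically):
# fn = mlrun.new_function("trainer", kind="job", image="mlrun/ml-base")

# After:
fn = mlrun.new_function("trainer", kind="job", image="mlrun/mlrun")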
mlrun/model_monitoring/db/tsdb/base.py CHANGED
@@ -328,6 +328,36 @@ class TSDBConnector(ABC):
         If an endpoint has not been invoked within the specified time range, it will not appear in the result.
         """

+    @abstractmethod
+    def count_results_by_status(
+        self,
+        start: Optional[Union[datetime, str]] = None,
+        end: Optional[Union[datetime, str]] = None,
+        endpoint_ids: Optional[Union[str, list[str]]] = None,
+        application_names: Optional[Union[str, list[str]]] = None,
+        result_status_list: Optional[list[int]] = None,
+    ) -> dict[tuple[str, int], int]:
+        """
+        Read results status from the TSDB and return a dictionary of results statuses by application name.
+
+        :param start: The start time in which to read the results. By default, the last 24 hours are read.
+        :param end: The end time in which to read the results. Default is the current time (now).
+        :param endpoint_ids: Optional list of endpoint ids to filter the results by. By default, all
+            endpoint ids are included.
+        :param application_names: Optional list of application names to filter the results by. By default, all
+            applications are included.
+        :param result_status_list: Optional list of result statuses to filter the results by. By default, all
+            result statuses are included.
+
+        :return: A dictionary where the key is a tuple of (application_name, result_status) and the value is the
+            total number of results with that status for that application.
+            For example:
+            {
+                ('app1', 1): 10,
+                ('app1', 2): 5
+            }
+        """
+
     async def add_basic_metrics(
         self,
         model_endpoint_objects: list[mlrun.common.schemas.ModelEndpoint],
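A sketch of consuming the returned mapping, e.g. totaling detections per application (values are illustrative; status 2 corresponds to ResultStatusApp.detected above):

from collections import defaultdict

# Shape as returned by count_results_by_status:
counts = {("app1", 1): 10, ("app1", 2): 5, ("app2", 2): 3}

detected_per_app: dict[str, int] = defaultdict(int)
for (app_name, status), total in counts.items():
    if status == 2:  # ResultStatusApp.detected
        detected_per_app[app_name] += total

print(dict(detected_per_app))  # {'app1': 5, 'app2': 3}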
mlrun/model_monitoring/db/tsdb/tdengine/tdengine_connection.py CHANGED
@@ -11,8 +11,7 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-
-import traceback
+import time
 from collections.abc import Callable
 from enum import Enum
 from typing import Any, Final, Optional, Union
@@ -20,6 +19,9 @@ from typing import Any, Final, Optional, Union
 import taosws
 from taosws import TaosStmt

+import mlrun
+from mlrun.utils import logger
+

 class _StrEnum(str, Enum):
     pass
@@ -137,40 +139,99 @@ class Statement:
         return statement


-def _run(connection_string, prefix_statements, q, statements, query):
-    try:
-        conn = taosws.connect(connection_string)
-
-        for statement in prefix_statements + statements:
-            if isinstance(statement, Statement):
-                prepared_statement = statement.prepare(conn.statement())
-                prepared_statement.execute()
-            else:
-                conn.execute(statement)
-
-        if not query:
-            q.put(None)
-            return
-
-        res = conn.query(query)
-
-        # taosws.TaosField is not serializable
-        fields = [
-            Field(field.name(), field.type(), field.bytes()) for field in res.fields
-        ]
-
-        q.put(QueryResult(list(res), fields))
-    except Exception as e:
-        tb = traceback.format_exc()
-        q.put(ErrorResult(tb, e))
-
-
 class TDEngineConnection:
-    def __init__(self, connection_string):
+    def __init__(self, connection_string, max_retries=3, retry_delay=0.5):
         self._connection_string = connection_string
         self.prefix_statements = []
+        self._max_retries = max_retries
+        self._retry_delay = retry_delay

-        self._conn = taosws.connect(self._connection_string)
+        self._conn = self._create_connection()
+
+    def _create_connection(self):
+        """Create a new TDEngine connection."""
+        return taosws.connect(self._connection_string)
+
+    def _reconnect(self):
+        """Close current connection and create a new one."""
+        try:
+            if hasattr(self, "_conn") and self._conn:
+                self._conn.close()
+        except Exception as e:
+            logger.warning(f"Error closing connection during reconnect: {e}")
+
+        self._conn = self._create_connection()
+        logger.info("Successfully reconnected to TDEngine")
+
+    def _execute_with_retry(self, operation, operation_name, *args, **kwargs):
+        """
+        Execute an operation with retry logic for connection failures.
+
+        :param operation: The function to execute
+        :param operation_name: Name of the operation for logging
+        :param args: Arguments to pass to the operation
+        :param kwargs: Keyword arguments to pass to the operation
+        :return: Result of the operation
+        """
+        last_exception = None
+
+        for attempt in range(self._max_retries + 1):  # +1 for initial attempt
+            try:
+                return operation(*args, **kwargs)
+
+            except taosws.Error as e:
+                last_exception = e
+
+                if attempt < self._max_retries:
+                    logger.warning(
+                        f"Connection error during {operation_name} "
+                        f"(attempt {attempt + 1}/{self._max_retries + 1}): {e}. "
+                        f"Retrying in {self._retry_delay} seconds..."
+                    )
+
+                    # Wait before retrying
+                    time.sleep(self._retry_delay)
+
+                    # Reconnect
+                    try:
+                        self._reconnect()
+                    except Exception as reconnect_error:
+                        logger.error(f"Failed to reconnect: {reconnect_error}")
+                        if attempt == self._max_retries - 1:
+                            # Last attempt, raise the reconnection error
+                            raise TDEngineError(
+                                f"Failed to reconnect after {operation_name} failure: {reconnect_error}"
+                            ) from reconnect_error
+                        continue
+                else:
+                    # Max retries exceeded
+                    logger.error(
+                        f"Max retries ({self._max_retries}) exceeded for {operation_name}"
+                    )
+                    break

+            except Exception as e:
+                # Non-TDEngine error, don't retry
+                raise TDEngineError(
+                    f"Unexpected error during {operation_name}: {e}"
+                ) from e
+
+        # If we get here, all retries failed
+        raise TDEngineError(
+            f"Failed to {operation_name} after {self._max_retries} retries: {last_exception}"
+        ) from last_exception
+
+    def _execute_statement(self, statement):
+        """Execute a single statement (string or Statement object)."""
+        if isinstance(statement, Statement):
+            prepared_statement = statement.prepare(self._conn.statement())
+            prepared_statement.execute()
+        else:
+            self._conn.execute(statement)
+
+    def _execute_query(self, query):
+        """Execute a query and return the result."""
+        return self._conn.query(query)

     def run(
         self,
@@ -181,33 +242,40 @@ class TDEngineConnection:
         if not isinstance(statements, list):
             statements = [statements]

-        for statement in self.prefix_statements + statements:
+        # Execute all statements with retry logic
+        all_statements = self.prefix_statements + statements
+        for i, statement in enumerate(all_statements):
+            operation_name = f"execute statement {i + 1}/{len(all_statements)}"
             if isinstance(statement, Statement):
-                try:
-                    prepared_statement = statement.prepare(self._conn.statement())
-                    prepared_statement.execute()
-                except taosws.Error as e:
-                    raise TDEngineError(
-                        f"Failed to run prepared statement `{self._conn.statement()}`: {e}"
-                    ) from e
+                operation_name += " (prepared)"
             else:
-                try:
-                    self._conn.execute(statement)
-                except taosws.Error as e:
-                    raise TDEngineError(
-                        f"Failed to run statement `{statement}`: {e}"
-                    ) from e
+                operation_name += f" `{statement}`"
+
+            self._execute_with_retry(self._execute_statement, operation_name, statement)

         if not query:
             return None

-        try:
-            res = self._conn.query(query)
-        except taosws.Error as e:
-            raise TDEngineError(f"Failed to run query `{query}`: {e}") from e
+        # Execute query with retry logic
+        res = self._execute_with_retry(
+            self._execute_query, f"execute query `{query}`", query
+        )

+        # Process results
         fields = [
             Field(field.name(), field.type(), field.bytes()) for field in res.fields
         ]

         return QueryResult(list(res), fields)
+
+    def close(self):
+        """Close the connection."""
+        try:
+            if self._conn:
+                self._conn.close()
+                logger.debug("TDEngine connection closed")
+            self._conn = None
+        except Exception as e:
+            logger.warning(
+                f"Error closing TDEngine connection: {mlrun.errors.err_to_str(e)}"
+            )
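A sketch of using the reworked connection, which now executes in-process with reconnect-and-retry instead of the removed queue-based _run helper. The connection string, database, and table are illustrative, and the keyword names follow the run() signature shown above:

conn = TDEngineConnection(
    "taosws://root:taosdata@localhost:6041",
    max_retries=5,    # retry transient taosws errors up to 5 times per operation
    retry_delay=1.0,  # seconds to sleep before reconnecting and retrying
)
conn.prefix_statements = ["USE mlrun_monitoring"]  # illustrative database

result = conn.run(
    statements="CREATE TABLE IF NOT EXISTS metrics (ts TIMESTAMP, v DOUBLE)",
    query="SELECT COUNT(*) FROM metrics",
)
print(result)  # QueryResult with rows and fields
conn.close()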