mlrun 1.7.0rc28__py3-none-any.whl → 1.7.0rc55__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of mlrun might be problematic.

Files changed (135)
  1. mlrun/__main__.py +4 -2
  2. mlrun/alerts/alert.py +75 -8
  3. mlrun/artifacts/base.py +1 -0
  4. mlrun/artifacts/manager.py +9 -2
  5. mlrun/common/constants.py +4 -1
  6. mlrun/common/db/sql_session.py +3 -2
  7. mlrun/common/formatters/__init__.py +1 -0
  8. mlrun/common/formatters/artifact.py +1 -0
  9. mlrun/{model_monitoring/application.py → common/formatters/feature_set.py} +20 -6
  10. mlrun/common/formatters/run.py +3 -0
  11. mlrun/common/helpers.py +0 -1
  12. mlrun/common/schemas/__init__.py +3 -1
  13. mlrun/common/schemas/alert.py +15 -12
  14. mlrun/common/schemas/api_gateway.py +6 -6
  15. mlrun/common/schemas/auth.py +5 -0
  16. mlrun/common/schemas/client_spec.py +0 -1
  17. mlrun/common/schemas/common.py +7 -4
  18. mlrun/common/schemas/frontend_spec.py +7 -0
  19. mlrun/common/schemas/function.py +7 -0
  20. mlrun/common/schemas/model_monitoring/__init__.py +4 -3
  21. mlrun/common/schemas/model_monitoring/constants.py +41 -26
  22. mlrun/common/schemas/model_monitoring/model_endpoints.py +23 -47
  23. mlrun/common/schemas/notification.py +69 -12
  24. mlrun/common/schemas/project.py +45 -12
  25. mlrun/common/schemas/workflow.py +10 -2
  26. mlrun/common/types.py +1 -0
  27. mlrun/config.py +91 -35
  28. mlrun/data_types/data_types.py +6 -1
  29. mlrun/data_types/spark.py +2 -2
  30. mlrun/data_types/to_pandas.py +57 -25
  31. mlrun/datastore/__init__.py +1 -0
  32. mlrun/datastore/alibaba_oss.py +3 -2
  33. mlrun/datastore/azure_blob.py +125 -37
  34. mlrun/datastore/base.py +42 -21
  35. mlrun/datastore/datastore.py +4 -2
  36. mlrun/datastore/datastore_profile.py +1 -1
  37. mlrun/datastore/dbfs_store.py +3 -7
  38. mlrun/datastore/filestore.py +1 -3
  39. mlrun/datastore/google_cloud_storage.py +85 -29
  40. mlrun/datastore/inmem.py +4 -1
  41. mlrun/datastore/redis.py +1 -0
  42. mlrun/datastore/s3.py +25 -12
  43. mlrun/datastore/sources.py +76 -4
  44. mlrun/datastore/spark_utils.py +30 -0
  45. mlrun/datastore/storeytargets.py +151 -0
  46. mlrun/datastore/targets.py +102 -131
  47. mlrun/datastore/v3io.py +1 -0
  48. mlrun/db/base.py +15 -6
  49. mlrun/db/httpdb.py +57 -28
  50. mlrun/db/nopdb.py +29 -5
  51. mlrun/errors.py +20 -3
  52. mlrun/execution.py +46 -5
  53. mlrun/feature_store/api.py +25 -1
  54. mlrun/feature_store/common.py +6 -11
  55. mlrun/feature_store/feature_vector.py +3 -1
  56. mlrun/feature_store/retrieval/job.py +4 -1
  57. mlrun/feature_store/retrieval/spark_merger.py +10 -39
  58. mlrun/feature_store/steps.py +8 -0
  59. mlrun/frameworks/_common/plan.py +3 -3
  60. mlrun/frameworks/_ml_common/plan.py +1 -1
  61. mlrun/frameworks/parallel_coordinates.py +2 -3
  62. mlrun/frameworks/sklearn/mlrun_interface.py +13 -3
  63. mlrun/k8s_utils.py +48 -2
  64. mlrun/launcher/client.py +6 -6
  65. mlrun/launcher/local.py +2 -2
  66. mlrun/model.py +215 -34
  67. mlrun/model_monitoring/api.py +38 -24
  68. mlrun/model_monitoring/applications/__init__.py +1 -2
  69. mlrun/model_monitoring/applications/_application_steps.py +60 -29
  70. mlrun/model_monitoring/applications/base.py +2 -174
  71. mlrun/model_monitoring/applications/context.py +197 -70
  72. mlrun/model_monitoring/applications/evidently_base.py +11 -85
  73. mlrun/model_monitoring/applications/histogram_data_drift.py +21 -16
  74. mlrun/model_monitoring/applications/results.py +4 -4
  75. mlrun/model_monitoring/controller.py +110 -282
  76. mlrun/model_monitoring/db/stores/__init__.py +8 -3
  77. mlrun/model_monitoring/db/stores/base/store.py +3 -0
  78. mlrun/model_monitoring/db/stores/sqldb/models/base.py +9 -7
  79. mlrun/model_monitoring/db/stores/sqldb/models/mysql.py +18 -3
  80. mlrun/model_monitoring/db/stores/sqldb/sql_store.py +43 -23
  81. mlrun/model_monitoring/db/stores/v3io_kv/kv_store.py +48 -35
  82. mlrun/model_monitoring/db/tsdb/__init__.py +7 -2
  83. mlrun/model_monitoring/db/tsdb/base.py +147 -15
  84. mlrun/model_monitoring/db/tsdb/tdengine/schemas.py +94 -55
  85. mlrun/model_monitoring/db/tsdb/tdengine/stream_graph_steps.py +0 -3
  86. mlrun/model_monitoring/db/tsdb/tdengine/tdengine_connector.py +144 -38
  87. mlrun/model_monitoring/db/tsdb/v3io/stream_graph_steps.py +44 -3
  88. mlrun/model_monitoring/db/tsdb/v3io/v3io_connector.py +246 -57
  89. mlrun/model_monitoring/helpers.py +70 -50
  90. mlrun/model_monitoring/stream_processing.py +96 -195
  91. mlrun/model_monitoring/writer.py +13 -5
  92. mlrun/package/packagers/default_packager.py +2 -2
  93. mlrun/projects/operations.py +16 -8
  94. mlrun/projects/pipelines.py +126 -115
  95. mlrun/projects/project.py +286 -129
  96. mlrun/render.py +3 -3
  97. mlrun/run.py +38 -19
  98. mlrun/runtimes/__init__.py +19 -8
  99. mlrun/runtimes/base.py +4 -1
  100. mlrun/runtimes/daskjob.py +1 -1
  101. mlrun/runtimes/funcdoc.py +1 -1
  102. mlrun/runtimes/kubejob.py +6 -6
  103. mlrun/runtimes/local.py +12 -5
  104. mlrun/runtimes/nuclio/api_gateway.py +68 -8
  105. mlrun/runtimes/nuclio/application/application.py +307 -70
  106. mlrun/runtimes/nuclio/function.py +63 -14
  107. mlrun/runtimes/nuclio/serving.py +10 -10
  108. mlrun/runtimes/pod.py +25 -19
  109. mlrun/runtimes/remotesparkjob.py +2 -5
  110. mlrun/runtimes/sparkjob/spark3job.py +16 -17
  111. mlrun/runtimes/utils.py +34 -0
  112. mlrun/serving/routers.py +2 -5
  113. mlrun/serving/server.py +37 -19
  114. mlrun/serving/states.py +30 -3
  115. mlrun/serving/v2_serving.py +44 -35
  116. mlrun/track/trackers/mlflow_tracker.py +5 -0
  117. mlrun/utils/async_http.py +1 -1
  118. mlrun/utils/db.py +18 -0
  119. mlrun/utils/helpers.py +150 -36
  120. mlrun/utils/http.py +1 -1
  121. mlrun/utils/notifications/notification/__init__.py +0 -1
  122. mlrun/utils/notifications/notification/webhook.py +8 -1
  123. mlrun/utils/notifications/notification_pusher.py +1 -1
  124. mlrun/utils/v3io_clients.py +2 -2
  125. mlrun/utils/version/version.json +2 -2
  126. {mlrun-1.7.0rc28.dist-info → mlrun-1.7.0rc55.dist-info}/METADATA +153 -66
  127. {mlrun-1.7.0rc28.dist-info → mlrun-1.7.0rc55.dist-info}/RECORD +131 -134
  128. {mlrun-1.7.0rc28.dist-info → mlrun-1.7.0rc55.dist-info}/WHEEL +1 -1
  129. mlrun/feature_store/retrieval/conversion.py +0 -271
  130. mlrun/model_monitoring/controller_handler.py +0 -37
  131. mlrun/model_monitoring/evidently_application.py +0 -20
  132. mlrun/model_monitoring/prometheus.py +0 -216
  133. {mlrun-1.7.0rc28.dist-info → mlrun-1.7.0rc55.dist-info}/LICENSE +0 -0
  134. {mlrun-1.7.0rc28.dist-info → mlrun-1.7.0rc55.dist-info}/entry_points.txt +0 -0
  135. {mlrun-1.7.0rc28.dist-info → mlrun-1.7.0rc55.dist-info}/top_level.txt +0 -0
mlrun/model_monitoring/db/stores/sqldb/models/mysql.py

@@ -18,6 +18,7 @@ from sqlalchemy.ext.declarative import declarative_base, declared_attr
 
 from mlrun.common.schemas.model_monitoring import (
     EventFieldType,
+    ResultData,
     WriterEvent,
 )
 
@@ -32,12 +33,21 @@ Base = declarative_base()
 
 
 class ModelEndpointsTable(Base, ModelEndpointsBaseTable):
+    feature_stats = Column(
+        EventFieldType.FEATURE_STATS, sqlalchemy.dialects.mysql.MEDIUMTEXT
+    )
+    current_stats = Column(
+        EventFieldType.CURRENT_STATS, sqlalchemy.dialects.mysql.MEDIUMTEXT
+    )
+    metrics = Column(EventFieldType.METRICS, sqlalchemy.dialects.mysql.MEDIUMTEXT)
     first_request = Column(
         EventFieldType.FIRST_REQUEST,
+        # TODO: migrate to DATETIME, see ML-6921
        sqlalchemy.dialects.mysql.TIMESTAMP(fsp=3, timezone=True),
    )
    last_request = Column(
        EventFieldType.LAST_REQUEST,
+        # TODO: migrate to DATETIME, see ML-6921
        sqlalchemy.dialects.mysql.TIMESTAMP(fsp=3, timezone=True),
    )
 
@@ -52,11 +62,11 @@ class _ApplicationResultOrMetric:
 
     start_infer_time = Column(
         WriterEvent.START_INFER_TIME,
-        sqlalchemy.dialects.mysql.TIMESTAMP(fsp=3, timezone=True),
+        sqlalchemy.dialects.mysql.DATETIME(fsp=3, timezone=True),
     )
     end_infer_time = Column(
         WriterEvent.END_INFER_TIME,
-        sqlalchemy.dialects.mysql.TIMESTAMP(fsp=3, timezone=True),
+        sqlalchemy.dialects.mysql.DATETIME(fsp=3, timezone=True),
     )
 
     @declared_attr
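
The hunk above moves the inference-time columns from TIMESTAMP to DATETIME, and flags the same migration for first_request/last_request as a TODO under ML-6921. This matches MySQL's type semantics: TIMESTAMP is stored as UTC, converted per session time zone, and bounded by the 2038 epoch limit, while DATETIME keeps the wall-clock value over years 1000-9999. A minimal sketch contrasting the two column types in SQLAlchemy's MySQL dialect (the table and column names here are illustrative, not from mlrun):

```python
from sqlalchemy import Column, MetaData, Table
from sqlalchemy.dialects import mysql

metadata = MetaData()
events = Table(
    "events",
    metadata,
    # TIMESTAMP: stored as UTC, session-time-zone converted, 2038 epoch limit
    Column("created_at", mysql.TIMESTAMP(fsp=3, timezone=True)),
    # DATETIME: wall-clock value, valid from 1000-01-01 to 9999-12-31
    Column("start_infer_time", mysql.DATETIME(fsp=3, timezone=True)),
)
```
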
@@ -70,7 +80,12 @@ class _ApplicationResultOrMetric:
 class ApplicationResultTable(
     Base, _ApplicationResultOrMetric, ApplicationResultBaseTable
 ):
-    pass
+    result_extra_data = Column(
+        ResultData.RESULT_EXTRA_DATA, sqlalchemy.dialects.mysql.MEDIUMTEXT
+    )
+    current_stats = Column(
+        ResultData.CURRENT_STATS, sqlalchemy.dialects.mysql.MEDIUMTEXT
+    )
 
 
 class ApplicationMetricsTable(
mlrun/model_monitoring/db/stores/sqldb/sql_store.py

@@ -20,7 +20,7 @@ import pandas as pd
 import sqlalchemy
 import sqlalchemy.exc
 import sqlalchemy.orm
-from sqlalchemy.engine import make_url
+from sqlalchemy.engine import Engine, make_url
 from sqlalchemy.sql.elements import BinaryExpression
 
 import mlrun.common.model_monitoring.helpers
@@ -61,9 +61,15 @@ class SQLStoreBase(StoreBase):
         )
 
         self._sql_connection_string = kwargs.get("store_connection_string")
-        self._engine = get_engine(dsn=self._sql_connection_string)
+        self._engine = None
         self._init_tables()
 
+    @property
+    def engine(self) -> Engine:
+        if not self._engine:
+            self._engine = get_engine(dsn=self._sql_connection_string)
+        return self._engine
+
     def create_tables(self):
         self._create_tables_if_not_exist()
 
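The constructor no longer builds the SQLAlchemy engine eagerly; it is created on first access through the new engine property, so instantiating the store no longer opens a connection pool. A minimal, self-contained sketch of the same lazy-initialization pattern (the class and DSN below are hypothetical):

```python
from typing import Optional

from sqlalchemy import create_engine
from sqlalchemy.engine import Engine


class LazyStore:
    def __init__(self, dsn: str) -> None:
        self._dsn = dsn
        self._engine: Optional[Engine] = None  # nothing is connected yet

    @property
    def engine(self) -> Engine:
        # The engine (and its connection pool) is created only when first needed.
        if self._engine is None:
            self._engine = create_engine(self._dsn)
        return self._engine
```
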
@@ -116,7 +122,7 @@
         :param table_name: Target table name.
         :param event:      Event dictionary that will be written into the DB.
         """
-        with self._engine.connect() as connection:
+        with self.engine.connect() as connection:
             # Convert the result into a pandas Dataframe and write it into the database
             event_df = pd.DataFrame([event])
             event_df.to_sql(table_name, con=connection, index=False, if_exists="append")
@@ -177,6 +183,11 @@
         :param table:    SQLAlchemy declarative table.
         :param criteria: A list of binary expressions that filter the query.
         """
+        if not self.engine.has_table(table.__tablename__):
+            logger.debug(
+                f"Table {table.__tablename__} does not exist in the database. Skipping deletion."
+            )
+            return
         with create_session(dsn=self._sql_connection_string) as session:
             # Generate and commit the delete query
             session.query(
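One caveat worth noting: Engine.has_table() is deprecated as of SQLAlchemy 1.4 (and removed in 2.0) in favor of the inspector API, so this guard presumably relies on the SQLAlchemy version mlrun pins. The equivalent inspector-based check would look like:

```python
import sqlalchemy
from sqlalchemy.engine import Engine


def table_exists(engine: Engine, table_name: str) -> bool:
    # Preferred over the deprecated Engine.has_table() in SQLAlchemy 1.4+
    return sqlalchemy.inspect(engine).has_table(table_name)
```
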
@@ -266,22 +277,8 @@
         labels: list[str] = None,
         top_level: bool = None,
         uids: list = None,
+        include_stats: bool = None,
     ) -> list[dict[str, typing.Any]]:
-        """
-        Returns a list of model endpoint dictionaries, supports filtering by model, function, labels or top level.
-        By default, when no filters are applied, all available model endpoints for the given project will
-        be listed.
-
-        :param model:     The name of the model to filter by.
-        :param function:  The name of the function to filter by.
-        :param labels:    A list of labels to filter by. Label filters work by either filtering a specific value
-                          of a label (i.e. list("key=value")) or by looking for the existence of a given
-                          key (i.e. "key").
-        :param top_level: If True will return only routers and endpoint that are NOT children of any router.
-        :param uids:      List of model endpoint unique ids to include in the result.
-
-        :return: A list of model endpoint dictionaries.
-        """
         # Generate an empty model endpoints that will be filled afterwards with model endpoint dictionaries
         endpoint_list = []
 
@@ -341,6 +338,12 @@
             ):
                 continue
 
+            if not include_stats:
+                # Exclude these fields when listing model endpoints to avoid returning too much data (ML-6594)
+                # TODO: Remove stats from table schema (ML-7196)
+                endpoint_dict.pop(mm_schemas.EventFieldType.FEATURE_STATS)
+                endpoint_dict.pop(mm_schemas.EventFieldType.CURRENT_STATS)
+
             endpoint_list.append(endpoint_dict)
 
         return endpoint_list
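The new include_stats flag strips the bulky feature_stats and current_stats fields from list responses (ML-6594). A toy illustration of the gate (the dictionary below is hypothetical); note that dict.pop(key) without a default raises KeyError when a field is missing, whereas pop(key, None) tolerates its absence:

```python
endpoint = {"uid": "ep-1", "feature_stats": {}, "current_stats": {}}

include_stats = False
if not include_stats:
    endpoint.pop("feature_stats", None)
    endpoint.pop("current_stats", None)

print(endpoint)  # {'uid': 'ep-1'}
```
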
@@ -527,9 +530,9 @@
         for table in self._tables:
             # Create table if not exist. The `metadata` contains the `ModelEndpointsTable`
             db_name = make_url(self._sql_connection_string).database
-            if not self._engine.has_table(table):
+            if not self.engine.has_table(table):
                 logger.info(f"Creating table {table} on {db_name} db.")
-                self._tables[table].metadata.create_all(bind=self._engine)
+                self._tables[table].metadata.create_all(bind=self.engine)
             else:
                 logger.info(f"Table {table} already exists on {db_name} db.")
 
@@ -577,12 +580,19 @@
         """
         Delete all the model monitoring resources of the project in the SQL tables.
         """
+        logger.debug(
+            "Deleting model monitoring endpoints resources from the SQL tables",
+            project=self.project,
+        )
         endpoints = self.list_model_endpoints()
-        logger.debug("Deleting model monitoring resources", project=self.project)
 
         for endpoint_dict in endpoints:
             endpoint_id = endpoint_dict[mm_schemas.EventFieldType.UID]
-
+            logger.debug(
+                "Deleting model endpoint resources from the SQL tables",
+                endpoint_id=endpoint_id,
+                project=self.project,
+            )
             # Delete last analyzed records
             self._delete_last_analyzed(endpoint_id=endpoint_id)
 
@@ -592,6 +602,16 @@
 
             # Delete model endpoint record
             self.delete_model_endpoint(endpoint_id=endpoint_id)
+            logger.debug(
+                "Successfully deleted model endpoint resources",
+                endpoint_id=endpoint_id,
+                project=self.project,
+            )
+
+        logger.debug(
+            "Successfully deleted model monitoring endpoints resources from the SQL tables",
+            project=self.project,
+        )
 
     def get_model_endpoint_metrics(
         self, endpoint_id: str, type: mm_schemas.ModelEndpointMonitoringMetricType
@@ -615,7 +635,7 @@
 
         # Note: the block below does not use self._get, as we need here all the
         # results, not only `one_or_none`.
-        with sqlalchemy.orm.Session(self._engine) as session:
+        with sqlalchemy.orm.Session(self.engine) as session:
             metric_rows = (
                 session.query(table)  # pyright: ignore[reportOptionalCall]
                 .filter(table.endpoint_id == endpoint_id)
mlrun/model_monitoring/db/stores/v3io_kv/kv_store.py

@@ -11,7 +11,7 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-
+import http
 import json
 import typing
 from dataclasses import dataclass
@@ -20,6 +20,7 @@ from http import HTTPStatus
 import v3io.dataplane
 import v3io.dataplane.output
 import v3io.dataplane.response
+from v3io.dataplane import Client as V3IOClient
 
 import mlrun.common.model_monitoring.helpers
 import mlrun.common.schemas.model_monitoring as mm_schemas
@@ -34,11 +35,11 @@ fields_to_encode_decode = [
 ]
 
 _METRIC_FIELDS: list[str] = [
-    mm_schemas.WriterEvent.APPLICATION_NAME,
-    mm_schemas.MetricData.METRIC_NAME,
-    mm_schemas.MetricData.METRIC_VALUE,
-    mm_schemas.WriterEvent.START_INFER_TIME,
-    mm_schemas.WriterEvent.END_INFER_TIME,
+    mm_schemas.WriterEvent.APPLICATION_NAME.value,
+    mm_schemas.MetricData.METRIC_NAME.value,
+    mm_schemas.MetricData.METRIC_VALUE.value,
+    mm_schemas.WriterEvent.START_INFER_TIME.value,
+    mm_schemas.WriterEvent.END_INFER_TIME.value,
 ]
 
 
@@ -100,13 +101,18 @@ class KVStoreBase(StoreBase):
         project: str,
     ) -> None:
         super().__init__(project=project)
-        # Initialize a V3IO client instance
-        self.client = mlrun.utils.v3io_clients.get_v3io_client(
-            endpoint=mlrun.mlconf.v3io_api,
-        )
+        self._client = None
         # Get the KV table path and container
         self.path, self.container = self._get_path_and_container()
 
+    @property
+    def client(self) -> V3IOClient:
+        if not self._client:
+            self._client = mlrun.utils.v3io_clients.get_v3io_client(
+                endpoint=mlrun.mlconf.v3io_api,
+            )
+        return self._client
+
     def write_model_endpoint(self, endpoint: dict[str, typing.Any]):
         """
         Create a new endpoint record in the KV table.
@@ -226,24 +232,8 @@ class KVStoreBase(StoreBase):
         labels: list[str] = None,
         top_level: bool = None,
         uids: list = None,
+        include_stats: bool = None,
     ) -> list[dict[str, typing.Any]]:
-        """
-        Returns a list of model endpoint dictionaries, supports filtering by model, function, labels or top level.
-        By default, when no filters are applied, all available model endpoints for the given project will
-        be listed.
-
-        :param model:     The name of the model to filter by.
-        :param function:  The name of the function to filter by.
-        :param labels:    A list of labels to filter by. Label filters work by either filtering a specific value
-                          of a label (i.e. list("key=value")) or by looking for the existence of a given
-                          key (i.e. "key").
-        :param top_level: If True will return only routers and endpoint that are NOT children of any router.
-        :param uids:      List of model endpoint unique ids to include in the result.
-
-
-        :return: A list of model endpoint dictionaries.
-        """
-
         # # Initialize an empty model endpoints list
         endpoint_list = []
 
@@ -283,6 +273,10 @@ class KVStoreBase(StoreBase):
             endpoint_dict = self.get_model_endpoint(
                 endpoint_id=endpoint_id,
             )
+            if not include_stats:
+                # Exclude these fields when listing model endpoints to avoid returning too much data (ML-6594)
+                endpoint_dict.pop(mm_schemas.EventFieldType.FEATURE_STATS)
+                endpoint_dict.pop(mm_schemas.EventFieldType.CURRENT_STATS)
 
             if labels and not self._validate_labels(
                 endpoint_dict=endpoint_dict, labels=labels
@@ -297,6 +291,10 @@ class KVStoreBase(StoreBase):
         """
         Delete all model endpoints resources in V3IO KV.
         """
+        logger.debug(
+            "Deleting model monitoring endpoints resources in V3IO KV",
+            project=self.project,
+        )
 
         endpoints = self.list_model_endpoints()
 
@@ -307,10 +305,22 @@ class KVStoreBase(StoreBase):
                 endpoint_id = endpoint_dict[mm_schemas.EventFieldType.ENDPOINT_ID]
             else:
                 endpoint_id = endpoint_dict[mm_schemas.EventFieldType.UID]
+
+            logger.debug(
+                "Deleting model endpoint resources from the V3IO KV table",
+                endpoint_id=endpoint_id,
+                project=self.project,
+            )
+
             self.delete_model_endpoint(
                 endpoint_id,
             )
 
+        logger.debug(
+            "Successfully deleted model monitoring endpoints from the V3IO KV table",
+            project=self.project,
+        )
+
         # Delete remain records in the KV
         all_records = self.client.kv.new_cursor(
             container=self.container,
@@ -362,7 +372,7 @@ class KVStoreBase(StoreBase):
             table_path = self._get_results_table_path(endpoint_id)
             key = event.pop(mm_schemas.WriterEvent.APPLICATION_NAME)
             metric_name = event.pop(mm_schemas.ResultData.RESULT_NAME)
-            attributes = {metric_name: json.dumps(event)}
+            attributes = {metric_name: self._encode_field(json.dumps(event))}
         else:
             raise ValueError(f"Invalid {kind = }")
 
@@ -420,20 +430,23 @@ class KVStoreBase(StoreBase):
 
         """
         try:
-            data = self.client.kv.get(
+            response = self.client.kv.get(
                 container=self._get_monitoring_schedules_container(
                     project_name=self.project
                 ),
                 table_path=endpoint_id,
                 key=application_name,
             )
-            return data.output.item[mm_schemas.SchedulingKeys.LAST_ANALYZED]
+            return response.output.item[mm_schemas.SchedulingKeys.LAST_ANALYZED]
         except v3io.dataplane.response.HttpResponseError as err:
-            logger.debug("Error while getting last analyzed time", err=err)
-            raise mlrun.errors.MLRunNotFoundError(
-                f"No last analyzed value has been found for {application_name} "
-                f"that processes model endpoint {endpoint_id}",
-            )
+            if err.status_code == http.HTTPStatus.NOT_FOUND:
+                logger.debug("Last analyzed time not found", err=err)
+                raise mlrun.errors.MLRunNotFoundError(
+                    f"No last analyzed value has been found for {application_name} "
+                    f"that processes model endpoint {endpoint_id}",
+                )
+            logger.error("Error while getting last analyzed time", err=err)
+            raise err
 
     def update_last_analyzed(
         self, endpoint_id: str, application_name: str, last_analyzed: int
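The rewritten error handler distinguishes a genuine "not found" (HTTP 404) from any other V3IO failure: only the former is translated into MLRunNotFoundError, while unexpected statuses are logged and re-raised unchanged. A self-contained sketch of the pattern, with stand-in exception classes (hypothetical, not the real v3io or mlrun types):

```python
import http


class HttpResponseError(Exception):
    """Stand-in for v3io.dataplane.response.HttpResponseError (hypothetical)."""

    def __init__(self, status_code: int) -> None:
        super().__init__(f"HTTP error {status_code}")
        self.status_code = status_code


class NotFoundError(Exception):
    """Stand-in for mlrun.errors.MLRunNotFoundError (hypothetical)."""


def translate_error(err: HttpResponseError) -> None:
    if err.status_code == http.HTTPStatus.NOT_FOUND:
        # Expected case: map to the framework's not-found error.
        raise NotFoundError("no last-analyzed value found") from err
    # Unexpected case: propagate unchanged so real failures stay visible.
    raise err
```
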
mlrun/model_monitoring/db/tsdb/__init__.py

@@ -57,7 +57,7 @@ class ObjectTSDBFactory(enum.Enum):
         :param value: Provided enum (invalid) value.
         """
         valid_values = list(cls.__members__.keys())
-        raise mlrun.errors.MLRunInvalidMMStoreType(
+        raise mlrun.errors.MLRunInvalidMMStoreTypeError(
             f"{value} is not a valid tsdb, please choose a valid value: %{valid_values}."
         )
 
@@ -76,6 +76,8 @@ def get_tsdb_connector(
 
     :return: `TSDBConnector` object. The main goal of this object is to handle different operations on the
              TSDB connector such as updating drift metrics or write application record result.
+    :raise:  `MLRunInvalidMMStoreTypeError` if the user didn't provide a TSDB connection,
+             or the provided TSDB connection is invalid.
     """
 
     tsdb_connection_string = (
@@ -91,7 +93,10 @@ def get_tsdb_connector(
     elif tsdb_connection_string and tsdb_connection_string == "v3io":
         tsdb_connector_type = mlrun.common.schemas.model_monitoring.TSDBTarget.V3IO_TSDB
     else:
-        tsdb_connector_type = None
+        raise mlrun.errors.MLRunInvalidMMStoreTypeError(
+            "You must provide a valid tsdb store connection by using "
+            "set_model_monitoring_credentials API."
+        )
 
     # Get connector type value from ObjectTSDBFactory enum class
     tsdb_connector_factory = ObjectTSDBFactory(tsdb_connector_type)
mlrun/model_monitoring/db/tsdb/base.py

@@ -15,8 +15,10 @@
 import typing
 from abc import ABC, abstractmethod
 from datetime import datetime
+from typing import Union
 
 import pandas as pd
+import pydantic
 
 import mlrun.common.schemas.model_monitoring as mm_schemas
 import mlrun.model_monitoring.db.tsdb.helpers
@@ -27,7 +29,7 @@ from mlrun.utils import logger
 class TSDBConnector(ABC):
     type: typing.ClassVar[str]
 
-    def __init__(self, project: str):
+    def __init__(self, project: str) -> None:
         """
         Initialize a new TSDB connector. The connector is used to interact with the TSDB and store monitoring data.
         At the moment we have 3 different types of monitoring data:
@@ -42,11 +44,11 @@ class TSDBConnector(ABC):
                  writer.
 
         :param project: the name of the project.
-
         """
         self.project = project
 
-    def apply_monitoring_stream_steps(self, graph):
+    @abstractmethod
+    def apply_monitoring_stream_steps(self, graph) -> None:
         """
         Apply TSDB steps on the provided monitoring graph. Throughout these steps, the graph stores live data of
         different key metric dictionaries. This data is being used by the monitoring dashboards in
@@ -58,6 +60,15 @@ class TSDBConnector(ABC):
         """
         pass
 
+    @abstractmethod
+    def handle_model_error(self, graph, **kwargs) -> None:
+        """
+        Adds a branch to the stream pod graph to handle events that
+        arrive with errors from the model server and saves them to the error TSDB table.
+        The first step generated by this method should come after the `ForwardError` step.
+        """
+
+    @abstractmethod
     def write_application_event(
         self,
         event: dict,
@@ -69,13 +80,14 @@ class TSDBConnector(ABC):
         :raise mlrun.errors.MLRunRuntimeError: If an error occurred while writing the event.
         """
 
+    @abstractmethod
     def delete_tsdb_resources(self):
         """
         Delete all project resources in the TSDB connector, such as model endpoints data and drift results.
         """
-
         pass
 
+    @abstractmethod
     def get_model_endpoint_real_time_metrics(
         self,
         endpoint_id: str,
@@ -102,6 +114,7 @@ class TSDBConnector(ABC):
         """
         pass
 
+    @abstractmethod
     def create_tables(self) -> None:
         """
         Create the TSDB tables using the TSDB connector. At the moment we support 3 types of tables:
@@ -177,6 +190,117 @@ class TSDBConnector(ABC):
         :return: Metric values object or no data object.
         """
 
+    @abstractmethod
+    def get_last_request(
+        self,
+        endpoint_ids: Union[str, list[str]],
+        start: Union[datetime, str] = "0",
+        end: Union[datetime, str] = "now",
+    ) -> pd.DataFrame:
+        """
+        Fetches data from the predictions TSDB table and returns the most recent request
+        timestamp for each specified endpoint.
+
+        :param endpoint_ids: A list of model endpoint identifiers.
+        :param start:        The start time for the query.
+        :param end:          The end time for the query.
+
+        :return: A pd.DataFrame containing the columns [endpoint_id, last_request, last_latency].
+                 If an endpoint has not been invoked within the specified time range, it will not appear in the result.
+        """
+
+    @abstractmethod
+    def get_drift_status(
+        self,
+        endpoint_ids: Union[str, list[str]],
+        start: Union[datetime, str] = "now-24h",
+        end: Union[datetime, str] = "now",
+    ) -> pd.DataFrame:
+        """
+        Fetches data from the app-results TSDB table and returns the highest status among all
+        the results in the provided time range, which by default is the last 24 hours, for each specified endpoint.
+
+        :param endpoint_ids: A list of model endpoint identifiers.
+        :param start:        The start time for the query.
+        :param end:          The end time for the query.
+
+        :return: A pd.DataFrame containing the columns [result_status, endpoint_id].
+                 If an endpoint has not been monitored within the specified time range (last 24 hours),
+                 it will not appear in the result.
+        """
+
+    @abstractmethod
+    def get_metrics_metadata(
+        self,
+        endpoint_id: str,
+        start: Union[datetime, str] = "0",
+        end: Union[datetime, str] = "now",
+    ) -> pd.DataFrame:
+        """
+        Fetches distinct metrics metadata from the metrics TSDB table for a specified model endpoint.
+
+        :param endpoint_id: The model endpoint identifier.
+        :param start:       The start time of the query.
+        :param end:         The end time of the query.
+
+        :return: A pd.DataFrame containing all distinct metrics for the specified endpoint within the given time range,
+                 containing the columns [application_name, metric_name, endpoint_id].
+        """
+
+    @abstractmethod
+    def get_results_metadata(
+        self,
+        endpoint_id: str,
+        start: Union[datetime, str] = "0",
+        end: Union[datetime, str] = "now",
+    ) -> pd.DataFrame:
+        """
+        Fetches distinct results metadata from the app-results TSDB table for a specified model endpoint.
+
+        :param endpoint_id: The model endpoint identifier.
+        :param start:       The start time of the query.
+        :param end:         The end time of the query.
+
+        :return: A pd.DataFrame containing all distinct results for the specified endpoint within the given time range,
+                 containing the columns [application_name, result_name, result_kind, endpoint_id].
+        """
+
+    @abstractmethod
+    def get_error_count(
+        self,
+        endpoint_ids: Union[str, list[str]],
+        start: Union[datetime, str] = "0",
+        end: Union[datetime, str] = "now",
+    ) -> pd.DataFrame:
+        """
+        Fetches data from the error TSDB table and returns the error count for each specified endpoint.
+
+        :param endpoint_ids: A list of model endpoint identifiers.
+        :param start:        The start time for the query.
+        :param end:          The end time for the query.
+
+        :return: A pd.DataFrame containing the columns [error_count, endpoint_id].
+                 If an endpoint has not raised errors within the specified time range, it will not appear in the result.
+        """
+
+    @abstractmethod
+    def get_avg_latency(
+        self,
+        endpoint_ids: Union[str, list[str]],
+        start: Union[datetime, str] = "0",
+        end: Union[datetime, str] = "now",
+    ) -> pd.DataFrame:
+        """
+        Fetches data from the predictions TSDB table and returns the average latency for each specified endpoint.
+
+        :param endpoint_ids: A list of model endpoint identifiers.
+        :param start:        The start time for the query.
+        :param end:          The end time for the query.
+
+        :return: A pd.DataFrame containing the columns [avg_latency, endpoint_id].
+                 If an endpoint has not been invoked within the specified time range, it will not appear in the result.
+        """
+
     @staticmethod
     def df_to_metrics_values(
         *,
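Marking these connector methods @abstractmethod changes the failure mode from a silent no-op at call time to a hard error at instantiation time: a concrete connector that misses an implementation can no longer be constructed. A minimal demonstration of that enforcement:

```python
from abc import ABC, abstractmethod


class Connector(ABC):
    @abstractmethod
    def get_last_request(self, endpoint_ids: list[str]) -> None: ...


class Incomplete(Connector):
    pass  # get_last_request is not implemented


try:
    Incomplete()
except TypeError as exc:
    # TypeError: Can't instantiate abstract class Incomplete ...
    print(exc)
```
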
@@ -286,19 +410,27 @@
             full_name = mlrun.model_monitoring.helpers._compose_full_name(
                 project=project, app=app_name, name=name
             )
-            metrics_values.append(
-                mm_schemas.ModelEndpointMonitoringResultValues(
+            try:
+                metrics_values.append(
+                    mm_schemas.ModelEndpointMonitoringResultValues(
+                        full_name=full_name,
+                        result_kind=result_kind,
+                        values=list(
+                            zip(
+                                sub_df.index,
+                                sub_df[mm_schemas.ResultData.RESULT_VALUE],
+                                sub_df[mm_schemas.ResultData.RESULT_STATUS],
+                            )
+                        ),  # pyright: ignore[reportArgumentType]
+                    )
+                )
+            except pydantic.ValidationError:
+                logger.exception(
+                    "Failed to convert data-frame into `ModelEndpointMonitoringResultValues`",
                     full_name=full_name,
-                    result_kind=result_kind,
-                    values=list(
-                        zip(
-                            sub_df.index,
-                            sub_df[mm_schemas.ResultData.RESULT_VALUE],
-                            sub_df[mm_schemas.ResultData.RESULT_STATUS],
-                        )
-                    ),  # pyright: ignore[reportArgumentType]
+                    sub_df_json=sub_df.to_json(),
                 )
-            )
+                raise
             del metrics_without_data[full_name]
 
         for metric in metrics_without_data.values():
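
The final hunk wraps the schema construction in a try/except so that a pydantic validation failure is logged together with the offending data frame before propagating. A self-contained sketch of the same log-and-re-raise pattern with a toy model (the model and field names below are hypothetical):

```python
import pydantic


class Result(pydantic.BaseModel):
    full_name: str
    result_kind: int


def build_result(raw: dict) -> Result:
    try:
        return Result(**raw)
    except pydantic.ValidationError:
        # Record the payload that failed validation, then propagate the error.
        print(f"failed to validate payload: {raw!r}")
        raise


build_result({"full_name": "proj.app.metric", "result_kind": 0})
```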