PyPI - mlrun - Versions diffs - 1.8.0rc4__py3-none-any.whl → 1.8.0rc6__py3-none-any.whl - Mend

mlrun 1.8.0rc4__py3-none-any.whl → 1.8.0rc6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of mlrun might be problematic. Click here for more details.

Files changed (69) hide show

mlrun/__init__.py +4 -3
mlrun/alerts/alert.py +129 -2
mlrun/artifacts/__init__.py +1 -1
mlrun/artifacts/base.py +12 -1
mlrun/artifacts/document.py +59 -38
mlrun/common/model_monitoring/__init__.py +0 -2
mlrun/common/model_monitoring/helpers.py +0 -28
mlrun/common/schemas/__init__.py +1 -4
mlrun/common/schemas/alert.py +3 -0
mlrun/common/schemas/artifact.py +4 -0
mlrun/common/schemas/client_spec.py +0 -1
mlrun/common/schemas/model_monitoring/__init__.py +0 -6
mlrun/common/schemas/model_monitoring/constants.py +11 -9
mlrun/common/schemas/model_monitoring/model_endpoints.py +77 -149
mlrun/common/schemas/notification.py +6 -0
mlrun/config.py +0 -2
mlrun/datastore/datastore_profile.py +57 -17
mlrun/datastore/vectorstore.py +67 -59
mlrun/db/base.py +22 -18
mlrun/db/factory.py +0 -3
mlrun/db/httpdb.py +122 -150
mlrun/db/nopdb.py +33 -17
mlrun/execution.py +43 -29
mlrun/model.py +7 -0
mlrun/model_monitoring/__init__.py +3 -2
mlrun/model_monitoring/api.py +40 -43
mlrun/model_monitoring/applications/_application_steps.py +4 -2
mlrun/model_monitoring/applications/base.py +65 -6
mlrun/model_monitoring/applications/context.py +64 -33
mlrun/model_monitoring/applications/evidently_base.py +0 -1
mlrun/model_monitoring/applications/histogram_data_drift.py +2 -6
mlrun/model_monitoring/controller.py +43 -37
mlrun/model_monitoring/db/__init__.py +0 -2
mlrun/model_monitoring/db/tsdb/base.py +2 -1
mlrun/model_monitoring/db/tsdb/tdengine/tdengine_connector.py +2 -1
mlrun/model_monitoring/db/tsdb/v3io/v3io_connector.py +43 -0
mlrun/model_monitoring/helpers.py +12 -66
mlrun/model_monitoring/stream_processing.py +83 -270
mlrun/model_monitoring/writer.py +1 -10
mlrun/projects/project.py +87 -74
mlrun/runtimes/nuclio/function.py +7 -6
mlrun/runtimes/nuclio/serving.py +7 -1
mlrun/serving/routers.py +158 -145
mlrun/serving/server.py +6 -0
mlrun/serving/states.py +2 -0
mlrun/serving/v2_serving.py +69 -60
mlrun/utils/helpers.py +14 -30
mlrun/utils/notifications/notification/mail.py +36 -9
mlrun/utils/notifications/notification_pusher.py +34 -13
mlrun/utils/version/version.json +2 -2
{mlrun-1.8.0rc4.dist-info → mlrun-1.8.0rc6.dist-info}/METADATA +5 -4
{mlrun-1.8.0rc4.dist-info → mlrun-1.8.0rc6.dist-info}/RECORD +56 -69
mlrun/common/schemas/model_monitoring/model_endpoint_v2.py +0 -149
mlrun/model_monitoring/db/stores/__init__.py +0 -136
mlrun/model_monitoring/db/stores/base/__init__.py +0 -15
mlrun/model_monitoring/db/stores/base/store.py +0 -154
mlrun/model_monitoring/db/stores/sqldb/__init__.py +0 -13
mlrun/model_monitoring/db/stores/sqldb/models/__init__.py +0 -46
mlrun/model_monitoring/db/stores/sqldb/models/base.py +0 -93
mlrun/model_monitoring/db/stores/sqldb/models/mysql.py +0 -47
mlrun/model_monitoring/db/stores/sqldb/models/sqlite.py +0 -25
mlrun/model_monitoring/db/stores/sqldb/sql_store.py +0 -408
mlrun/model_monitoring/db/stores/v3io_kv/__init__.py +0 -13
mlrun/model_monitoring/db/stores/v3io_kv/kv_store.py +0 -464
mlrun/model_monitoring/model_endpoint.py +0 -120
{mlrun-1.8.0rc4.dist-info → mlrun-1.8.0rc6.dist-info}/LICENSE +0 -0
{mlrun-1.8.0rc4.dist-info → mlrun-1.8.0rc6.dist-info}/WHEEL +0 -0
{mlrun-1.8.0rc4.dist-info → mlrun-1.8.0rc6.dist-info}/entry_points.txt +0 -0
{mlrun-1.8.0rc4.dist-info → mlrun-1.8.0rc6.dist-info}/top_level.txt +0 -0

mlrun/model_monitoring/applications/context.py CHANGED Viewed

@@ -12,26 +12,36 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
-import json
 import socket
-from typing import Any, Optional, cast
+from typing import Any, Optional, Protocol, cast
+import nuclio.request
 import numpy as np
 import pandas as pd
 import mlrun.common.constants as mlrun_constants
 import mlrun.common.schemas.model_monitoring.constants as mm_constants
+import mlrun.errors
 import mlrun.feature_store as fstore
 import mlrun.features
 import mlrun.serving
 import mlrun.utils
 from mlrun.artifacts import Artifact, DatasetArtifact, ModelArtifact, get_model
-from mlrun.common.model_monitoring.helpers import FeatureStats, pad_features_hist
+from mlrun.common.model_monitoring.helpers import FeatureStats
+from mlrun.common.schemas import ModelEndpoint
 from mlrun.model_monitoring.helpers import (
     calculate_inputs_statistics,
-    get_endpoint_record,
 )
-from mlrun.model_monitoring.model_endpoint import ModelEndpoint
+class _ArtifactsLogger(Protocol):
+    """
+    Classes that implement this protocol are :code:`MlrunProject` and :code:`MLClientCtx`.
+    """
+    def log_artifact(self, *args, **kwargs) -> Artifact: ...
+    def log_dataset(self, *args, **kwargs) -> DatasetArtifact: ...
+    def log_model(self, *args, **kwargs) -> ModelArtifact: ...
 class MonitoringApplicationContext:
@@ -52,6 +62,7 @@ class MonitoringApplicationContext:
     :param end_infer_time:          (pd.Timestamp) End time of the monitoring schedule.
     :param latest_request:          (pd.Timestamp) Timestamp of the latest request on this endpoint_id.
     :param endpoint_id:             (str) ID of the monitored model endpoint
+    :param endpoint_name:           (str) Name of the monitored model endpoint
     :param output_stream_uri:       (str) URI of the output stream for results
     :param model_endpoint:          (ModelEndpoint) The model endpoint object.
     :param feature_names:           (list[str]) List of models feature names.
@@ -60,36 +71,57 @@ class MonitoringApplicationContext:
                                     and a list of extra data items.
     """
+    _logger_name = "monitoring-application"
     def __init__(
         self,
         *,
-        graph_context: mlrun.serving.GraphContext,
         application_name: str,
         event: dict[str, Any],
-        model_endpoint_dict: dict[str, ModelEndpoint],
+        model_endpoint_dict: Optional[dict[str, ModelEndpoint]] = None,
+        logger: Optional[mlrun.utils.Logger] = None,
+        graph_context: Optional[mlrun.serving.GraphContext] = None,
+        artifacts_logger: Optional[_ArtifactsLogger] = None,
     ) -> None:
         """
-        Initialize a `MonitoringApplicationContext` object.
+        Initialize a :code:`MonitoringApplicationContext` object.
         Note: this object should not be instantiated manually.
         :param application_name:    The application name.
         :param event:               The instance data dictionary.
-        :param model_endpoint_dict: Dictionary of model endpoints.
+        :param model_endpoint_dict: Optional - dictionary of model endpoints.
+        :param logger:              Optional - MLRun logger instance.
+        :param graph_context:       Optional - GraphContext instance.
+        :param artifacts_logger:    Optional - an object that can log artifacts,
+                                    typically :py:class:`~mlrun.projects.MlrunProject` or
+                                    :py:class:`~mlrun.execution.MLClientCtx`.
         """
         self.application_name = application_name
-        self.project_name = graph_context.project
-        self.project = mlrun.load_project(url=self.project_name)
+        if graph_context:
+            self.project_name = graph_context.project
+            self.project = mlrun.load_project(url=self.project_name)
+        else:
+            self.project = cast("mlrun.MlrunProject", mlrun.get_current_project())
+            self.project_name = self.project.name
+        self._artifacts_logger: _ArtifactsLogger = artifacts_logger or self.project
         # MLRun Logger
-        self.logger = mlrun.utils.create_logger(
+        self.logger = logger or mlrun.utils.create_logger(
             level=mlrun.mlconf.log_level,
             formatter_kind=mlrun.mlconf.log_formatter,
-            name="monitoring-application",
+            name=self._logger_name,
         )
         # Nuclio logger - `nuclio.request.Logger`.
-        # Note: this logger does not accept keyword arguments.
-        self.nuclio_logger = graph_context.logger
+        # Note: this logger accepts keyword arguments only in its `_with` methods, e.g. `info_with`.
+        self.nuclio_logger = (
+            graph_context.logger
+            if graph_context
+            else nuclio.request.Logger(
+                level=mlrun.mlconf.log_level, name=self._logger_name
+            )
+        )
         # event data
         self.start_infer_time = pd.Timestamp(
@@ -101,6 +133,9 @@ class MonitoringApplicationContext:
         self.endpoint_id = cast(
             str, event.get(mm_constants.ApplicationEvent.ENDPOINT_ID)
         )
+        self.endpoint_name = cast(
+            str, event.get(mm_constants.ApplicationEvent.ENDPOINT_NAME)
+        )
         self.output_stream_uri = cast(
             str, event.get(mm_constants.ApplicationEvent.OUTPUT_STREAM_URI)
         )
@@ -113,8 +148,8 @@ class MonitoringApplicationContext:
         # Persistent data - fetched when needed
         self._sample_df: Optional[pd.DataFrame] = None
-        self._model_endpoint: Optional[ModelEndpoint] = model_endpoint_dict.get(
-            self.endpoint_id
+        self._model_endpoint: Optional[ModelEndpoint] = (
+            model_endpoint_dict.get(self.endpoint_id) if model_endpoint_dict else None
         )
     def _get_default_labels(self) -> dict[str, str]:
@@ -133,7 +168,7 @@ class MonitoringApplicationContext:
     def sample_df(self) -> pd.DataFrame:
         if self._sample_df is None:
             feature_set = fstore.get_feature_set(
-                self.model_endpoint.status.monitoring_feature_set_uri
+                self.model_endpoint.spec.monitoring_feature_set_uri
             )
             features = [f"{feature_set.metadata.name}.*"]
             vector = fstore.FeatureVector(
@@ -155,16 +190,18 @@ class MonitoringApplicationContext:
     @property
     def model_endpoint(self) -> ModelEndpoint:
         if not self._model_endpoint:
-            self._model_endpoint = ModelEndpoint.from_flat_dict(
-                get_endpoint_record(self.project_name, self.endpoint_id)
+            self._model_endpoint = mlrun.db.get_run_db().get_model_endpoint(
+                name=self.endpoint_name,
+                project=self.project_name,
+                endpoint_id=self.endpoint_id,
+                feature_analysis=True,
             )
         return self._model_endpoint
     @property
     def feature_stats(self) -> FeatureStats:
         if not self._feature_stats:
-            self._feature_stats = json.loads(self.model_endpoint.status.feature_stats)
-            pad_features_hist(self._feature_stats)
+            self._feature_stats = self.model_endpoint.spec.feature_stats
         return self._feature_stats
     @property
@@ -179,18 +216,12 @@ class MonitoringApplicationContext:
     @property
     def feature_names(self) -> list[str]:
         """The feature names of the model"""
-        feature_names = self.model_endpoint.spec.feature_names
-        return (
-            feature_names
-            if isinstance(feature_names, list)
-            else json.loads(feature_names)
-        )
+        return self.model_endpoint.spec.feature_names
     @property
     def label_names(self) -> list[str]:
         """The label names of the model"""
-        label_names = self.model_endpoint.spec.label_names
-        return label_names if isinstance(label_names, list) else json.loads(label_names)
+        return self.model_endpoint.spec.label_names
     @property
     def model(self) -> tuple[str, ModelArtifact, dict]:
@@ -237,7 +268,7 @@ class MonitoringApplicationContext:
         See :func:`~mlrun.projects.MlrunProject.log_artifact` for the documentation.
         """
         labels = self._add_default_labels(labels)
-        return self.project.log_artifact(
+        return self._artifacts_logger.log_artifact(
             item,
             body=body,
             tag=tag,
@@ -272,7 +303,7 @@ class MonitoringApplicationContext:
         See :func:`~mlrun.projects.MlrunProject.log_dataset` for the documentation.
         """
         labels = self._add_default_labels(labels)
-        return self.project.log_dataset(
+        return self._artifacts_logger.log_dataset(
             key,
             df,
             tag=tag,
@@ -317,7 +348,7 @@ class MonitoringApplicationContext:
         See :func:`~mlrun.projects.MlrunProject.log_model` for the documentation.
         """
         labels = self._add_default_labels(labels)
-        return self.project.log_model(
+        return self._artifacts_logger.log_model(
             key,
             body=body,
             framework=framework,

mlrun/model_monitoring/applications/evidently_base.py CHANGED Viewed

@@ -76,7 +76,6 @@ class EvidentlyModelMonitoringApplicationBase(
         :param evidently_workspace_path:    (str) The path to the Evidently workspace.
         :param evidently_project_id:        (str) The ID of the Evidently project.
         """
         # TODO : more than one project (mep -> project)

mlrun/model_monitoring/applications/histogram_data_drift.py CHANGED Viewed

@@ -113,7 +113,7 @@ class HistogramDataDriftApplication(ModelMonitoringApplicationBase):
         project.enable_model_monitoring()
-    To avoid it, pass `deploy_histogram_data_drift_app=False`.
+    To avoid it, pass :code:`deploy_histogram_data_drift_app=False`.
     """
     NAME: Final[str] = HistogramDataDriftApplicationConstants.NAME
@@ -331,8 +331,7 @@ class HistogramDataDriftApplication(ModelMonitoringApplicationBase):
         )
     def do_tracking(
-        self,
-        monitoring_context: mm_context.MonitoringApplicationContext,
+        self, monitoring_context: mm_context.MonitoringApplicationContext
     ) -> list[
         Union[
             mm_results.ModelMonitoringApplicationResult,
@@ -342,9 +341,6 @@ class HistogramDataDriftApplication(ModelMonitoringApplicationBase):
     ]:
         """
         Calculate and return the data drift metrics, averaged over the features.
-        Refer to `ModelMonitoringApplicationBaseV2` for the meaning of the
-        function arguments.
         """
         monitoring_context.logger.debug("Starting to run the application")
         if not monitoring_context.feature_stats:

mlrun/model_monitoring/controller.py CHANGED Viewed

@@ -19,7 +19,7 @@ import os
 from collections.abc import Iterator
 from contextlib import AbstractContextManager
 from types import TracebackType
-from typing import Any, NamedTuple, Optional, cast
+from typing import NamedTuple, Optional, cast
 import nuclio_sdk
@@ -27,6 +27,7 @@ import mlrun
 import mlrun.common.schemas.model_monitoring.constants as mm_constants
 import mlrun.feature_store as fstore
 import mlrun.model_monitoring
+from mlrun.common.schemas import EndpointType
 from mlrun.datastore import get_stream_pusher
 from mlrun.errors import err_to_str
 from mlrun.model_monitoring.db._schedules import ModelMonitoringSchedulesFile
@@ -65,7 +66,7 @@ class _BatchWindow:
         self._start = self._get_last_analyzed()
     def _get_saved_last_analyzed(self) -> Optional[int]:
-        return self._db.get_application_time(self._application)
+        return cast(int, self._db.get_application_time(self._application))
     def _update_last_analyzed(self, last_analyzed: int) -> None:
         self._db.update_application_time(
@@ -161,18 +162,20 @@ class _BatchWindowGenerator(AbstractContextManager):
         )
     @classmethod
-    def _get_last_updated_time(cls, last_request: str, has_stream: bool) -> int:
+    def _get_last_updated_time(
+        cls, last_request: datetime.datetime, not_batch_endpoint: bool
+    ) -> int:
         """
         Get the last updated time of a model endpoint.
         """
         last_updated = int(
-            cls._date_string2timestamp(last_request)
+            last_request.timestamp()
             - cast(
                 float,
                 mlrun.mlconf.model_endpoint_monitoring.parquet_batching_timeout_secs,
             )
         )
-        if not has_stream:
+        if not not_batch_endpoint:
             # If the endpoint does not have a stream, `last_updated` should be
             # the minimum between the current time and the last updated time.
             # This compensates for the bumping mechanism - see
@@ -183,17 +186,13 @@ class _BatchWindowGenerator(AbstractContextManager):
             )
         return last_updated
-    @staticmethod
-    def _date_string2timestamp(date_string: str) -> int:
-        return int(datetime.datetime.fromisoformat(date_string).timestamp())
     def get_intervals(
         self,
         *,
         application: str,
-        first_request: str,
-        last_request: str,
-        has_stream: bool,
+        first_request: datetime.datetime,
+        last_request: datetime.datetime,
+        not_batch_endpoint: bool,
     ) -> Iterator[_Interval]:
         """
         Get the batch window for a specific endpoint and application.
@@ -204,8 +203,8 @@ class _BatchWindowGenerator(AbstractContextManager):
             schedules_file=self._schedules_file,
             application=application,
             timedelta_seconds=self._timedelta,
-            last_updated=self._get_last_updated_time(last_request, has_stream),
-            first_request=self._date_string2timestamp(first_request),
+            last_updated=self._get_last_updated_time(last_request, not_batch_endpoint),
+            first_request=int(first_request.timestamp()),
         )
         yield from batch_window.get_intervals()
@@ -235,8 +234,6 @@ class MonitoringApplicationController:
         logger.debug(f"Initializing {self.__class__.__name__}", project=self.project)
-        self.db = mlrun.model_monitoring.get_store_object(project=self.project)
         self._window_length = _get_window_length()
         self.model_monitoring_access_key = self._get_model_monitoring_access_key()
@@ -253,19 +250,16 @@ class MonitoringApplicationController:
         return access_key
     @staticmethod
-    def _should_monitor_endpoint(endpoint: dict[str, Any]) -> bool:
+    def _should_monitor_endpoint(endpoint: mlrun.common.schemas.ModelEndpoint) -> bool:
         return (
-            # Is the model endpoint active?
-            endpoint[mm_constants.EventFieldType.ACTIVE]
             # Is the model endpoint monitored?
-            and endpoint[mm_constants.EventFieldType.MONITORING_MODE]
-            == mm_constants.ModelMonitoringMode.enabled
+            endpoint.status.monitoring_mode == mm_constants.ModelMonitoringMode.enabled
             # Was the model endpoint called? I.e., are the first and last requests nonempty?
-            and endpoint[mm_constants.EventFieldType.FIRST_REQUEST]
-            and endpoint[mm_constants.EventFieldType.LAST_REQUEST]
+            and endpoint.status.first_request
+            and endpoint.status.last_request
             # Is the model endpoint not a router endpoint? Router endpoint has no feature stats
-            and int(endpoint[mm_constants.EventFieldType.ENDPOINT_TYPE])
-            != mm_constants.EndpointType.ROUTER
+            and endpoint.metadata.endpoint_type.value
+            != mm_constants.EndpointType.ROUTER.value
         )
     def run(self) -> None:
@@ -281,7 +275,10 @@ class MonitoringApplicationController:
         logger.info("Start running monitoring controller")
         try:
             applications_names = []
-            endpoints = self.db.list_model_endpoints(include_stats=True)
+            endpoints_list = mlrun.db.get_run_db().list_model_endpoints(
+                project=self.project, tsdb_metrics=True
+            )
+            endpoints = endpoints_list.endpoints
             if not endpoints:
                 logger.info("No model endpoints found", project=self.project)
                 return
@@ -333,12 +330,19 @@ class MonitoringApplicationController:
                         model_monitoring_access_key=self.model_monitoring_access_key,
                         storage_options=self.storage_options,
                     )
+                else:
+                    logger.debug(
+                        "Skipping endpoint, not ready or not suitable for monitoring",
+                        endpoint_id=endpoint.metadata.uid,
+                        endpoint_name=endpoint.metadata.name,
+                    )
+        logger.info("Finished running monitoring controller")
     @classmethod
     def model_endpoint_process(
         cls,
         project: str,
-        endpoint: dict,
+        endpoint: mlrun.common.schemas.ModelEndpoint,
         applications_names: list[str],
         window_length: int,
         model_monitoring_access_key: str,
@@ -356,11 +360,11 @@ class MonitoringApplicationController:
         :param model_monitoring_access_key: (str) Access key to apply the model monitoring process.
         :param storage_options:             (dict) Storage options for reading the infer parquet files.
         """
-        endpoint_id = endpoint[mm_constants.EventFieldType.UID]
-        has_stream = endpoint[mm_constants.EventFieldType.STREAM_PATH] != ""
-        m_fs = fstore.get_feature_set(
-            endpoint[mm_constants.EventFieldType.FEATURE_SET_URI]
+        endpoint_id = endpoint.metadata.uid
+        not_batch_endpoint = not (
+            endpoint.metadata.endpoint_type == EndpointType.BATCH_EP
         )
+        m_fs = fstore.get_feature_set(endpoint.spec.monitoring_feature_set_uri)
         try:
             with _BatchWindowGenerator(
                 project=project, endpoint_id=endpoint_id, window_length=window_length
@@ -371,11 +375,9 @@ class MonitoringApplicationController:
                         end_infer_time,
                     ) in batch_window_generator.get_intervals(
                         application=application,
-                        first_request=endpoint[
-                            mm_constants.EventFieldType.FIRST_REQUEST
-                        ],
-                        last_request=endpoint[mm_constants.EventFieldType.LAST_REQUEST],
-                        has_stream=has_stream,
+                        first_request=endpoint.status.first_request,
+                        last_request=endpoint.status.last_request,
+                        not_batch_endpoint=not_batch_endpoint,
                     ):
                         df = m_fs.to_dataframe(
                             start_time=start_infer_time,
@@ -401,15 +403,17 @@ class MonitoringApplicationController:
                                 start_infer_time=start_infer_time,
                                 end_infer_time=end_infer_time,
                                 endpoint_id=endpoint_id,
+                                endpoint_name=endpoint.metadata.name,
                                 project=project,
                                 applications_names=[application],
                                 model_monitoring_access_key=model_monitoring_access_key,
                             )
+                logger.info("Finished processing endpoint", endpoint_id=endpoint_id)
         except Exception:
             logger.exception(
                 "Encountered an exception",
-                endpoint_id=endpoint[mm_constants.EventFieldType.UID],
+                endpoint_id=endpoint.metadata.uid,
             )
     @staticmethod
@@ -417,6 +421,7 @@ class MonitoringApplicationController:
         start_infer_time: datetime.datetime,
         end_infer_time: datetime.datetime,
         endpoint_id: str,
+        endpoint_name: str,
         project: str,
         applications_names: list[str],
         model_monitoring_access_key: str,
@@ -440,6 +445,7 @@ class MonitoringApplicationController:
                 sep=" ", timespec="microseconds"
             ),
             mm_constants.ApplicationEvent.ENDPOINT_ID: endpoint_id,
+            mm_constants.ApplicationEvent.ENDPOINT_NAME: endpoint_name,
             mm_constants.ApplicationEvent.OUTPUT_STREAM_URI: get_stream_path(
                 project=project,
                 function_name=mm_constants.MonitoringFunctionNames.WRITER,

mlrun/model_monitoring/db/__init__.py CHANGED Viewed

@@ -12,7 +12,5 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
-from .stores import ObjectStoreFactory, get_store_object
-from .stores.base import StoreBase
 from .tsdb import get_tsdb_connector
 from .tsdb.base import TSDBConnector

mlrun/model_monitoring/db/tsdb/base.py CHANGED Viewed

@@ -47,7 +47,7 @@ class TSDBConnector(ABC):
         self.project = project
     @abstractmethod
-    def apply_monitoring_stream_steps(self, graph) -> None:
+    def apply_monitoring_stream_steps(self, graph, **kwargs) -> None:
         """
         Apply TSDB steps on the provided monitoring graph. Throughout these steps, the graph stores live data of
         different key metric dictionaries. This data is being used by the monitoring dashboards in
@@ -294,6 +294,7 @@ class TSDBConnector(ABC):
     ) -> pd.DataFrame:
         """
         Fetches data from the predictions TSDB table and returns the average latency for each specified endpoint
+        in the provided time range, which by default is the last 24 hours.
         :param endpoint_ids:    A list of model endpoint identifiers.
         :param start:           The start time for the query.

mlrun/model_monitoring/db/tsdb/tdengine/tdengine_connector.py CHANGED Viewed

@@ -164,7 +164,7 @@ class TDEngineConnector(TSDBConnector):
     def _convert_to_datetime(val: typing.Union[str, datetime]) -> datetime:
         return datetime.fromisoformat(val) if isinstance(val, str) else val
-    def apply_monitoring_stream_steps(self, graph):
+    def apply_monitoring_stream_steps(self, graph, **kwarg):
         """
         Apply TSDB steps on the provided monitoring graph. Throughout these steps, the graph stores live data of
         different key metric dictionaries. This data is being used by the monitoring dashboards in
@@ -701,6 +701,7 @@ class TDEngineConnector(TSDBConnector):
         endpoint_ids = (
             endpoint_ids if isinstance(endpoint_ids, list) else [endpoint_ids]
         )
+        start = start or (mlrun.utils.datetime_now() - timedelta(hours=24))
         start, end = self._get_start_end(start, end)
         df = self._get_records(
             table=self.tables[mm_schemas.TDEngineSuperTables.PREDICTIONS].super_table,

mlrun/model_monitoring/db/tsdb/v3io/v3io_connector.py CHANGED Viewed

@@ -168,6 +168,9 @@ class V3IOTSDBConnector(TSDBConnector):
         tsdb_batching_max_events: int = 1000,
         tsdb_batching_timeout_secs: int = 30,
         sample_window: int = 10,
+        aggregate_windows: Optional[list[str]] = None,
+        aggregate_period: str = "1m",
+        **kwarg,
     ):
         """
         Apply TSDB steps on the provided monitoring graph. Throughout these steps, the graph stores live data of
@@ -178,7 +181,40 @@ class V3IOTSDBConnector(TSDBConnector):
         - endpoint_features (Prediction and feature names and values)
         - custom_metrics (user-defined metrics)
         """
+        aggregate_windows = aggregate_windows or ["5m", "1h"]
+        # Calculate number of predictions and average latency
+        def apply_storey_aggregations():
+            # Calculate number of predictions for each window (5 min and 1 hour by default)
+            graph.add_step(
+                class_name="storey.AggregateByKey",
+                aggregates=[
+                    {
+                        "name": EventFieldType.LATENCY,
+                        "column": EventFieldType.LATENCY,
+                        "operations": ["count", "avg"],
+                        "windows": aggregate_windows,
+                        "period": aggregate_period,
+                    }
+                ],
+                name=EventFieldType.LATENCY,
+                after="MapFeatureNames",
+                step_name="Aggregates",
+                table=".",
+                key_field=EventFieldType.ENDPOINT_ID,
+            )
+            # Calculate average latency time for each window (5 min and 1 hour by default)
+            graph.add_step(
+                class_name="storey.Rename",
+                mapping={
+                    "latency_count_5m": mm_schemas.EventLiveStats.PREDICTIONS_COUNT_5M,
+                    "latency_count_1h": mm_schemas.EventLiveStats.PREDICTIONS_COUNT_1H,
+                },
+                name="Rename",
+                after=EventFieldType.LATENCY,
+            )
+        apply_storey_aggregations()
         # Write latency per prediction, labeled by endpoint ID only
         graph.add_step(
             "storey.TSDBTarget",
@@ -853,6 +889,7 @@ class V3IOTSDBConnector(TSDBConnector):
         endpoint_ids = (
             endpoint_ids if isinstance(endpoint_ids, list) else [endpoint_ids]
         )
+        start = start or (mlrun.utils.datetime_now() - timedelta(hours=24))
         start, end = self._get_start_end(start, end)
         df = self._get_records(
             table=mm_schemas.FileTargetKind.PREDICTIONS,
@@ -864,4 +901,10 @@ class V3IOTSDBConnector(TSDBConnector):
         )
         if not df.empty:
             df.dropna(inplace=True)
+            df.rename(
+                columns={
+                    f"avg({mm_schemas.EventFieldType.LATENCY})": f"avg_{mm_schemas.EventFieldType.LATENCY}"
+                },
+                inplace=True,
+            )
         return df.reset_index(drop=True)

mlrun 1.8.0rc4__py3-none-any.whl → 1.8.0rc6__py3-none-any.whl

Potentially problematic release.

mlrun 1.8.0rc4__py3-none-any.whl → 1.8.0rc6__py3-none-any.whl