PyPI - mlrun - Versions diffs - 1.8.0rc5__py3-none-any.whl → 1.8.0rc9__py3-none-any.whl - Mend

mlrun 1.8.0rc5__py3-none-any.whl → 1.8.0rc9__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of mlrun might be problematic. Click here for more details.

Files changed (74)

mlrun/__init__.py +1 -0
mlrun/artifacts/__init__.py +1 -1
mlrun/artifacts/base.py +21 -1
mlrun/artifacts/document.py +62 -39
mlrun/artifacts/manager.py +12 -5
mlrun/common/constants.py +1 -0
mlrun/common/model_monitoring/__init__.py +0 -2
mlrun/common/model_monitoring/helpers.py +0 -28
mlrun/common/schemas/__init__.py +2 -4
mlrun/common/schemas/alert.py +77 -1
mlrun/common/schemas/client_spec.py +0 -1
mlrun/common/schemas/model_monitoring/__init__.py +0 -6
mlrun/common/schemas/model_monitoring/constants.py +11 -9
mlrun/common/schemas/model_monitoring/model_endpoints.py +77 -149
mlrun/common/schemas/notification.py +6 -0
mlrun/common/schemas/project.py +3 -0
mlrun/config.py +2 -3
mlrun/datastore/datastore_profile.py +57 -17
mlrun/datastore/sources.py +1 -2
mlrun/datastore/store_resources.py +7 -2
mlrun/datastore/vectorstore.py +99 -62
mlrun/db/base.py +34 -20
mlrun/db/httpdb.py +249 -163
mlrun/db/nopdb.py +40 -17
mlrun/execution.py +14 -7
mlrun/feature_store/api.py +1 -0
mlrun/model.py +3 -0
mlrun/model_monitoring/__init__.py +3 -2
mlrun/model_monitoring/api.py +64 -53
mlrun/model_monitoring/applications/_application_steps.py +3 -1
mlrun/model_monitoring/applications/base.py +115 -15
mlrun/model_monitoring/applications/context.py +42 -24
mlrun/model_monitoring/applications/histogram_data_drift.py +1 -1
mlrun/model_monitoring/controller.py +43 -37
mlrun/model_monitoring/db/__init__.py +0 -2
mlrun/model_monitoring/db/tsdb/base.py +2 -1
mlrun/model_monitoring/db/tsdb/tdengine/tdengine_connector.py +2 -1
mlrun/model_monitoring/db/tsdb/v3io/v3io_connector.py +43 -0
mlrun/model_monitoring/helpers.py +78 -66
mlrun/model_monitoring/stream_processing.py +83 -270
mlrun/model_monitoring/writer.py +1 -10
mlrun/projects/pipelines.py +37 -1
mlrun/projects/project.py +173 -70
mlrun/run.py +40 -0
mlrun/runtimes/nuclio/function.py +7 -6
mlrun/runtimes/nuclio/serving.py +9 -4
mlrun/serving/routers.py +158 -145
mlrun/serving/server.py +6 -0
mlrun/serving/states.py +21 -7
mlrun/serving/v2_serving.py +94 -68
mlrun/utils/helpers.py +23 -33
mlrun/utils/notifications/notification/mail.py +17 -6
mlrun/utils/notifications/notification_pusher.py +9 -5
mlrun/utils/regex.py +8 -1
mlrun/utils/version/version.json +2 -2
{mlrun-1.8.0rc5.dist-info → mlrun-1.8.0rc9.dist-info}/METADATA +2 -2
{mlrun-1.8.0rc5.dist-info → mlrun-1.8.0rc9.dist-info}/RECORD +61 -74
mlrun/common/schemas/model_monitoring/model_endpoint_v2.py +0 -149
mlrun/model_monitoring/db/stores/__init__.py +0 -136
mlrun/model_monitoring/db/stores/base/__init__.py +0 -15
mlrun/model_monitoring/db/stores/base/store.py +0 -154
mlrun/model_monitoring/db/stores/sqldb/__init__.py +0 -13
mlrun/model_monitoring/db/stores/sqldb/models/__init__.py +0 -46
mlrun/model_monitoring/db/stores/sqldb/models/base.py +0 -93
mlrun/model_monitoring/db/stores/sqldb/models/mysql.py +0 -47
mlrun/model_monitoring/db/stores/sqldb/models/sqlite.py +0 -25
mlrun/model_monitoring/db/stores/sqldb/sql_store.py +0 -408
mlrun/model_monitoring/db/stores/v3io_kv/__init__.py +0 -13
mlrun/model_monitoring/db/stores/v3io_kv/kv_store.py +0 -464
mlrun/model_monitoring/model_endpoint.py +0 -120
{mlrun-1.8.0rc5.dist-info → mlrun-1.8.0rc9.dist-info}/LICENSE +0 -0
{mlrun-1.8.0rc5.dist-info → mlrun-1.8.0rc9.dist-info}/WHEEL +0 -0
{mlrun-1.8.0rc5.dist-info → mlrun-1.8.0rc9.dist-info}/entry_points.txt +0 -0
{mlrun-1.8.0rc5.dist-info → mlrun-1.8.0rc9.dist-info}/top_level.txt +0 -0

mlrun/model_monitoring/applications/context.py CHANGED Viewed

@@ -12,7 +12,6 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
-import json
 import socket
 from typing import Any, Optional, Protocol, cast
@@ -28,12 +27,11 @@ import mlrun.features
 import mlrun.serving
 import mlrun.utils
 from mlrun.artifacts import Artifact, DatasetArtifact, ModelArtifact, get_model
-from mlrun.common.model_monitoring.helpers import FeatureStats, pad_features_hist
+from mlrun.common.model_monitoring.helpers import FeatureStats
+from mlrun.common.schemas import ModelEndpoint
 from mlrun.model_monitoring.helpers import (
     calculate_inputs_statistics,
-    get_endpoint_record,
 )
-from mlrun.model_monitoring.model_endpoint import ModelEndpoint
 class _ArtifactsLogger(Protocol):
@@ -64,6 +62,7 @@ class MonitoringApplicationContext:
     :param end_infer_time:          (pd.Timestamp) End time of the monitoring schedule.
     :param latest_request:          (pd.Timestamp) Timestamp of the latest request on this endpoint_id.
     :param endpoint_id:             (str) ID of the monitored model endpoint
+    :param endpoint_name:           (str) Name of the monitored model endpoint
     :param output_stream_uri:       (str) URI of the output stream for results
     :param model_endpoint:          (ModelEndpoint) The model endpoint object.
     :param feature_names:           (list[str]) List of models feature names.
@@ -82,10 +81,14 @@ class MonitoringApplicationContext:
         model_endpoint_dict: Optional[dict[str, ModelEndpoint]] = None,
         logger: Optional[mlrun.utils.Logger] = None,
         graph_context: Optional[mlrun.serving.GraphContext] = None,
+        context: Optional["mlrun.MLClientCtx"] = None,
         artifacts_logger: Optional[_ArtifactsLogger] = None,
+        sample_df: Optional[pd.DataFrame] = None,
+        feature_stats: Optional[FeatureStats] = None,
     ) -> None:
         """
-        Initialize a :code:`MonitoringApplicationContext` object.
+        The :code:`__init__` method initializes a :code:`MonitoringApplicationContext` object
+        and has the following attributes.
         Note: this object should not be instantiated manually.
         :param application_name:    The application name.
@@ -93,17 +96,27 @@ class MonitoringApplicationContext:
         :param model_endpoint_dict: Optional - dictionary of model endpoints.
         :param logger:              Optional - MLRun logger instance.
         :param graph_context:       Optional - GraphContext instance.
+        :param context:             Optional - MLClientCtx instance.
         :param artifacts_logger:    Optional - an object that can log artifacts,
                                     typically :py:class:`~mlrun.projects.MlrunProject` or
                                     :py:class:`~mlrun.execution.MLClientCtx`.
+        :param sample_df:           Optional - pandas data-frame as the current dataset.
+                                    When set, it replaces the data read from the offline source.
+        :param feature_stats:       Optional - statistics dictionary of the reference data.
+                                    When set, it overrides the model endpoint's feature stats.
         """
         self.application_name = application_name
         if graph_context:
             self.project_name = graph_context.project
             self.project = mlrun.load_project(url=self.project_name)
-        else:
-            self.project = cast("mlrun.MlrunProject", mlrun.get_current_project())
+        elif context:
+            potential_project = context.get_project_object()
+            if not potential_project:
+                raise mlrun.errors.MLRunValueError(
+                    "Could not load project from context"
+                )
+            self.project = potential_project
             self.project_name = self.project.name
         self._artifacts_logger: _ArtifactsLogger = artifacts_logger or self.project
@@ -134,29 +147,38 @@ class MonitoringApplicationContext:
         self.endpoint_id = cast(
             str, event.get(mm_constants.ApplicationEvent.ENDPOINT_ID)
         )
+        self.endpoint_name = cast(
+            str, event.get(mm_constants.ApplicationEvent.ENDPOINT_NAME)
+        )
         self.output_stream_uri = cast(
             str, event.get(mm_constants.ApplicationEvent.OUTPUT_STREAM_URI)
         )
-        self._feature_stats: Optional[FeatureStats] = None
+        self._feature_stats: Optional[FeatureStats] = feature_stats
         self._sample_df_stats: Optional[FeatureStats] = None
         # Default labels for the artifacts
         self._default_labels = self._get_default_labels()
         # Persistent data - fetched when needed
-        self._sample_df: Optional[pd.DataFrame] = None
+        self._sample_df: Optional[pd.DataFrame] = sample_df
         self._model_endpoint: Optional[ModelEndpoint] = (
             model_endpoint_dict.get(self.endpoint_id) if model_endpoint_dict else None
         )
     def _get_default_labels(self) -> dict[str, str]:
-        return {
+        labels = {
             mlrun_constants.MLRunInternalLabels.runner_pod: socket.gethostname(),
             mlrun_constants.MLRunInternalLabels.producer_type: "model-monitoring-app",
             mlrun_constants.MLRunInternalLabels.app_name: self.application_name,
-            mlrun_constants.MLRunInternalLabels.endpoint_id: self.endpoint_id,
         }
+        for key, value in [
+            (mlrun_constants.MLRunInternalLabels.endpoint_id, self.endpoint_id),
+            (mlrun_constants.MLRunInternalLabels.endpoint_name, self.endpoint_name),
+        ]:
+            if value:
+                labels[key] = value
+        return labels
     def _add_default_labels(self, labels: Optional[dict[str, str]]) -> dict[str, str]:
         """Add the default labels to logged artifacts labels"""
@@ -166,7 +188,7 @@ class MonitoringApplicationContext:
     def sample_df(self) -> pd.DataFrame:
         if self._sample_df is None:
             feature_set = fstore.get_feature_set(
-                self.model_endpoint.status.monitoring_feature_set_uri
+                self.model_endpoint.spec.monitoring_feature_set_uri
             )
             features = [f"{feature_set.metadata.name}.*"]
             vector = fstore.FeatureVector(
@@ -188,16 +210,18 @@ class MonitoringApplicationContext:
     @property
     def model_endpoint(self) -> ModelEndpoint:
         if not self._model_endpoint:
-            self._model_endpoint = ModelEndpoint.from_flat_dict(
-                get_endpoint_record(self.project_name, self.endpoint_id)
+            self._model_endpoint = mlrun.db.get_run_db().get_model_endpoint(
+                name=self.endpoint_name,
+                project=self.project_name,
+                endpoint_id=self.endpoint_id,
+                feature_analysis=True,
             )
         return self._model_endpoint
     @property
     def feature_stats(self) -> FeatureStats:
         if not self._feature_stats:
-            self._feature_stats = json.loads(self.model_endpoint.status.feature_stats)
-            pad_features_hist(self._feature_stats)
+            self._feature_stats = self.model_endpoint.spec.feature_stats
         return self._feature_stats
     @property
@@ -212,18 +236,12 @@ class MonitoringApplicationContext:
     @property
     def feature_names(self) -> list[str]:
         """The feature names of the model"""
-        feature_names = self.model_endpoint.spec.feature_names
-        return (
-            feature_names
-            if isinstance(feature_names, list)
-            else json.loads(feature_names)
-        )
+        return self.model_endpoint.spec.feature_names
     @property
     def label_names(self) -> list[str]:
         """The label names of the model"""
-        label_names = self.model_endpoint.spec.label_names
-        return label_names if isinstance(label_names, list) else json.loads(label_names)
+        return self.model_endpoint.spec.label_names
     @property
     def model(self) -> tuple[str, ModelArtifact, dict]:

mlrun/model_monitoring/applications/histogram_data_drift.py CHANGED Viewed

@@ -227,7 +227,7 @@ class HistogramDataDriftApplication(ModelMonitoringApplicationBase):
         :param metrics: the calculated metrics
         :param metrics_per_feature: metric calculated per feature
         :param monitoring_context:  context object for current monitoring application
-        :return: list of mm_results._ModelMonitoringApplicationStats for histogram data drift application
+        :returns: list of mm_results._ModelMonitoringApplicationStats for histogram data drift application
         """
         stats = []
         for stats_type in HistogramDataDriftApplication._STATS_TYPES:

mlrun/model_monitoring/controller.py CHANGED Viewed

@@ -19,7 +19,7 @@ import os
 from collections.abc import Iterator
 from contextlib import AbstractContextManager
 from types import TracebackType
-from typing import Any, NamedTuple, Optional, cast
+from typing import NamedTuple, Optional, cast
 import nuclio_sdk
@@ -27,6 +27,7 @@ import mlrun
 import mlrun.common.schemas.model_monitoring.constants as mm_constants
 import mlrun.feature_store as fstore
 import mlrun.model_monitoring
+from mlrun.common.schemas import EndpointType
 from mlrun.datastore import get_stream_pusher
 from mlrun.errors import err_to_str
 from mlrun.model_monitoring.db._schedules import ModelMonitoringSchedulesFile
@@ -65,7 +66,7 @@ class _BatchWindow:
         self._start = self._get_last_analyzed()
     def _get_saved_last_analyzed(self) -> Optional[int]:
-        return self._db.get_application_time(self._application)
+        return cast(int, self._db.get_application_time(self._application))
     def _update_last_analyzed(self, last_analyzed: int) -> None:
         self._db.update_application_time(
@@ -161,18 +162,20 @@ class _BatchWindowGenerator(AbstractContextManager):
         )
     @classmethod
-    def _get_last_updated_time(cls, last_request: str, has_stream: bool) -> int:
+    def _get_last_updated_time(
+        cls, last_request: datetime.datetime, not_batch_endpoint: bool
+    ) -> int:
         """
         Get the last updated time of a model endpoint.
         """
         last_updated = int(
-            cls._date_string2timestamp(last_request)
+            last_request.timestamp()
             - cast(
                 float,
                 mlrun.mlconf.model_endpoint_monitoring.parquet_batching_timeout_secs,
             )
         )
-        if not has_stream:
+        if not not_batch_endpoint:
             # If the endpoint does not have a stream, `last_updated` should be
             # the minimum between the current time and the last updated time.
             # This compensates for the bumping mechanism - see
@@ -183,17 +186,13 @@ class _BatchWindowGenerator(AbstractContextManager):
             )
         return last_updated
-    @staticmethod
-    def _date_string2timestamp(date_string: str) -> int:
-        return int(datetime.datetime.fromisoformat(date_string).timestamp())
     def get_intervals(
         self,
         *,
         application: str,
-        first_request: str,
-        last_request: str,
-        has_stream: bool,
+        first_request: datetime.datetime,
+        last_request: datetime.datetime,
+        not_batch_endpoint: bool,
     ) -> Iterator[_Interval]:
         """
         Get the batch window for a specific endpoint and application.
@@ -204,8 +203,8 @@ class _BatchWindowGenerator(AbstractContextManager):
             schedules_file=self._schedules_file,
             application=application,
             timedelta_seconds=self._timedelta,
-            last_updated=self._get_last_updated_time(last_request, has_stream),
-            first_request=self._date_string2timestamp(first_request),
+            last_updated=self._get_last_updated_time(last_request, not_batch_endpoint),
+            first_request=int(first_request.timestamp()),
         )
         yield from batch_window.get_intervals()
@@ -235,8 +234,6 @@ class MonitoringApplicationController:
         logger.debug(f"Initializing {self.__class__.__name__}", project=self.project)
-        self.db = mlrun.model_monitoring.get_store_object(project=self.project)
         self._window_length = _get_window_length()
         self.model_monitoring_access_key = self._get_model_monitoring_access_key()
@@ -253,19 +250,16 @@ class MonitoringApplicationController:
         return access_key
     @staticmethod
-    def _should_monitor_endpoint(endpoint: dict[str, Any]) -> bool:
+    def _should_monitor_endpoint(endpoint: mlrun.common.schemas.ModelEndpoint) -> bool:
         return (
-            # Is the model endpoint active?
-            endpoint[mm_constants.EventFieldType.ACTIVE]
             # Is the model endpoint monitored?
-            and endpoint[mm_constants.EventFieldType.MONITORING_MODE]
-            == mm_constants.ModelMonitoringMode.enabled
+            endpoint.status.monitoring_mode == mm_constants.ModelMonitoringMode.enabled
             # Was the model endpoint called? I.e., are the first and last requests nonempty?
-            and endpoint[mm_constants.EventFieldType.FIRST_REQUEST]
-            and endpoint[mm_constants.EventFieldType.LAST_REQUEST]
+            and endpoint.status.first_request
+            and endpoint.status.last_request
             # Is the model endpoint not a router endpoint? Router endpoint has no feature stats
-            and int(endpoint[mm_constants.EventFieldType.ENDPOINT_TYPE])
-            != mm_constants.EndpointType.ROUTER
+            and endpoint.metadata.endpoint_type.value
+            != mm_constants.EndpointType.ROUTER.value
         )
     def run(self) -> None:
@@ -281,7 +275,10 @@ class MonitoringApplicationController:
         logger.info("Start running monitoring controller")
         try:
             applications_names = []
-            endpoints = self.db.list_model_endpoints(include_stats=True)
+            endpoints_list = mlrun.db.get_run_db().list_model_endpoints(
+                project=self.project, tsdb_metrics=True
+            )
+            endpoints = endpoints_list.endpoints
             if not endpoints:
                 logger.info("No model endpoints found", project=self.project)
                 return
@@ -333,12 +330,19 @@ class MonitoringApplicationController:
                         model_monitoring_access_key=self.model_monitoring_access_key,
                         storage_options=self.storage_options,
                     )
+                else:
+                    logger.debug(
+                        "Skipping endpoint, not ready or not suitable for monitoring",
+                        endpoint_id=endpoint.metadata.uid,
+                        endpoint_name=endpoint.metadata.name,
+                    )
+        logger.info("Finished running monitoring controller")
     @classmethod
     def model_endpoint_process(
         cls,
         project: str,
-        endpoint: dict,
+        endpoint: mlrun.common.schemas.ModelEndpoint,
         applications_names: list[str],
         window_length: int,
         model_monitoring_access_key: str,
@@ -356,11 +360,11 @@ class MonitoringApplicationController:
         :param model_monitoring_access_key: (str) Access key to apply the model monitoring process.
         :param storage_options:             (dict) Storage options for reading the infer parquet files.
         """
-        endpoint_id = endpoint[mm_constants.EventFieldType.UID]
-        has_stream = endpoint[mm_constants.EventFieldType.STREAM_PATH] != ""
-        m_fs = fstore.get_feature_set(
-            endpoint[mm_constants.EventFieldType.FEATURE_SET_URI]
+        endpoint_id = endpoint.metadata.uid
+        not_batch_endpoint = not (
+            endpoint.metadata.endpoint_type == EndpointType.BATCH_EP
         )
+        m_fs = fstore.get_feature_set(endpoint.spec.monitoring_feature_set_uri)
         try:
             with _BatchWindowGenerator(
                 project=project, endpoint_id=endpoint_id, window_length=window_length
@@ -371,11 +375,9 @@ class MonitoringApplicationController:
                         end_infer_time,
                     ) in batch_window_generator.get_intervals(
                         application=application,
-                        first_request=endpoint[
-                            mm_constants.EventFieldType.FIRST_REQUEST
-                        ],
-                        last_request=endpoint[mm_constants.EventFieldType.LAST_REQUEST],
-                        has_stream=has_stream,
+                        first_request=endpoint.status.first_request,
+                        last_request=endpoint.status.last_request,
+                        not_batch_endpoint=not_batch_endpoint,
                     ):
                         df = m_fs.to_dataframe(
                             start_time=start_infer_time,
@@ -401,15 +403,17 @@ class MonitoringApplicationController:
                                 start_infer_time=start_infer_time,
                                 end_infer_time=end_infer_time,
                                 endpoint_id=endpoint_id,
+                                endpoint_name=endpoint.metadata.name,
                                 project=project,
                                 applications_names=[application],
                                 model_monitoring_access_key=model_monitoring_access_key,
                             )
+                logger.info("Finished processing endpoint", endpoint_id=endpoint_id)
         except Exception:
             logger.exception(
                 "Encountered an exception",
-                endpoint_id=endpoint[mm_constants.EventFieldType.UID],
+                endpoint_id=endpoint.metadata.uid,
             )
     @staticmethod
@@ -417,6 +421,7 @@ class MonitoringApplicationController:
         start_infer_time: datetime.datetime,
         end_infer_time: datetime.datetime,
         endpoint_id: str,
+        endpoint_name: str,
         project: str,
         applications_names: list[str],
         model_monitoring_access_key: str,
@@ -440,6 +445,7 @@ class MonitoringApplicationController:
                 sep=" ", timespec="microseconds"
             ),
             mm_constants.ApplicationEvent.ENDPOINT_ID: endpoint_id,
+            mm_constants.ApplicationEvent.ENDPOINT_NAME: endpoint_name,
             mm_constants.ApplicationEvent.OUTPUT_STREAM_URI: get_stream_path(
                 project=project,
                 function_name=mm_constants.MonitoringFunctionNames.WRITER,

mlrun/model_monitoring/db/__init__.py CHANGED Viewed

@@ -12,7 +12,5 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
-from .stores import ObjectStoreFactory, get_store_object
-from .stores.base import StoreBase
 from .tsdb import get_tsdb_connector
 from .tsdb.base import TSDBConnector

mlrun/model_monitoring/db/tsdb/base.py CHANGED Viewed

@@ -47,7 +47,7 @@ class TSDBConnector(ABC):
         self.project = project
     @abstractmethod
-    def apply_monitoring_stream_steps(self, graph) -> None:
+    def apply_monitoring_stream_steps(self, graph, **kwargs) -> None:
         """
         Apply TSDB steps on the provided monitoring graph. Throughout these steps, the graph stores live data of
         different key metric dictionaries. This data is being used by the monitoring dashboards in
@@ -294,6 +294,7 @@ class TSDBConnector(ABC):
     ) -> pd.DataFrame:
         """
         Fetches data from the predictions TSDB table and returns the average latency for each specified endpoint
+        in the provided time range, which by default is the last 24 hours.
         :param endpoint_ids:    A list of model endpoint identifiers.
         :param start:           The start time for the query.

mlrun/model_monitoring/db/tsdb/tdengine/tdengine_connector.py CHANGED Viewed

@@ -164,7 +164,7 @@ class TDEngineConnector(TSDBConnector):
     def _convert_to_datetime(val: typing.Union[str, datetime]) -> datetime:
         return datetime.fromisoformat(val) if isinstance(val, str) else val
-    def apply_monitoring_stream_steps(self, graph):
+    def apply_monitoring_stream_steps(self, graph, **kwarg):
         """
         Apply TSDB steps on the provided monitoring graph. Throughout these steps, the graph stores live data of
         different key metric dictionaries. This data is being used by the monitoring dashboards in
@@ -701,6 +701,7 @@ class TDEngineConnector(TSDBConnector):
         endpoint_ids = (
             endpoint_ids if isinstance(endpoint_ids, list) else [endpoint_ids]
         )
+        start = start or (mlrun.utils.datetime_now() - timedelta(hours=24))
         start, end = self._get_start_end(start, end)
         df = self._get_records(
             table=self.tables[mm_schemas.TDEngineSuperTables.PREDICTIONS].super_table,

mlrun/model_monitoring/db/tsdb/v3io/v3io_connector.py CHANGED Viewed

@@ -168,6 +168,9 @@ class V3IOTSDBConnector(TSDBConnector):
         tsdb_batching_max_events: int = 1000,
         tsdb_batching_timeout_secs: int = 30,
         sample_window: int = 10,
+        aggregate_windows: Optional[list[str]] = None,
+        aggregate_period: str = "1m",
+        **kwarg,
     ):
         """
         Apply TSDB steps on the provided monitoring graph. Throughout these steps, the graph stores live data of
@@ -178,7 +181,40 @@ class V3IOTSDBConnector(TSDBConnector):
         - endpoint_features (Prediction and feature names and values)
         - custom_metrics (user-defined metrics)
         """
+        aggregate_windows = aggregate_windows or ["5m", "1h"]
+        # Calculate number of predictions and average latency
+        def apply_storey_aggregations():
+            # Calculate number of predictions for each window (5 min and 1 hour by default)
+            graph.add_step(
+                class_name="storey.AggregateByKey",
+                aggregates=[
+                    {
+                        "name": EventFieldType.LATENCY,
+                        "column": EventFieldType.LATENCY,
+                        "operations": ["count", "avg"],
+                        "windows": aggregate_windows,
+                        "period": aggregate_period,
+                    }
+                ],
+                name=EventFieldType.LATENCY,
+                after="MapFeatureNames",
+                step_name="Aggregates",
+                table=".",
+                key_field=EventFieldType.ENDPOINT_ID,
+            )
+            # Calculate average latency time for each window (5 min and 1 hour by default)
+            graph.add_step(
+                class_name="storey.Rename",
+                mapping={
+                    "latency_count_5m": mm_schemas.EventLiveStats.PREDICTIONS_COUNT_5M,
+                    "latency_count_1h": mm_schemas.EventLiveStats.PREDICTIONS_COUNT_1H,
+                },
+                name="Rename",
+                after=EventFieldType.LATENCY,
+            )
+        apply_storey_aggregations()
         # Write latency per prediction, labeled by endpoint ID only
         graph.add_step(
             "storey.TSDBTarget",
@@ -853,6 +889,7 @@ class V3IOTSDBConnector(TSDBConnector):
         endpoint_ids = (
             endpoint_ids if isinstance(endpoint_ids, list) else [endpoint_ids]
         )
+        start = start or (mlrun.utils.datetime_now() - timedelta(hours=24))
         start, end = self._get_start_end(start, end)
         df = self._get_records(
             table=mm_schemas.FileTargetKind.PREDICTIONS,
@@ -864,4 +901,10 @@ class V3IOTSDBConnector(TSDBConnector):
         )
         if not df.empty:
             df.dropna(inplace=True)
+            df.rename(
+                columns={
+                    f"avg({mm_schemas.EventFieldType.LATENCY})": f"avg_{mm_schemas.EventFieldType.LATENCY}"
+                },
+                inplace=True,
+            )
         return df.reset_index(drop=True)

mlrun 1.8.0rc5__py3-none-any.whl → 1.8.0rc9__py3-none-any.whl

Potentially problematic release.

mlrun 1.8.0rc5__py3-none-any.whl → 1.8.0rc9__py3-none-any.whl