PyPI - mlrun - Versions diffs - 1.8.0rc5__py3-none-any.whl → 1.8.0rc6__py3-none-any.whl - Mend

mlrun 1.8.0rc5__py3-none-any.whl → 1.8.0rc6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of mlrun might be problematic. Click here for more details.

Files changed (60)

mlrun/artifacts/__init__.py +1 -1
mlrun/artifacts/base.py +12 -1
mlrun/artifacts/document.py +59 -38
mlrun/common/model_monitoring/__init__.py +0 -2
mlrun/common/model_monitoring/helpers.py +0 -28
mlrun/common/schemas/__init__.py +1 -4
mlrun/common/schemas/client_spec.py +0 -1
mlrun/common/schemas/model_monitoring/__init__.py +0 -6
mlrun/common/schemas/model_monitoring/constants.py +11 -9
mlrun/common/schemas/model_monitoring/model_endpoints.py +77 -149
mlrun/common/schemas/notification.py +6 -0
mlrun/config.py +0 -2
mlrun/datastore/datastore_profile.py +57 -17
mlrun/datastore/vectorstore.py +67 -59
mlrun/db/base.py +22 -18
mlrun/db/httpdb.py +116 -148
mlrun/db/nopdb.py +33 -17
mlrun/execution.py +11 -4
mlrun/model.py +3 -0
mlrun/model_monitoring/__init__.py +3 -2
mlrun/model_monitoring/api.py +40 -43
mlrun/model_monitoring/applications/_application_steps.py +3 -1
mlrun/model_monitoring/applications/context.py +15 -17
mlrun/model_monitoring/controller.py +43 -37
mlrun/model_monitoring/db/__init__.py +0 -2
mlrun/model_monitoring/db/tsdb/base.py +2 -1
mlrun/model_monitoring/db/tsdb/tdengine/tdengine_connector.py +2 -1
mlrun/model_monitoring/db/tsdb/v3io/v3io_connector.py +43 -0
mlrun/model_monitoring/helpers.py +12 -66
mlrun/model_monitoring/stream_processing.py +83 -270
mlrun/model_monitoring/writer.py +1 -10
mlrun/projects/project.py +63 -55
mlrun/runtimes/nuclio/function.py +7 -6
mlrun/runtimes/nuclio/serving.py +7 -1
mlrun/serving/routers.py +158 -145
mlrun/serving/server.py +6 -0
mlrun/serving/states.py +2 -0
mlrun/serving/v2_serving.py +69 -60
mlrun/utils/helpers.py +14 -30
mlrun/utils/notifications/notification/mail.py +17 -6
mlrun/utils/version/version.json +2 -2
{mlrun-1.8.0rc5.dist-info → mlrun-1.8.0rc6.dist-info}/METADATA +1 -1
{mlrun-1.8.0rc5.dist-info → mlrun-1.8.0rc6.dist-info}/RECORD +47 -60
mlrun/common/schemas/model_monitoring/model_endpoint_v2.py +0 -149
mlrun/model_monitoring/db/stores/__init__.py +0 -136
mlrun/model_monitoring/db/stores/base/__init__.py +0 -15
mlrun/model_monitoring/db/stores/base/store.py +0 -154
mlrun/model_monitoring/db/stores/sqldb/__init__.py +0 -13
mlrun/model_monitoring/db/stores/sqldb/models/__init__.py +0 -46
mlrun/model_monitoring/db/stores/sqldb/models/base.py +0 -93
mlrun/model_monitoring/db/stores/sqldb/models/mysql.py +0 -47
mlrun/model_monitoring/db/stores/sqldb/models/sqlite.py +0 -25
mlrun/model_monitoring/db/stores/sqldb/sql_store.py +0 -408
mlrun/model_monitoring/db/stores/v3io_kv/__init__.py +0 -13
mlrun/model_monitoring/db/stores/v3io_kv/kv_store.py +0 -464
mlrun/model_monitoring/model_endpoint.py +0 -120
{mlrun-1.8.0rc5.dist-info → mlrun-1.8.0rc6.dist-info}/LICENSE +0 -0
{mlrun-1.8.0rc5.dist-info → mlrun-1.8.0rc6.dist-info}/WHEEL +0 -0
{mlrun-1.8.0rc5.dist-info → mlrun-1.8.0rc6.dist-info}/entry_points.txt +0 -0
{mlrun-1.8.0rc5.dist-info → mlrun-1.8.0rc6.dist-info}/top_level.txt +0 -0

mlrun/model_monitoring/api.py CHANGED Viewed

@@ -26,11 +26,14 @@ import mlrun.common.schemas.model_monitoring.constants as mm_constants
 import mlrun.feature_store
 import mlrun.model_monitoring.applications as mm_app
 import mlrun.serving
+from mlrun.common.schemas import ModelEndpoint
+from mlrun.common.schemas.model_monitoring import (
+    FunctionURI,
+)
 from mlrun.data_types.infer import InferOptions, get_df_stats
 from mlrun.utils import datetime_now, logger
 from .helpers import update_model_endpoint_last_request
-from .model_endpoint import ModelEndpoint
 # A union of all supported dataset types:
 DatasetType = typing.Union[
@@ -46,8 +49,6 @@ def get_or_create_model_endpoint(
     function_name: str = "",
     context: mlrun.MLClientCtx = None,
     sample_set_statistics: typing.Optional[dict[str, typing.Any]] = None,
-    drift_threshold: typing.Optional[float] = None,
-    possible_drift_threshold: typing.Optional[float] = None,
     monitoring_mode: mm_constants.ModelMonitoringMode = mm_constants.ModelMonitoringMode.disabled,
     db_session=None,
 ) -> ModelEndpoint:
@@ -68,10 +69,6 @@ def get_or_create_model_endpoint(
                                      full function hash.
     :param sample_set_statistics:    Dictionary of sample set statistics that will be used as a reference data for
                                      the new model endpoint (applicable only to new endpoint_id).
-    :param drift_threshold:          (deprecated) The threshold of which to mark drifts (applicable only to new
-                                     endpoint_id).
-    :param possible_drift_threshold: (deprecated) The threshold of which to mark possible drifts (applicable only to new
-                                     endpoint_id).
     :param monitoring_mode:          If enabled, apply model monitoring features on the provided endpoint id
                                      (applicable only to new endpoint_id).
     :param db_session:               A runtime session that manages the current dialog with the database.
@@ -79,18 +76,15 @@ def get_or_create_model_endpoint(
     :return: A ModelEndpoint object
     """
-    if not endpoint_id:
-        # Generate a new model endpoint id based on the project name and model name
-        endpoint_id = hashlib.sha1(
-            f"{project}_{model_endpoint_name}".encode()
-        ).hexdigest()
     if not db_session:
         # Generate a runtime database
         db_session = mlrun.get_run_db()
     try:
         model_endpoint = db_session.get_model_endpoint(
-            project=project, endpoint_id=endpoint_id
+            project=project,
+            name=model_endpoint_name,
+            endpoint_id=endpoint_id,
+            function_name=function_name,
         )
         # If other fields provided, validate that they are correspond to the existing model endpoint data
         _model_endpoint_validations(
@@ -104,7 +98,6 @@ def get_or_create_model_endpoint(
         model_endpoint = _generate_model_endpoint(
             project=project,
             db_session=db_session,
-            endpoint_id=endpoint_id,
             model_path=model_path,
             model_endpoint_name=model_endpoint_name,
             function_name=function_name,
@@ -208,13 +201,13 @@ def record_results(
         monitoring_mode=monitoring_mode,
         db_session=db,
     )
-    logger.debug("Model endpoint", endpoint=model_endpoint.to_dict())
+    logger.debug("Model endpoint", endpoint=model_endpoint)
     timestamp = datetime_now()
     if infer_results_df is not None:
         # Write the monitoring parquet to the relevant model endpoint context
         write_monitoring_df(
-            feature_set_uri=model_endpoint.status.monitoring_feature_set_uri,
+            feature_set_uri=model_endpoint.spec.monitoring_feature_set_uri,
             infer_datetime=timestamp,
             endpoint_id=model_endpoint.metadata.uid,
             infer_results_df=infer_results_df,
@@ -278,7 +271,7 @@ def _model_endpoint_validations(
     # Feature stats
     if (
         sample_set_statistics
-        and sample_set_statistics != model_endpoint.status.feature_stats
+        and sample_set_statistics != model_endpoint.spec.feature_stats
     ):
         logger.warning(
             "Provided sample set statistics is different from the registered statistics. "
@@ -330,7 +323,6 @@ def write_monitoring_df(
 def _generate_model_endpoint(
     project: str,
     db_session,
-    endpoint_id: str,
     model_path: str,
     model_endpoint_name: str,
     function_name: str,
@@ -344,7 +336,6 @@ def _generate_model_endpoint(
     :param project:                  Project name.
     :param db_session:               A session that manages the current dialog with the database.
-    :param endpoint_id:              Model endpoint unique ID.
     :param model_path:               The model Store path.
     :param model_endpoint_name:      Model endpoint name will be presented under the new model endpoint.
     :param function_name:            If a new model endpoint is created, use this function name for generating the
@@ -357,32 +348,38 @@ def _generate_model_endpoint(
     :return `mlrun.model_monitoring.model_endpoint.ModelEndpoint` object.
     """
-    model_endpoint = ModelEndpoint()
-    model_endpoint.metadata.project = project
-    model_endpoint.metadata.uid = endpoint_id
-    if function_name:
-        model_endpoint.spec.function_uri = project + "/" + function_name
-    elif not context:
-        raise mlrun.errors.MLRunInvalidArgumentError(
-            "Please provide either a function name or a valid MLRun context"
+    if not function_name and context:
+        function_name = FunctionURI.from_string(
+            context.to_dict()["spec"]["function"]
+        ).function
+    model_obj = None
+    if model_path:
+        model_obj: mlrun.artifacts.ModelArtifact = (
+            mlrun.datastore.store_resources.get_store_resource(
+                model_path, db=db_session
+            )
         )
-    else:
-        model_endpoint.spec.function_uri = context.to_dict()["spec"]["function"]
-    model_endpoint.spec.model_uri = model_path
-    model_endpoint.spec.model = model_endpoint_name
-    model_endpoint.spec.model_class = "drift-analysis"
-    model_endpoint.spec.monitoring_mode = monitoring_mode
-    model_endpoint.status.first_request = model_endpoint.status.last_request = (
-        datetime_now().isoformat()
-    )
-    if sample_set_statistics:
-        model_endpoint.status.feature_stats = sample_set_statistics
-    db_session.create_model_endpoint(
-        project=project, endpoint_id=endpoint_id, model_endpoint=model_endpoint
+    current_time = datetime_now()
+    model_endpoint = mlrun.common.schemas.ModelEndpoint(
+        metadata=mlrun.common.schemas.ModelEndpointMetadata(
+            project=project,
+            name=model_endpoint_name,
+            endpoint_type=mlrun.common.schemas.model_monitoring.EndpointType.BATCH_EP,
+        ),
+        spec=mlrun.common.schemas.ModelEndpointSpec(
+            function_name=function_name,
+            model_name=model_obj.metadata.key if model_path else None,
+            model_uid=model_obj.metadata.uid if model_path else None,
+            model_class="drift-analysis",
+        ),
+        status=mlrun.common.schemas.ModelEndpointStatus(
+            monitoring_mode=monitoring_mode,
+            first_request=current_time,
+            last_request=current_time,
+        ),
     )
-    return db_session.get_model_endpoint(project=project, endpoint_id=endpoint_id)
+    return db_session.create_model_endpoint(model_endpoint=model_endpoint)
 def get_sample_set_statistics(

mlrun/model_monitoring/applications/_application_steps.py CHANGED Viewed

@@ -16,6 +16,7 @@ import json
 import traceback
 from typing import Any, Optional, Union
+import mlrun.common.schemas
 import mlrun.common.schemas.alert as alert_objects
 import mlrun.common.schemas.model_monitoring.constants as mm_constant
 import mlrun.datastore
@@ -81,6 +82,7 @@ class _PushToMonitoringWriter(StepToDict):
         self._lazy_init()
         application_results, application_context = event
         writer_event = {
+            mm_constant.WriterEvent.ENDPOINT_NAME: application_context.endpoint_name,
             mm_constant.WriterEvent.APPLICATION_NAME: application_context.application_name,
             mm_constant.WriterEvent.ENDPOINT_ID: application_context.endpoint_id,
             mm_constant.WriterEvent.START_INFER_TIME: application_context.start_infer_time.isoformat(
@@ -125,7 +127,7 @@ class _PrepareMonitoringEvent(StepToDict):
         """
         self.graph_context = context
         self.application_name = application_name
-        self.model_endpoints: dict[str, mlrun.model_monitoring.ModelEndpoint] = {}
+        self.model_endpoints: dict[str, mlrun.common.schemas.ModelEndpoint] = {}
     def do(self, event: dict[str, Any]) -> MonitoringApplicationContext:
         """

mlrun/model_monitoring/applications/context.py CHANGED Viewed

@@ -12,7 +12,6 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
-import json
 import socket
 from typing import Any, Optional, Protocol, cast
@@ -28,12 +27,11 @@ import mlrun.features
 import mlrun.serving
 import mlrun.utils
 from mlrun.artifacts import Artifact, DatasetArtifact, ModelArtifact, get_model
-from mlrun.common.model_monitoring.helpers import FeatureStats, pad_features_hist
+from mlrun.common.model_monitoring.helpers import FeatureStats
+from mlrun.common.schemas import ModelEndpoint
 from mlrun.model_monitoring.helpers import (
     calculate_inputs_statistics,
-    get_endpoint_record,
 )
-from mlrun.model_monitoring.model_endpoint import ModelEndpoint
 class _ArtifactsLogger(Protocol):
@@ -64,6 +62,7 @@ class MonitoringApplicationContext:
     :param end_infer_time:          (pd.Timestamp) End time of the monitoring schedule.
     :param latest_request:          (pd.Timestamp) Timestamp of the latest request on this endpoint_id.
     :param endpoint_id:             (str) ID of the monitored model endpoint
+    :param endpoint_name:           (str) Name of the monitored model endpoint
     :param output_stream_uri:       (str) URI of the output stream for results
     :param model_endpoint:          (ModelEndpoint) The model endpoint object.
     :param feature_names:           (list[str]) List of models feature names.
@@ -134,6 +133,9 @@ class MonitoringApplicationContext:
         self.endpoint_id = cast(
             str, event.get(mm_constants.ApplicationEvent.ENDPOINT_ID)
         )
+        self.endpoint_name = cast(
+            str, event.get(mm_constants.ApplicationEvent.ENDPOINT_NAME)
+        )
         self.output_stream_uri = cast(
             str, event.get(mm_constants.ApplicationEvent.OUTPUT_STREAM_URI)
         )
@@ -166,7 +168,7 @@ class MonitoringApplicationContext:
     def sample_df(self) -> pd.DataFrame:
         if self._sample_df is None:
             feature_set = fstore.get_feature_set(
-                self.model_endpoint.status.monitoring_feature_set_uri
+                self.model_endpoint.spec.monitoring_feature_set_uri
             )
             features = [f"{feature_set.metadata.name}.*"]
             vector = fstore.FeatureVector(
@@ -188,16 +190,18 @@ class MonitoringApplicationContext:
     @property
     def model_endpoint(self) -> ModelEndpoint:
         if not self._model_endpoint:
-            self._model_endpoint = ModelEndpoint.from_flat_dict(
-                get_endpoint_record(self.project_name, self.endpoint_id)
+            self._model_endpoint = mlrun.db.get_run_db().get_model_endpoint(
+                name=self.endpoint_name,
+                project=self.project_name,
+                endpoint_id=self.endpoint_id,
+                feature_analysis=True,
             )
         return self._model_endpoint
     @property
     def feature_stats(self) -> FeatureStats:
         if not self._feature_stats:
-            self._feature_stats = json.loads(self.model_endpoint.status.feature_stats)
-            pad_features_hist(self._feature_stats)
+            self._feature_stats = self.model_endpoint.spec.feature_stats
         return self._feature_stats
     @property
@@ -212,18 +216,12 @@ class MonitoringApplicationContext:
     @property
     def feature_names(self) -> list[str]:
         """The feature names of the model"""
-        feature_names = self.model_endpoint.spec.feature_names
-        return (
-            feature_names
-            if isinstance(feature_names, list)
-            else json.loads(feature_names)
-        )
+        return self.model_endpoint.spec.feature_names
     @property
     def label_names(self) -> list[str]:
         """The label names of the model"""
-        label_names = self.model_endpoint.spec.label_names
-        return label_names if isinstance(label_names, list) else json.loads(label_names)
+        return self.model_endpoint.spec.label_names
     @property
     def model(self) -> tuple[str, ModelArtifact, dict]:

mlrun/model_monitoring/controller.py CHANGED Viewed

@@ -19,7 +19,7 @@ import os
 from collections.abc import Iterator
 from contextlib import AbstractContextManager
 from types import TracebackType
-from typing import Any, NamedTuple, Optional, cast
+from typing import NamedTuple, Optional, cast
 import nuclio_sdk
@@ -27,6 +27,7 @@ import mlrun
 import mlrun.common.schemas.model_monitoring.constants as mm_constants
 import mlrun.feature_store as fstore
 import mlrun.model_monitoring
+from mlrun.common.schemas import EndpointType
 from mlrun.datastore import get_stream_pusher
 from mlrun.errors import err_to_str
 from mlrun.model_monitoring.db._schedules import ModelMonitoringSchedulesFile
@@ -65,7 +66,7 @@ class _BatchWindow:
         self._start = self._get_last_analyzed()
     def _get_saved_last_analyzed(self) -> Optional[int]:
-        return self._db.get_application_time(self._application)
+        return cast(int, self._db.get_application_time(self._application))
     def _update_last_analyzed(self, last_analyzed: int) -> None:
         self._db.update_application_time(
@@ -161,18 +162,20 @@ class _BatchWindowGenerator(AbstractContextManager):
         )
     @classmethod
-    def _get_last_updated_time(cls, last_request: str, has_stream: bool) -> int:
+    def _get_last_updated_time(
+        cls, last_request: datetime.datetime, not_batch_endpoint: bool
+    ) -> int:
         """
         Get the last updated time of a model endpoint.
         """
         last_updated = int(
-            cls._date_string2timestamp(last_request)
+            last_request.timestamp()
             - cast(
                 float,
                 mlrun.mlconf.model_endpoint_monitoring.parquet_batching_timeout_secs,
             )
         )
-        if not has_stream:
+        if not not_batch_endpoint:
             # If the endpoint does not have a stream, `last_updated` should be
             # the minimum between the current time and the last updated time.
             # This compensates for the bumping mechanism - see
@@ -183,17 +186,13 @@ class _BatchWindowGenerator(AbstractContextManager):
             )
         return last_updated
-    @staticmethod
-    def _date_string2timestamp(date_string: str) -> int:
-        return int(datetime.datetime.fromisoformat(date_string).timestamp())
     def get_intervals(
         self,
         *,
         application: str,
-        first_request: str,
-        last_request: str,
-        has_stream: bool,
+        first_request: datetime.datetime,
+        last_request: datetime.datetime,
+        not_batch_endpoint: bool,
     ) -> Iterator[_Interval]:
         """
         Get the batch window for a specific endpoint and application.
@@ -204,8 +203,8 @@ class _BatchWindowGenerator(AbstractContextManager):
             schedules_file=self._schedules_file,
             application=application,
             timedelta_seconds=self._timedelta,
-            last_updated=self._get_last_updated_time(last_request, has_stream),
-            first_request=self._date_string2timestamp(first_request),
+            last_updated=self._get_last_updated_time(last_request, not_batch_endpoint),
+            first_request=int(first_request.timestamp()),
         )
         yield from batch_window.get_intervals()
@@ -235,8 +234,6 @@ class MonitoringApplicationController:
         logger.debug(f"Initializing {self.__class__.__name__}", project=self.project)
-        self.db = mlrun.model_monitoring.get_store_object(project=self.project)
         self._window_length = _get_window_length()
         self.model_monitoring_access_key = self._get_model_monitoring_access_key()
@@ -253,19 +250,16 @@ class MonitoringApplicationController:
         return access_key
     @staticmethod
-    def _should_monitor_endpoint(endpoint: dict[str, Any]) -> bool:
+    def _should_monitor_endpoint(endpoint: mlrun.common.schemas.ModelEndpoint) -> bool:
         return (
-            # Is the model endpoint active?
-            endpoint[mm_constants.EventFieldType.ACTIVE]
             # Is the model endpoint monitored?
-            and endpoint[mm_constants.EventFieldType.MONITORING_MODE]
-            == mm_constants.ModelMonitoringMode.enabled
+            endpoint.status.monitoring_mode == mm_constants.ModelMonitoringMode.enabled
             # Was the model endpoint called? I.e., are the first and last requests nonempty?
-            and endpoint[mm_constants.EventFieldType.FIRST_REQUEST]
-            and endpoint[mm_constants.EventFieldType.LAST_REQUEST]
+            and endpoint.status.first_request
+            and endpoint.status.last_request
             # Is the model endpoint not a router endpoint? Router endpoint has no feature stats
-            and int(endpoint[mm_constants.EventFieldType.ENDPOINT_TYPE])
-            != mm_constants.EndpointType.ROUTER
+            and endpoint.metadata.endpoint_type.value
+            != mm_constants.EndpointType.ROUTER.value
         )
     def run(self) -> None:
@@ -281,7 +275,10 @@ class MonitoringApplicationController:
         logger.info("Start running monitoring controller")
         try:
             applications_names = []
-            endpoints = self.db.list_model_endpoints(include_stats=True)
+            endpoints_list = mlrun.db.get_run_db().list_model_endpoints(
+                project=self.project, tsdb_metrics=True
+            )
+            endpoints = endpoints_list.endpoints
             if not endpoints:
                 logger.info("No model endpoints found", project=self.project)
                 return
@@ -333,12 +330,19 @@ class MonitoringApplicationController:
                         model_monitoring_access_key=self.model_monitoring_access_key,
                         storage_options=self.storage_options,
                     )
+                else:
+                    logger.debug(
+                        "Skipping endpoint, not ready or not suitable for monitoring",
+                        endpoint_id=endpoint.metadata.uid,
+                        endpoint_name=endpoint.metadata.name,
+                    )
+        logger.info("Finished running monitoring controller")
     @classmethod
     def model_endpoint_process(
         cls,
         project: str,
-        endpoint: dict,
+        endpoint: mlrun.common.schemas.ModelEndpoint,
         applications_names: list[str],
         window_length: int,
         model_monitoring_access_key: str,
@@ -356,11 +360,11 @@ class MonitoringApplicationController:
         :param model_monitoring_access_key: (str) Access key to apply the model monitoring process.
         :param storage_options:             (dict) Storage options for reading the infer parquet files.
         """
-        endpoint_id = endpoint[mm_constants.EventFieldType.UID]
-        has_stream = endpoint[mm_constants.EventFieldType.STREAM_PATH] != ""
-        m_fs = fstore.get_feature_set(
-            endpoint[mm_constants.EventFieldType.FEATURE_SET_URI]
+        endpoint_id = endpoint.metadata.uid
+        not_batch_endpoint = not (
+            endpoint.metadata.endpoint_type == EndpointType.BATCH_EP
         )
+        m_fs = fstore.get_feature_set(endpoint.spec.monitoring_feature_set_uri)
         try:
             with _BatchWindowGenerator(
                 project=project, endpoint_id=endpoint_id, window_length=window_length
@@ -371,11 +375,9 @@ class MonitoringApplicationController:
                         end_infer_time,
                     ) in batch_window_generator.get_intervals(
                         application=application,
-                        first_request=endpoint[
-                            mm_constants.EventFieldType.FIRST_REQUEST
-                        ],
-                        last_request=endpoint[mm_constants.EventFieldType.LAST_REQUEST],
-                        has_stream=has_stream,
+                        first_request=endpoint.status.first_request,
+                        last_request=endpoint.status.last_request,
+                        not_batch_endpoint=not_batch_endpoint,
                     ):
                         df = m_fs.to_dataframe(
                             start_time=start_infer_time,
@@ -401,15 +403,17 @@ class MonitoringApplicationController:
                                 start_infer_time=start_infer_time,
                                 end_infer_time=end_infer_time,
                                 endpoint_id=endpoint_id,
+                                endpoint_name=endpoint.metadata.name,
                                 project=project,
                                 applications_names=[application],
                                 model_monitoring_access_key=model_monitoring_access_key,
                             )
+                logger.info("Finished processing endpoint", endpoint_id=endpoint_id)
         except Exception:
             logger.exception(
                 "Encountered an exception",
-                endpoint_id=endpoint[mm_constants.EventFieldType.UID],
+                endpoint_id=endpoint.metadata.uid,
             )
     @staticmethod
@@ -417,6 +421,7 @@ class MonitoringApplicationController:
         start_infer_time: datetime.datetime,
         end_infer_time: datetime.datetime,
         endpoint_id: str,
+        endpoint_name: str,
         project: str,
         applications_names: list[str],
         model_monitoring_access_key: str,
@@ -440,6 +445,7 @@ class MonitoringApplicationController:
                 sep=" ", timespec="microseconds"
             ),
             mm_constants.ApplicationEvent.ENDPOINT_ID: endpoint_id,
+            mm_constants.ApplicationEvent.ENDPOINT_NAME: endpoint_name,
             mm_constants.ApplicationEvent.OUTPUT_STREAM_URI: get_stream_path(
                 project=project,
                 function_name=mm_constants.MonitoringFunctionNames.WRITER,

mlrun/model_monitoring/db/__init__.py CHANGED Viewed

@@ -12,7 +12,5 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
-from .stores import ObjectStoreFactory, get_store_object
-from .stores.base import StoreBase
 from .tsdb import get_tsdb_connector
 from .tsdb.base import TSDBConnector

mlrun/model_monitoring/db/tsdb/base.py CHANGED Viewed

@@ -47,7 +47,7 @@ class TSDBConnector(ABC):
         self.project = project
     @abstractmethod
-    def apply_monitoring_stream_steps(self, graph) -> None:
+    def apply_monitoring_stream_steps(self, graph, **kwargs) -> None:
         """
         Apply TSDB steps on the provided monitoring graph. Throughout these steps, the graph stores live data of
         different key metric dictionaries. This data is being used by the monitoring dashboards in
@@ -294,6 +294,7 @@ class TSDBConnector(ABC):
     ) -> pd.DataFrame:
         """
         Fetches data from the predictions TSDB table and returns the average latency for each specified endpoint
+        in the provided time range, which by default is the last 24 hours.
         :param endpoint_ids:    A list of model endpoint identifiers.
         :param start:           The start time for the query.

mlrun/model_monitoring/db/tsdb/tdengine/tdengine_connector.py CHANGED Viewed

@@ -164,7 +164,7 @@ class TDEngineConnector(TSDBConnector):
     def _convert_to_datetime(val: typing.Union[str, datetime]) -> datetime:
         return datetime.fromisoformat(val) if isinstance(val, str) else val
-    def apply_monitoring_stream_steps(self, graph):
+    def apply_monitoring_stream_steps(self, graph, **kwarg):
         """
         Apply TSDB steps on the provided monitoring graph. Throughout these steps, the graph stores live data of
         different key metric dictionaries. This data is being used by the monitoring dashboards in
@@ -701,6 +701,7 @@ class TDEngineConnector(TSDBConnector):
         endpoint_ids = (
             endpoint_ids if isinstance(endpoint_ids, list) else [endpoint_ids]
         )
+        start = start or (mlrun.utils.datetime_now() - timedelta(hours=24))
         start, end = self._get_start_end(start, end)
         df = self._get_records(
             table=self.tables[mm_schemas.TDEngineSuperTables.PREDICTIONS].super_table,

mlrun/model_monitoring/db/tsdb/v3io/v3io_connector.py CHANGED Viewed

@@ -168,6 +168,9 @@ class V3IOTSDBConnector(TSDBConnector):
         tsdb_batching_max_events: int = 1000,
         tsdb_batching_timeout_secs: int = 30,
         sample_window: int = 10,
+        aggregate_windows: Optional[list[str]] = None,
+        aggregate_period: str = "1m",
+        **kwarg,
     ):
         """
         Apply TSDB steps on the provided monitoring graph. Throughout these steps, the graph stores live data of
@@ -178,7 +181,40 @@ class V3IOTSDBConnector(TSDBConnector):
         - endpoint_features (Prediction and feature names and values)
         - custom_metrics (user-defined metrics)
         """
+        aggregate_windows = aggregate_windows or ["5m", "1h"]
+        # Calculate number of predictions and average latency
+        def apply_storey_aggregations():
+            # Calculate number of predictions for each window (5 min and 1 hour by default)
+            graph.add_step(
+                class_name="storey.AggregateByKey",
+                aggregates=[
+                    {
+                        "name": EventFieldType.LATENCY,
+                        "column": EventFieldType.LATENCY,
+                        "operations": ["count", "avg"],
+                        "windows": aggregate_windows,
+                        "period": aggregate_period,
+                    }
+                ],
+                name=EventFieldType.LATENCY,
+                after="MapFeatureNames",
+                step_name="Aggregates",
+                table=".",
+                key_field=EventFieldType.ENDPOINT_ID,
+            )
+            # Calculate average latency time for each window (5 min and 1 hour by default)
+            graph.add_step(
+                class_name="storey.Rename",
+                mapping={
+                    "latency_count_5m": mm_schemas.EventLiveStats.PREDICTIONS_COUNT_5M,
+                    "latency_count_1h": mm_schemas.EventLiveStats.PREDICTIONS_COUNT_1H,
+                },
+                name="Rename",
+                after=EventFieldType.LATENCY,
+            )
+        apply_storey_aggregations()
         # Write latency per prediction, labeled by endpoint ID only
         graph.add_step(
             "storey.TSDBTarget",
@@ -853,6 +889,7 @@ class V3IOTSDBConnector(TSDBConnector):
         endpoint_ids = (
             endpoint_ids if isinstance(endpoint_ids, list) else [endpoint_ids]
         )
+        start = start or (mlrun.utils.datetime_now() - timedelta(hours=24))
         start, end = self._get_start_end(start, end)
         df = self._get_records(
             table=mm_schemas.FileTargetKind.PREDICTIONS,
@@ -864,4 +901,10 @@ class V3IOTSDBConnector(TSDBConnector):
         )
         if not df.empty:
             df.dropna(inplace=True)
+            df.rename(
+                columns={
+                    f"avg({mm_schemas.EventFieldType.LATENCY})": f"avg_{mm_schemas.EventFieldType.LATENCY}"
+                },
+                inplace=True,
+            )
         return df.reset_index(drop=True)

mlrun 1.8.0rc5__py3-none-any.whl → 1.8.0rc6__py3-none-any.whl

Potentially problematic release.

mlrun 1.8.0rc5__py3-none-any.whl → 1.8.0rc6__py3-none-any.whl