PyPI - mlrun - Versions diffs - 1.8.0rc5__py3-none-any.whl → 1.8.0rc9__py3-none-any.whl - Mend

mlrun 1.8.0rc5__py3-none-any.whl → 1.8.0rc9__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of mlrun might be problematic. Click here for more details.

Files changed (74) hide show

mlrun/__init__.py +1 -0
mlrun/artifacts/__init__.py +1 -1
mlrun/artifacts/base.py +21 -1
mlrun/artifacts/document.py +62 -39
mlrun/artifacts/manager.py +12 -5
mlrun/common/constants.py +1 -0
mlrun/common/model_monitoring/__init__.py +0 -2
mlrun/common/model_monitoring/helpers.py +0 -28
mlrun/common/schemas/__init__.py +2 -4
mlrun/common/schemas/alert.py +77 -1
mlrun/common/schemas/client_spec.py +0 -1
mlrun/common/schemas/model_monitoring/__init__.py +0 -6
mlrun/common/schemas/model_monitoring/constants.py +11 -9
mlrun/common/schemas/model_monitoring/model_endpoints.py +77 -149
mlrun/common/schemas/notification.py +6 -0
mlrun/common/schemas/project.py +3 -0
mlrun/config.py +2 -3
mlrun/datastore/datastore_profile.py +57 -17
mlrun/datastore/sources.py +1 -2
mlrun/datastore/store_resources.py +7 -2
mlrun/datastore/vectorstore.py +99 -62
mlrun/db/base.py +34 -20
mlrun/db/httpdb.py +249 -163
mlrun/db/nopdb.py +40 -17
mlrun/execution.py +14 -7
mlrun/feature_store/api.py +1 -0
mlrun/model.py +3 -0
mlrun/model_monitoring/__init__.py +3 -2
mlrun/model_monitoring/api.py +64 -53
mlrun/model_monitoring/applications/_application_steps.py +3 -1
mlrun/model_monitoring/applications/base.py +115 -15
mlrun/model_monitoring/applications/context.py +42 -24
mlrun/model_monitoring/applications/histogram_data_drift.py +1 -1
mlrun/model_monitoring/controller.py +43 -37
mlrun/model_monitoring/db/__init__.py +0 -2
mlrun/model_monitoring/db/tsdb/base.py +2 -1
mlrun/model_monitoring/db/tsdb/tdengine/tdengine_connector.py +2 -1
mlrun/model_monitoring/db/tsdb/v3io/v3io_connector.py +43 -0
mlrun/model_monitoring/helpers.py +78 -66
mlrun/model_monitoring/stream_processing.py +83 -270
mlrun/model_monitoring/writer.py +1 -10
mlrun/projects/pipelines.py +37 -1
mlrun/projects/project.py +173 -70
mlrun/run.py +40 -0
mlrun/runtimes/nuclio/function.py +7 -6
mlrun/runtimes/nuclio/serving.py +9 -4
mlrun/serving/routers.py +158 -145
mlrun/serving/server.py +6 -0
mlrun/serving/states.py +21 -7
mlrun/serving/v2_serving.py +94 -68
mlrun/utils/helpers.py +23 -33
mlrun/utils/notifications/notification/mail.py +17 -6
mlrun/utils/notifications/notification_pusher.py +9 -5
mlrun/utils/regex.py +8 -1
mlrun/utils/version/version.json +2 -2
{mlrun-1.8.0rc5.dist-info → mlrun-1.8.0rc9.dist-info}/METADATA +2 -2
{mlrun-1.8.0rc5.dist-info → mlrun-1.8.0rc9.dist-info}/RECORD +61 -74
mlrun/common/schemas/model_monitoring/model_endpoint_v2.py +0 -149
mlrun/model_monitoring/db/stores/__init__.py +0 -136
mlrun/model_monitoring/db/stores/base/__init__.py +0 -15
mlrun/model_monitoring/db/stores/base/store.py +0 -154
mlrun/model_monitoring/db/stores/sqldb/__init__.py +0 -13
mlrun/model_monitoring/db/stores/sqldb/models/__init__.py +0 -46
mlrun/model_monitoring/db/stores/sqldb/models/base.py +0 -93
mlrun/model_monitoring/db/stores/sqldb/models/mysql.py +0 -47
mlrun/model_monitoring/db/stores/sqldb/models/sqlite.py +0 -25
mlrun/model_monitoring/db/stores/sqldb/sql_store.py +0 -408
mlrun/model_monitoring/db/stores/v3io_kv/__init__.py +0 -13
mlrun/model_monitoring/db/stores/v3io_kv/kv_store.py +0 -464
mlrun/model_monitoring/model_endpoint.py +0 -120
{mlrun-1.8.0rc5.dist-info → mlrun-1.8.0rc9.dist-info}/LICENSE +0 -0
{mlrun-1.8.0rc5.dist-info → mlrun-1.8.0rc9.dist-info}/WHEEL +0 -0
{mlrun-1.8.0rc5.dist-info → mlrun-1.8.0rc9.dist-info}/entry_points.txt +0 -0
{mlrun-1.8.0rc5.dist-info → mlrun-1.8.0rc9.dist-info}/top_level.txt +0 -0

mlrun/model_monitoring/stream_processing.py CHANGED Viewed

@@ -14,7 +14,6 @@
 import collections
 import datetime
-import json
 import os
 import typing
@@ -32,13 +31,10 @@ import mlrun.utils
 from mlrun.common.schemas.model_monitoring.constants import (
     EndpointType,
     EventFieldType,
-    EventKeyMetrics,
-    EventLiveStats,
     FileTargetKind,
-    ModelEndpointTarget,
     ProjectSecretKeys,
 )
-from mlrun.model_monitoring.db import StoreBase, TSDBConnector
+from mlrun.model_monitoring.db import TSDBConnector
 from mlrun.utils import logger
@@ -102,18 +98,6 @@ class EventStreamProcessor:
             v3io_access_key=self.model_monitoring_access_key, v3io_api=self.v3io_api
         )
-        # KV path
-        kv_path = mlrun.mlconf.get_model_monitoring_file_target_path(
-            project=self.project, kind=FileTargetKind.ENDPOINTS
-        )
-        (
-            _,
-            self.kv_container,
-            self.kv_path,
-        ) = mlrun.common.model_monitoring.helpers.parse_model_endpoint_store_prefix(
-            kv_path
-        )
         # TSDB path and configurations
         tsdb_path = mlrun.mlconf.get_model_monitoring_file_target_path(
             project=self.project, kind=FileTargetKind.EVENTS
@@ -134,7 +118,6 @@ class EventStreamProcessor:
         self,
         fn: mlrun.runtimes.ServingRuntime,
         tsdb_connector: TSDBConnector,
-        endpoint_store: StoreBase,
     ) -> None:
         """
         Apply monitoring serving graph to a given serving function. The following serving graph includes about 4 main
@@ -163,31 +146,23 @@ class EventStreamProcessor:
         :param fn: A serving function.
         :param tsdb_connector: Time series database connector.
-        :param endpoint_store: KV/SQL store used for endpoint data.
         """
         graph = typing.cast(
             mlrun.serving.states.RootFlowStep,
             fn.set_topology(mlrun.serving.states.StepKinds.flow),
         )
-        graph.add_step(
-            "ExtractEndpointID",
-            "extract_endpoint",
-            full_event=True,
-        )
         # split the graph between event with error vs valid event
         graph.add_step(
             "storey.Filter",
             "FilterError",
-            after="extract_endpoint",
             _fn="(event.get('error') is None)",
         )
         graph.add_step(
             "storey.Filter",
             "ForwardError",
-            after="extract_endpoint",
             _fn="(event.get('error') is not None)",
         )
@@ -199,7 +174,7 @@ class EventStreamProcessor:
         def apply_process_endpoint_event():
             graph.add_step(
                 "ProcessEndpointEvent",
-                after="extract_endpoint",  # TODO: change this to FilterError in ML-7456
+                after="FilterError",
                 full_event=True,
                 project=self.project,
             )
@@ -234,79 +209,11 @@ class EventStreamProcessor:
             )
         apply_map_feature_names()
-        # Calculate number of predictions and average latency
-        def apply_storey_aggregations():
-            # Calculate number of predictions for each window (5 min and 1 hour by default)
-            graph.add_step(
-                class_name="storey.AggregateByKey",
-                aggregates=[
-                    {
-                        "name": EventFieldType.LATENCY,
-                        "column": EventFieldType.LATENCY,
-                        "operations": ["count", "avg"],
-                        "windows": self.aggregate_windows,
-                        "period": self.aggregate_period,
-                    }
-                ],
-                name=EventFieldType.LATENCY,
-                after="MapFeatureNames",
-                step_name="Aggregates",
-                table=".",
-                key_field=EventFieldType.ENDPOINT_ID,
-            )
-            # Calculate average latency time for each window (5 min and 1 hour by default)
-            graph.add_step(
-                class_name="storey.Rename",
-                mapping={
-                    "latency_count_5m": EventLiveStats.PREDICTIONS_COUNT_5M,
-                    "latency_count_1h": EventLiveStats.PREDICTIONS_COUNT_1H,
-                },
-                name="Rename",
-                after=EventFieldType.LATENCY,
-            )
-        apply_storey_aggregations()
-        # KV/SQL branch
-        # Filter relevant keys from the event before writing the data into the database table
-        def apply_process_before_endpoint_update():
-            graph.add_step(
-                "ProcessBeforeEndpointUpdate",
-                name="ProcessBeforeEndpointUpdate",
-                after="Rename",
-            )
-        apply_process_before_endpoint_update()
-        # Write the filtered event to KV/SQL table. At this point, the serving graph updates the stats
-        # about average latency and the amount of predictions over time
-        def apply_update_endpoint():
-            graph.add_step(
-                "UpdateEndpoint",
-                name="UpdateEndpoint",
-                after="ProcessBeforeEndpointUpdate",
-                project=self.project,
-            )
-        apply_update_endpoint()
-        # (only for V3IO KV target) - Apply infer_schema on the model endpoints table for generating schema file
-        # which will be used by Grafana monitoring dashboards
-        def apply_infer_schema():
-            graph.add_step(
-                "InferSchema",
-                name="InferSchema",
-                after="UpdateEndpoint",
-                v3io_framesd=self.v3io_framesd,
-                container=self.kv_container,
-                table=self.kv_path,
-            )
-        if endpoint_store.type == ModelEndpointTarget.V3IO_NOSQL:
-            apply_infer_schema()
-        tsdb_connector.apply_monitoring_stream_steps(graph=graph)
+        tsdb_connector.apply_monitoring_stream_steps(
+            graph=graph,
+            aggregate_windows=self.aggregate_windows,
+            aggregate_period=self.aggregate_period,
+        )
         # Parquet branch
         # Filter and validate different keys before writing the data to Parquet target
@@ -342,91 +249,6 @@ class EventStreamProcessor:
         apply_parquet_target()
-class ProcessBeforeEndpointUpdate(mlrun.feature_store.steps.MapClass):
-    def __init__(self, **kwargs):
-        """
-        Filter relevant keys from the event before writing the data to database table (in EndpointUpdate step).
-        Note that in the endpoint table we only keep metadata (function_uri, model_class, etc.) and stats about the
-        average latency and the number of predictions (per 5min and 1hour).
-        :returns: A filtered event as a dictionary which will be written to the endpoint table in the next step.
-        """
-        super().__init__(**kwargs)
-    def do(self, event):
-        # Compute prediction per second
-        event[EventLiveStats.PREDICTIONS_PER_SECOND] = (
-            float(event[EventLiveStats.PREDICTIONS_COUNT_5M]) / 300
-        )
-        # Filter relevant keys
-        e = {
-            k: event[k]
-            for k in [
-                EventFieldType.FUNCTION_URI,
-                EventFieldType.MODEL,
-                EventFieldType.MODEL_CLASS,
-                EventFieldType.ENDPOINT_ID,
-                EventFieldType.LABELS,
-                EventFieldType.FIRST_REQUEST,
-                EventFieldType.LAST_REQUEST,
-                EventFieldType.ERROR_COUNT,
-            ]
-        }
-        # Add generic metrics statistics
-        generic_metrics = {
-            k: event[k]
-            for k in [
-                EventLiveStats.LATENCY_AVG_5M,
-                EventLiveStats.LATENCY_AVG_1H,
-                EventLiveStats.PREDICTIONS_PER_SECOND,
-                EventLiveStats.PREDICTIONS_COUNT_5M,
-                EventLiveStats.PREDICTIONS_COUNT_1H,
-            ]
-        }
-        e[EventFieldType.METRICS] = json.dumps(
-            {EventKeyMetrics.GENERIC: generic_metrics}
-        )
-        # Write labels as json string as required by the DB format
-        e[EventFieldType.LABELS] = json.dumps(e[EventFieldType.LABELS])
-        return e
-class ExtractEndpointID(mlrun.feature_store.steps.MapClass):
-    def __init__(self, **kwargs) -> None:
-        """
-        Generate the model endpoint ID based on the event parameters and attach it to the event.
-        """
-        super().__init__(**kwargs)
-    def do(self, full_event) -> typing.Union[storey.Event, None]:
-        # Getting model version and function uri from event
-        # and use them for retrieving the endpoint_id
-        function_uri = full_event.body.get(EventFieldType.FUNCTION_URI)
-        if not is_not_none(function_uri, [EventFieldType.FUNCTION_URI]):
-            return None
-        model = full_event.body.get(EventFieldType.MODEL)
-        if not is_not_none(model, [EventFieldType.MODEL]):
-            return None
-        version = full_event.body.get(EventFieldType.VERSION)
-        versioned_model = f"{model}:{version}" if version else f"{model}:latest"
-        endpoint_id = mlrun.common.model_monitoring.create_model_endpoint_uid(
-            function_uri=function_uri,
-            versioned_model=versioned_model,
-        )
-        endpoint_id = str(endpoint_id)
-        full_event.body[EventFieldType.ENDPOINT_ID] = endpoint_id
-        full_event.body[EventFieldType.VERSIONED_MODEL] = versioned_model
-        return full_event
 class ProcessBeforeParquet(mlrun.feature_store.steps.MapClass):
     def __init__(self, **kwargs):
         """
@@ -499,20 +321,27 @@ class ProcessEndpointEvent(mlrun.feature_store.steps.MapClass):
     def do(self, full_event):
         event = full_event.body
+        # Getting model version and function uri from event
+        # and use them for retrieving the endpoint_id
+        function_uri = full_event.body.get(EventFieldType.FUNCTION_URI)
+        if not is_not_none(function_uri, [EventFieldType.FUNCTION_URI]):
+            return None
-        versioned_model = event[EventFieldType.VERSIONED_MODEL]
+        model = full_event.body.get(EventFieldType.MODEL)
+        if not is_not_none(model, [EventFieldType.MODEL]):
+            return None
+        version = full_event.body.get(EventFieldType.VERSION)
+        versioned_model = f"{model}:{version}" if version else f"{model}:latest"
+        full_event.body[EventFieldType.VERSIONED_MODEL] = versioned_model
         endpoint_id = event[EventFieldType.ENDPOINT_ID]
-        function_uri = event[EventFieldType.FUNCTION_URI]
         # In case this process fails, resume state from existing record
-        self.resume_state(endpoint_id)
-        # If error key has been found in the current event,
-        # increase the error counter by 1 and raise the error description
-        error = event.get("error")
-        if error:  # TODO: delete this in ML-7456
-            self.error_count[endpoint_id] += 1
-            raise mlrun.errors.MLRunInvalidArgumentError(str(error))
+        self.resume_state(
+            endpoint_id=endpoint_id,
+            endpoint_name=full_event.body.get(EventFieldType.MODEL),
+        )
         # Validate event fields
         model_class = event.get("model_class") or event.get("class")
@@ -536,11 +365,6 @@ class ProcessEndpointEvent(mlrun.feature_store.steps.MapClass):
             # Set time for the first request of the current endpoint
             self.first_request[endpoint_id] = timestamp
-        # Validate that the request time of the current event is later than the previous request time
-        self._validate_last_request_timestamp(
-            endpoint_id=endpoint_id, timestamp=timestamp
-        )
         # Set time for the last reqeust of the current endpoint
         self.last_request[endpoint_id] = timestamp
@@ -610,6 +434,7 @@ class ProcessEndpointEvent(mlrun.feature_store.steps.MapClass):
                 {
                     EventFieldType.FUNCTION_URI: function_uri,
                     EventFieldType.MODEL: versioned_model,
+                    EventFieldType.ENDPOINT_NAME: event.get(EventFieldType.MODEL),
                     EventFieldType.MODEL_CLASS: model_class,
                     EventFieldType.TIMESTAMP: timestamp,
                     EventFieldType.ENDPOINT_ID: endpoint_id,
@@ -636,33 +461,19 @@ class ProcessEndpointEvent(mlrun.feature_store.steps.MapClass):
         storey_event = storey.Event(body=events, key=endpoint_id)
         return storey_event
-    def _validate_last_request_timestamp(self, endpoint_id: str, timestamp: str):
-        """Validate that the request time of the current event is later than the previous request time that has
-        already been processed.
-        :param endpoint_id: The unique id of the model endpoint.
-        :param timestamp:   Event request time as a string.
-        :raise MLRunPreconditionFailedError: If the request time of the current is later than the previous request time.
-        """
-        if (
-            endpoint_id in self.last_request
-            and self.last_request[endpoint_id] > timestamp
-        ):
-            logger.error(
-                f"current event request time {timestamp} is earlier than the last request time "
-                f"{self.last_request[endpoint_id]} - write to TSDB will be rejected"
-            )
-    def resume_state(self, endpoint_id):
+    def resume_state(self, endpoint_id, endpoint_name):
         # Make sure process is resumable, if process fails for any reason, be able to pick things up close to where we
         # left them
         if endpoint_id not in self.endpoints:
             logger.info("Trying to resume state", endpoint_id=endpoint_id)
-            endpoint_record = mlrun.model_monitoring.helpers.get_endpoint_record(
-                project=self.project,
-                endpoint_id=endpoint_id,
+            endpoint_record = (
+                mlrun.db.get_run_db()
+                .get_model_endpoint(
+                    project=self.project,
+                    endpoint_id=endpoint_id,
+                    name=endpoint_name,
+                )
+                .flat_dict()
             )
             # If model endpoint found, get first_request, last_request and error_count values
@@ -736,6 +547,7 @@ class MapFeatureNames(mlrun.feature_store.steps.MapClass):
         # and labels columns were not found in the current event
         self.feature_names = {}
         self.label_columns = {}
+        self.first_request = {}
         # Dictionary to manage the model endpoint types - important for the V3IO TSDB
         self.endpoint_type = {}
@@ -767,17 +579,22 @@ class MapFeatureNames(mlrun.feature_store.steps.MapClass):
             if isinstance(feature_value, int):
                 feature_values[index] = float(feature_value)
+        attributes_to_update = {}
+        endpoint_record = None
         # Get feature names and label columns
         if endpoint_id not in self.feature_names:
-            endpoint_record = mlrun.model_monitoring.helpers.get_endpoint_record(
-                project=self.project,
-                endpoint_id=endpoint_id,
+            endpoint_record = (
+                mlrun.db.get_run_db()
+                .get_model_endpoint(
+                    project=self.project,
+                    endpoint_id=endpoint_id,
+                    name=event[EventFieldType.ENDPOINT_NAME],
+                )
+                .flat_dict()
             )
             feature_names = endpoint_record.get(EventFieldType.FEATURE_NAMES)
-            feature_names = json.loads(feature_names) if feature_names else None
             label_columns = endpoint_record.get(EventFieldType.LABEL_NAMES)
-            label_columns = json.loads(label_columns) if label_columns else None
             # If feature names were not found,
             # try to retrieve them from the previous events of the current process
@@ -795,13 +612,7 @@ class MapFeatureNames(mlrun.feature_store.steps.MapClass):
                 ]
                 # Update the endpoint record with the generated features
-                update_endpoint_record(
-                    project=self.project,
-                    endpoint_id=endpoint_id,
-                    attributes={
-                        EventFieldType.FEATURE_NAMES: json.dumps(feature_names)
-                    },
-                )
+                attributes_to_update[EventFieldType.FEATURE_NAMES] = feature_names
                 if endpoint_type != EndpointType.ROUTER.value:
                     update_monitoring_feature_set(
@@ -822,12 +633,7 @@ class MapFeatureNames(mlrun.feature_store.steps.MapClass):
                 label_columns = [
                     f"p{i}" for i, _ in enumerate(event[EventFieldType.PREDICTION])
                 ]
-                update_endpoint_record(
-                    project=self.project,
-                    endpoint_id=endpoint_id,
-                    attributes={EventFieldType.LABEL_NAMES: json.dumps(label_columns)},
-                )
+                attributes_to_update[EventFieldType.LABEL_NAMES] = label_columns
                 if endpoint_type != EndpointType.ROUTER.value:
                     update_monitoring_feature_set(
                         endpoint_record=endpoint_record,
@@ -848,6 +654,37 @@ class MapFeatureNames(mlrun.feature_store.steps.MapClass):
             # Update the endpoint type within the endpoint types dictionary
             self.endpoint_type[endpoint_id] = endpoint_type
+        # Update the first request time in the endpoint record
+        if endpoint_id not in self.first_request:
+            endpoint_record = endpoint_record or (
+                mlrun.db.get_run_db()
+                .get_model_endpoint(
+                    project=self.project,
+                    endpoint_id=endpoint_id,
+                    name=event[EventFieldType.ENDPOINT_NAME],
+                )
+                .flat_dict()
+            )
+            if not endpoint_record.get(EventFieldType.FIRST_REQUEST):
+                attributes_to_update[EventFieldType.FIRST_REQUEST] = (
+                    mlrun.utils.enrich_datetime_with_tz_info(
+                        event[EventFieldType.FIRST_REQUEST]
+                    )
+                )
+            self.first_request[endpoint_id] = True
+        if attributes_to_update:
+            logger.info(
+                "Updating endpoint record",
+                endpoint_id=endpoint_id,
+                attributes=attributes_to_update,
+            )
+            update_endpoint_record(
+                project=self.project,
+                endpoint_id=endpoint_id,
+                attributes=attributes_to_update,
+                endpoint_name=event[EventFieldType.ENDPOINT_NAME],
+            )
         # Add feature_name:value pairs along with a mapping dictionary of all of these pairs
         feature_names = self.feature_names[endpoint_id]
         self._map_dictionary_values(
@@ -898,30 +735,6 @@ class MapFeatureNames(mlrun.feature_store.steps.MapClass):
             event[mapping_dictionary][name] = value
-class UpdateEndpoint(mlrun.feature_store.steps.MapClass):
-    def __init__(self, project: str, **kwargs):
-        """
-        Update the model endpoint record in the DB. Note that the event at this point includes metadata and stats about
-        the average latency and the amount of predictions over time. This data will be used in the monitoring dashboards
-        such as "Model Monitoring - Performance" which can be found in Grafana.
-        :returns: Event as a dictionary (without any changes) for the next step (InferSchema).
-        """
-        super().__init__(**kwargs)
-        self.project = project
-    def do(self, event: dict):
-        # Remove labels from the event
-        event.pop(EventFieldType.LABELS)
-        update_endpoint_record(
-            project=self.project,
-            endpoint_id=event.pop(EventFieldType.ENDPOINT_ID),
-            attributes=event,
-        )
-        return event
 class InferSchema(mlrun.feature_store.steps.MapClass):
     def __init__(
         self,
@@ -966,14 +779,14 @@ class InferSchema(mlrun.feature_store.steps.MapClass):
 def update_endpoint_record(
     project: str,
     endpoint_id: str,
+    endpoint_name: str,
     attributes: dict,
 ):
-    model_endpoint_store = mlrun.model_monitoring.get_store_object(
+    mlrun.db.get_run_db().patch_model_endpoint(
         project=project,
-    )
-    model_endpoint_store.update_model_endpoint(
-        endpoint_id=endpoint_id, attributes=attributes
+        endpoint_id=endpoint_id,
+        attributes=attributes,
+        name=endpoint_name,
     )

mlrun/model_monitoring/writer.py CHANGED Viewed

@@ -21,7 +21,6 @@ import mlrun.common.schemas
 import mlrun.common.schemas.alert as alert_objects
 import mlrun.model_monitoring
 from mlrun.common.schemas.model_monitoring.constants import (
-    EventFieldType,
     HistogramDataDriftApplicationConstants,
     MetricData,
     ResultData,
@@ -121,9 +120,6 @@ class ModelMonitoringWriter(StepToDict):
             notification_types=[NotificationKind.slack]
         )
-        self._app_result_store = mlrun.model_monitoring.get_store_object(
-            project=self.project, secret_provider=secret_provider
-        )
         self._tsdb_connector = mlrun.model_monitoring.get_tsdb_connector(
             project=self.project, secret_provider=secret_provider
         )
@@ -266,14 +262,9 @@ class ModelMonitoringWriter(StepToDict):
                 == ResultStatusApp.potential_detection.value
             )
         ):
-            endpoint_id = event[WriterEvent.ENDPOINT_ID]
-            endpoint_record = self._endpoints_records.setdefault(
-                endpoint_id,
-                self._app_result_store.get_model_endpoint(endpoint_id=endpoint_id),
-            )
             event_value = {
                 "app_name": event[WriterEvent.APPLICATION_NAME],
-                "model": endpoint_record.get(EventFieldType.MODEL),
+                "model": event[WriterEvent.ENDPOINT_NAME],
                 "model_endpoint_id": event[WriterEvent.ENDPOINT_ID],
                 "result_name": event[ResultData.RESULT_NAME],
                 "result_value": event[ResultData.RESULT_VALUE],

mlrun/projects/pipelines.py CHANGED Viewed

@@ -39,7 +39,7 @@ from mlrun.utils import (
 from ..common.helpers import parse_versioned_object_uri
 from ..config import config
-from ..run import _run_pipeline, wait_for_pipeline_completion
+from ..run import _run_pipeline, retry_pipeline, wait_for_pipeline_completion
 from ..runtimes.pod import AutoMountType
@@ -421,6 +421,13 @@ class _PipelineRunStatus:
             self._state = returned_state
         return self._state
+    def retry(self) -> str:
+        run_id = self._engine.retry(
+            self,
+            project=self.project,
+        )
+        return run_id
     def __str__(self):
         return str(self.run_id)
@@ -440,6 +447,17 @@ class _PipelineRunner(abc.ABC):
             f"Save operation not supported in {cls.engine} pipeline engine"
         )
+    @classmethod
+    @abc.abstractmethod
+    def retry(
+        cls,
+        run: "_PipelineRunStatus",
+        project: typing.Optional["mlrun.projects.MlrunProject"] = None,
+    ) -> str:
+        raise NotImplementedError(
+            f"Retry operation not supported in {cls.engine} pipeline engine"
+        )
     @classmethod
     @abc.abstractmethod
     def run(
@@ -635,6 +653,24 @@ class _KFPRunner(_PipelineRunner):
         pipeline_context.clear()
         return _PipelineRunStatus(run_id, cls, project=project, workflow=workflow_spec)
+    @classmethod
+    def retry(
+        cls,
+        run: "_PipelineRunStatus",
+        project: typing.Optional["mlrun.projects.MlrunProject"] = None,
+    ) -> str:
+        project_name = project.metadata.name if project else ""
+        logger.info(
+            "Retrying pipeline",
+            run_id=run.run_id,
+            project=project_name,
+        )
+        run_id = retry_pipeline(
+            run.run_id,
+            project=project_name,
+        )
+        return run_id
     @staticmethod
     def wait_for_completion(
         run: "_PipelineRunStatus",

mlrun 1.8.0rc5__py3-none-any.whl → 1.8.0rc9__py3-none-any.whl

Potentially problematic release.

mlrun 1.8.0rc5__py3-none-any.whl → 1.8.0rc9__py3-none-any.whl