PyPI - mlrun - Versions diffs - 1.7.1rc10__py3-none-any.whl → 1.8.0rc8__py3-none-any.whl - Mend

mlrun 1.7.1rc10-py3-none-any.whl → 1.8.0rc8-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of mlrun might be problematic. Click here for more details.

Files changed (257)

mlrun/__init__.py +23 -21
mlrun/__main__.py +3 -3
mlrun/alerts/alert.py +148 -14
mlrun/artifacts/__init__.py +1 -2
mlrun/artifacts/base.py +46 -12
mlrun/artifacts/dataset.py +16 -16
mlrun/artifacts/document.py +334 -0
mlrun/artifacts/manager.py +15 -13
mlrun/artifacts/model.py +66 -53
mlrun/common/constants.py +7 -0
mlrun/common/formatters/__init__.py +1 -0
mlrun/common/formatters/feature_set.py +1 -0
mlrun/common/formatters/function.py +1 -0
mlrun/{model_monitoring/db/stores/base/__init__.py → common/formatters/model_endpoint.py} +16 -1
mlrun/common/formatters/pipeline.py +1 -2
mlrun/common/formatters/project.py +9 -0
mlrun/common/model_monitoring/__init__.py +0 -5
mlrun/common/model_monitoring/helpers.py +1 -29
mlrun/common/runtimes/constants.py +1 -2
mlrun/common/schemas/__init__.py +6 -2
mlrun/common/schemas/alert.py +111 -19
mlrun/common/schemas/api_gateway.py +3 -3
mlrun/common/schemas/artifact.py +11 -7
mlrun/common/schemas/auth.py +6 -4
mlrun/common/schemas/background_task.py +7 -7
mlrun/common/schemas/client_spec.py +2 -3
mlrun/common/schemas/clusterization_spec.py +2 -2
mlrun/common/schemas/common.py +53 -3
mlrun/common/schemas/constants.py +15 -0
mlrun/common/schemas/datastore_profile.py +1 -1
mlrun/common/schemas/feature_store.py +9 -9
mlrun/common/schemas/frontend_spec.py +4 -4
mlrun/common/schemas/function.py +10 -10
mlrun/common/schemas/hub.py +1 -1
mlrun/common/schemas/k8s.py +3 -3
mlrun/common/schemas/memory_reports.py +3 -3
mlrun/common/schemas/model_monitoring/__init__.py +2 -1
mlrun/common/schemas/model_monitoring/constants.py +66 -14
mlrun/common/schemas/model_monitoring/grafana.py +1 -1
mlrun/common/schemas/model_monitoring/model_endpoints.py +91 -147
mlrun/common/schemas/notification.py +24 -3
mlrun/common/schemas/object.py +1 -1
mlrun/common/schemas/pagination.py +4 -4
mlrun/common/schemas/partition.py +137 -0
mlrun/common/schemas/pipeline.py +2 -2
mlrun/common/schemas/project.py +25 -17
mlrun/common/schemas/runs.py +2 -2
mlrun/common/schemas/runtime_resource.py +5 -5
mlrun/common/schemas/schedule.py +1 -1
mlrun/common/schemas/secret.py +1 -1
mlrun/common/schemas/tag.py +3 -3
mlrun/common/schemas/workflow.py +5 -5
mlrun/config.py +67 -10
mlrun/data_types/__init__.py +0 -2
mlrun/data_types/infer.py +3 -1
mlrun/data_types/spark.py +2 -1
mlrun/datastore/__init__.py +0 -2
mlrun/datastore/alibaba_oss.py +4 -1
mlrun/datastore/azure_blob.py +4 -1
mlrun/datastore/base.py +12 -4
mlrun/datastore/datastore.py +9 -3
mlrun/datastore/datastore_profile.py +79 -20
mlrun/datastore/dbfs_store.py +4 -1
mlrun/datastore/filestore.py +4 -1
mlrun/datastore/google_cloud_storage.py +4 -1
mlrun/datastore/hdfs.py +4 -1
mlrun/datastore/inmem.py +4 -1
mlrun/datastore/redis.py +4 -1
mlrun/datastore/s3.py +4 -1
mlrun/datastore/sources.py +52 -51
mlrun/datastore/store_resources.py +0 -2
mlrun/datastore/targets.py +21 -21
mlrun/datastore/utils.py +2 -2
mlrun/datastore/v3io.py +4 -1
mlrun/datastore/vectorstore.py +194 -0
mlrun/datastore/wasbfs/fs.py +13 -12
mlrun/db/base.py +208 -82
mlrun/db/factory.py +0 -3
mlrun/db/httpdb.py +1237 -386
mlrun/db/nopdb.py +201 -74
mlrun/errors.py +2 -2
mlrun/execution.py +136 -50
mlrun/feature_store/__init__.py +0 -2
mlrun/feature_store/api.py +41 -40
mlrun/feature_store/common.py +9 -9
mlrun/feature_store/feature_set.py +20 -18
mlrun/feature_store/feature_vector.py +27 -24
mlrun/feature_store/retrieval/base.py +14 -9
mlrun/feature_store/retrieval/job.py +2 -1
mlrun/feature_store/steps.py +2 -2
mlrun/features.py +30 -13
mlrun/frameworks/__init__.py +1 -2
mlrun/frameworks/_common/__init__.py +1 -2
mlrun/frameworks/_common/artifacts_library.py +2 -2
mlrun/frameworks/_common/mlrun_interface.py +10 -6
mlrun/frameworks/_common/model_handler.py +29 -27
mlrun/frameworks/_common/producer.py +3 -1
mlrun/frameworks/_dl_common/__init__.py +1 -2
mlrun/frameworks/_dl_common/loggers/__init__.py +1 -2
mlrun/frameworks/_dl_common/loggers/mlrun_logger.py +4 -4
mlrun/frameworks/_dl_common/loggers/tensorboard_logger.py +3 -3
mlrun/frameworks/_ml_common/__init__.py +1 -2
mlrun/frameworks/_ml_common/loggers/__init__.py +1 -2
mlrun/frameworks/_ml_common/model_handler.py +21 -21
mlrun/frameworks/_ml_common/plans/__init__.py +1 -2
mlrun/frameworks/_ml_common/plans/confusion_matrix_plan.py +3 -1
mlrun/frameworks/_ml_common/plans/dataset_plan.py +3 -3
mlrun/frameworks/_ml_common/plans/roc_curve_plan.py +4 -4
mlrun/frameworks/auto_mlrun/__init__.py +1 -2
mlrun/frameworks/auto_mlrun/auto_mlrun.py +22 -15
mlrun/frameworks/huggingface/__init__.py +1 -2
mlrun/frameworks/huggingface/model_server.py +9 -9
mlrun/frameworks/lgbm/__init__.py +47 -44
mlrun/frameworks/lgbm/callbacks/__init__.py +1 -2
mlrun/frameworks/lgbm/callbacks/logging_callback.py +4 -2
mlrun/frameworks/lgbm/callbacks/mlrun_logging_callback.py +4 -2
mlrun/frameworks/lgbm/mlrun_interfaces/__init__.py +1 -2
mlrun/frameworks/lgbm/mlrun_interfaces/mlrun_interface.py +5 -5
mlrun/frameworks/lgbm/model_handler.py +15 -11
mlrun/frameworks/lgbm/model_server.py +11 -7
mlrun/frameworks/lgbm/utils.py +2 -2
mlrun/frameworks/onnx/__init__.py +1 -2
mlrun/frameworks/onnx/dataset.py +3 -3
mlrun/frameworks/onnx/mlrun_interface.py +2 -2
mlrun/frameworks/onnx/model_handler.py +7 -5
mlrun/frameworks/onnx/model_server.py +8 -6
mlrun/frameworks/parallel_coordinates.py +11 -11
mlrun/frameworks/pytorch/__init__.py +22 -23
mlrun/frameworks/pytorch/callbacks/__init__.py +1 -2
mlrun/frameworks/pytorch/callbacks/callback.py +2 -1
mlrun/frameworks/pytorch/callbacks/logging_callback.py +15 -8
mlrun/frameworks/pytorch/callbacks/mlrun_logging_callback.py +19 -12
mlrun/frameworks/pytorch/callbacks/tensorboard_logging_callback.py +22 -15
mlrun/frameworks/pytorch/callbacks_handler.py +36 -30
mlrun/frameworks/pytorch/mlrun_interface.py +17 -17
mlrun/frameworks/pytorch/model_handler.py +21 -17
mlrun/frameworks/pytorch/model_server.py +13 -9
mlrun/frameworks/sklearn/__init__.py +19 -18
mlrun/frameworks/sklearn/estimator.py +2 -2
mlrun/frameworks/sklearn/metric.py +3 -3
mlrun/frameworks/sklearn/metrics_library.py +8 -6
mlrun/frameworks/sklearn/mlrun_interface.py +3 -2
mlrun/frameworks/sklearn/model_handler.py +4 -3
mlrun/frameworks/tf_keras/__init__.py +11 -12
mlrun/frameworks/tf_keras/callbacks/__init__.py +1 -2
mlrun/frameworks/tf_keras/callbacks/logging_callback.py +17 -14
mlrun/frameworks/tf_keras/callbacks/mlrun_logging_callback.py +15 -12
mlrun/frameworks/tf_keras/callbacks/tensorboard_logging_callback.py +21 -18
mlrun/frameworks/tf_keras/model_handler.py +17 -13
mlrun/frameworks/tf_keras/model_server.py +12 -8
mlrun/frameworks/xgboost/__init__.py +19 -18
mlrun/frameworks/xgboost/model_handler.py +13 -9
mlrun/launcher/base.py +3 -4
mlrun/launcher/local.py +1 -1
mlrun/launcher/remote.py +1 -1
mlrun/lists.py +4 -3
mlrun/model.py +117 -46
mlrun/model_monitoring/__init__.py +4 -4
mlrun/model_monitoring/api.py +61 -59
mlrun/model_monitoring/applications/_application_steps.py +17 -17
mlrun/model_monitoring/applications/base.py +165 -6
mlrun/model_monitoring/applications/context.py +88 -37
mlrun/model_monitoring/applications/evidently_base.py +0 -1
mlrun/model_monitoring/applications/histogram_data_drift.py +43 -21
mlrun/model_monitoring/applications/results.py +55 -3
mlrun/model_monitoring/controller.py +207 -239
mlrun/model_monitoring/db/__init__.py +0 -2
mlrun/model_monitoring/db/_schedules.py +156 -0
mlrun/model_monitoring/db/_stats.py +189 -0
mlrun/model_monitoring/db/tsdb/base.py +78 -25
mlrun/model_monitoring/db/tsdb/tdengine/schemas.py +61 -6
mlrun/model_monitoring/db/tsdb/tdengine/stream_graph_steps.py +33 -0
mlrun/model_monitoring/db/tsdb/tdengine/tdengine_connector.py +255 -29
mlrun/model_monitoring/db/tsdb/v3io/stream_graph_steps.py +1 -0
mlrun/model_monitoring/db/tsdb/v3io/v3io_connector.py +78 -17
mlrun/model_monitoring/helpers.py +152 -49
mlrun/model_monitoring/stream_processing.py +99 -283
mlrun/model_monitoring/tracking_policy.py +10 -3
mlrun/model_monitoring/writer.py +48 -36
mlrun/package/__init__.py +3 -6
mlrun/package/context_handler.py +1 -1
mlrun/package/packager.py +12 -9
mlrun/package/packagers/__init__.py +0 -2
mlrun/package/packagers/default_packager.py +14 -11
mlrun/package/packagers/numpy_packagers.py +16 -7
mlrun/package/packagers/pandas_packagers.py +18 -18
mlrun/package/packagers/python_standard_library_packagers.py +25 -11
mlrun/package/packagers_manager.py +31 -14
mlrun/package/utils/__init__.py +0 -3
mlrun/package/utils/_pickler.py +6 -6
mlrun/platforms/__init__.py +47 -16
mlrun/platforms/iguazio.py +4 -1
mlrun/projects/operations.py +27 -27
mlrun/projects/pipelines.py +71 -36
mlrun/projects/project.py +865 -206
mlrun/run.py +53 -10
mlrun/runtimes/__init__.py +1 -3
mlrun/runtimes/base.py +15 -11
mlrun/runtimes/daskjob.py +9 -9
mlrun/runtimes/generators.py +2 -1
mlrun/runtimes/kubejob.py +4 -5
mlrun/runtimes/mounts.py +572 -0
mlrun/runtimes/mpijob/__init__.py +0 -2
mlrun/runtimes/mpijob/abstract.py +7 -6
mlrun/runtimes/nuclio/api_gateway.py +7 -7
mlrun/runtimes/nuclio/application/application.py +11 -11
mlrun/runtimes/nuclio/function.py +19 -17
mlrun/runtimes/nuclio/serving.py +18 -11
mlrun/runtimes/pod.py +154 -45
mlrun/runtimes/remotesparkjob.py +3 -2
mlrun/runtimes/sparkjob/__init__.py +0 -2
mlrun/runtimes/sparkjob/spark3job.py +21 -11
mlrun/runtimes/utils.py +6 -5
mlrun/serving/merger.py +6 -4
mlrun/serving/remote.py +18 -17
mlrun/serving/routers.py +185 -172
mlrun/serving/server.py +7 -1
mlrun/serving/states.py +97 -78
mlrun/serving/utils.py +13 -2
mlrun/serving/v1_serving.py +3 -2
mlrun/serving/v2_serving.py +74 -65
mlrun/track/__init__.py +1 -1
mlrun/track/tracker.py +2 -2
mlrun/track/trackers/mlflow_tracker.py +6 -5
mlrun/utils/async_http.py +1 -1
mlrun/utils/clones.py +1 -1
mlrun/utils/helpers.py +54 -16
mlrun/utils/logger.py +106 -4
mlrun/utils/notifications/notification/__init__.py +22 -19
mlrun/utils/notifications/notification/base.py +33 -14
mlrun/utils/notifications/notification/console.py +6 -6
mlrun/utils/notifications/notification/git.py +11 -11
mlrun/utils/notifications/notification/ipython.py +10 -9
mlrun/utils/notifications/notification/mail.py +176 -0
mlrun/utils/notifications/notification/slack.py +6 -6
mlrun/utils/notifications/notification/webhook.py +6 -6
mlrun/utils/notifications/notification_pusher.py +86 -44
mlrun/utils/regex.py +3 -1
mlrun/utils/version/version.json +2 -2
{mlrun-1.7.1rc10.dist-info → mlrun-1.8.0rc8.dist-info}/METADATA +21 -16
mlrun-1.8.0rc8.dist-info/RECORD +347 -0
mlrun/model_monitoring/db/stores/__init__.py +0 -136
mlrun/model_monitoring/db/stores/base/store.py +0 -213
mlrun/model_monitoring/db/stores/sqldb/__init__.py +0 -13
mlrun/model_monitoring/db/stores/sqldb/models/__init__.py +0 -71
mlrun/model_monitoring/db/stores/sqldb/models/base.py +0 -190
mlrun/model_monitoring/db/stores/sqldb/models/mysql.py +0 -103
mlrun/model_monitoring/db/stores/sqldb/models/sqlite.py +0 -40
mlrun/model_monitoring/db/stores/sqldb/sql_store.py +0 -659
mlrun/model_monitoring/db/stores/v3io_kv/__init__.py +0 -13
mlrun/model_monitoring/db/stores/v3io_kv/kv_store.py +0 -726
mlrun/model_monitoring/model_endpoint.py +0 -118
mlrun-1.7.1rc10.dist-info/RECORD +0 -351
{mlrun-1.7.1rc10.dist-info → mlrun-1.8.0rc8.dist-info}/LICENSE +0 -0
{mlrun-1.7.1rc10.dist-info → mlrun-1.8.0rc8.dist-info}/WHEEL +0 -0
{mlrun-1.7.1rc10.dist-info → mlrun-1.8.0rc8.dist-info}/entry_points.txt +0 -0
{mlrun-1.7.1rc10.dist-info → mlrun-1.8.0rc8.dist-info}/top_level.txt +0 -0

mlrun/model_monitoring/stream_processing.py CHANGED

@@ -14,7 +14,6 @@
 import collections
 import datetime
-import json
 import os
 import typing
@@ -30,14 +29,12 @@ import mlrun.model_monitoring.db
 import mlrun.serving.states
 import mlrun.utils
 from mlrun.common.schemas.model_monitoring.constants import (
+    EndpointType,
     EventFieldType,
-    EventKeyMetrics,
-    EventLiveStats,
     FileTargetKind,
-    ModelEndpointTarget,
     ProjectSecretKeys,
 )
-from mlrun.model_monitoring.db import StoreBase, TSDBConnector
+from mlrun.model_monitoring.db import TSDBConnector
 from mlrun.utils import logger
@@ -51,7 +48,7 @@ class EventStreamProcessor:
         parquet_target: str,
         aggregate_windows: typing.Optional[list[str]] = None,
         aggregate_period: str = "5m",
-        model_monitoring_access_key: str = None,
+        model_monitoring_access_key: typing.Optional[str] = None,
     ):
         # General configurations, mainly used for the storey steps in the future serving graph
         self.project = project
@@ -85,7 +82,7 @@ class EventStreamProcessor:
         v3io_access_key: typing.Optional[str] = None,
         v3io_framesd: typing.Optional[str] = None,
         v3io_api: typing.Optional[str] = None,
-        model_monitoring_access_key: str = None,
+        model_monitoring_access_key: typing.Optional[str] = None,
     ):
         # Get the V3IO configurations
         self.v3io_framesd = v3io_framesd or mlrun.mlconf.v3io_framesd
@@ -101,18 +98,6 @@ class EventStreamProcessor:
             v3io_access_key=self.model_monitoring_access_key, v3io_api=self.v3io_api
         )
-        # KV path
-        kv_path = mlrun.mlconf.get_model_monitoring_file_target_path(
-            project=self.project, kind=FileTargetKind.ENDPOINTS
-        )
-        (
-            _,
-            self.kv_container,
-            self.kv_path,
-        ) = mlrun.common.model_monitoring.helpers.parse_model_endpoint_store_prefix(
-            kv_path
-        )
         # TSDB path and configurations
         tsdb_path = mlrun.mlconf.get_model_monitoring_file_target_path(
             project=self.project, kind=FileTargetKind.EVENTS
@@ -133,7 +118,6 @@ class EventStreamProcessor:
         self,
         fn: mlrun.runtimes.ServingRuntime,
         tsdb_connector: TSDBConnector,
-        endpoint_store: StoreBase,
     ) -> None:
         """
         Apply monitoring serving graph to a given serving function. The following serving graph includes about 4 main
@@ -162,31 +146,23 @@ class EventStreamProcessor:
         :param fn: A serving function.
         :param tsdb_connector: Time series database connector.
-        :param endpoint_store: KV/SQL store used for endpoint data.
         """
         graph = typing.cast(
             mlrun.serving.states.RootFlowStep,
             fn.set_topology(mlrun.serving.states.StepKinds.flow),
         )
-        graph.add_step(
-            "ExtractEndpointID",
-            "extract_endpoint",
-            full_event=True,
-        )
         # split the graph between event with error vs valid event
         graph.add_step(
             "storey.Filter",
             "FilterError",
-            after="extract_endpoint",
             _fn="(event.get('error') is None)",
         )
         graph.add_step(
             "storey.Filter",
             "ForwardError",
-            after="extract_endpoint",
             _fn="(event.get('error') is not None)",
         )
@@ -198,7 +174,7 @@ class EventStreamProcessor:
         def apply_process_endpoint_event():
             graph.add_step(
                 "ProcessEndpointEvent",
-                after="extract_endpoint",  # TODO: change this to FilterError in ML-7456
+                after="FilterError",
                 full_event=True,
                 project=self.project,
             )
@@ -233,79 +209,11 @@ class EventStreamProcessor:
             )
         apply_map_feature_names()
-        # Calculate number of predictions and average latency
-        def apply_storey_aggregations():
-            # Calculate number of predictions for each window (5 min and 1 hour by default)
-            graph.add_step(
-                class_name="storey.AggregateByKey",
-                aggregates=[
-                    {
-                        "name": EventFieldType.LATENCY,
-                        "column": EventFieldType.LATENCY,
-                        "operations": ["count", "avg"],
-                        "windows": self.aggregate_windows,
-                        "period": self.aggregate_period,
-                    }
-                ],
-                name=EventFieldType.LATENCY,
-                after="MapFeatureNames",
-                step_name="Aggregates",
-                table=".",
-                key_field=EventFieldType.ENDPOINT_ID,
-            )
-            # Calculate average latency time for each window (5 min and 1 hour by default)
-            graph.add_step(
-                class_name="storey.Rename",
-                mapping={
-                    "latency_count_5m": EventLiveStats.PREDICTIONS_COUNT_5M,
-                    "latency_count_1h": EventLiveStats.PREDICTIONS_COUNT_1H,
-                },
-                name="Rename",
-                after=EventFieldType.LATENCY,
-            )
-        apply_storey_aggregations()
-        # KV/SQL branch
-        # Filter relevant keys from the event before writing the data into the database table
-        def apply_process_before_endpoint_update():
-            graph.add_step(
-                "ProcessBeforeEndpointUpdate",
-                name="ProcessBeforeEndpointUpdate",
-                after="Rename",
-            )
-        apply_process_before_endpoint_update()
-        # Write the filtered event to KV/SQL table. At this point, the serving graph updates the stats
-        # about average latency and the amount of predictions over time
-        def apply_update_endpoint():
-            graph.add_step(
-                "UpdateEndpoint",
-                name="UpdateEndpoint",
-                after="ProcessBeforeEndpointUpdate",
-                project=self.project,
-            )
-        apply_update_endpoint()
-        # (only for V3IO KV target) - Apply infer_schema on the model endpoints table for generating schema file
-        # which will be used by Grafana monitoring dashboards
-        def apply_infer_schema():
-            graph.add_step(
-                "InferSchema",
-                name="InferSchema",
-                after="UpdateEndpoint",
-                v3io_framesd=self.v3io_framesd,
-                container=self.kv_container,
-                table=self.kv_path,
-            )
-        if endpoint_store.type == ModelEndpointTarget.V3IO_NOSQL:
-            apply_infer_schema()
-        tsdb_connector.apply_monitoring_stream_steps(graph=graph)
+        tsdb_connector.apply_monitoring_stream_steps(
+            graph=graph,
+            aggregate_windows=self.aggregate_windows,
+            aggregate_period=self.aggregate_period,
+        )
         # Parquet branch
         # Filter and validate different keys before writing the data to Parquet target
@@ -341,91 +249,6 @@ class EventStreamProcessor:
         apply_parquet_target()
-class ProcessBeforeEndpointUpdate(mlrun.feature_store.steps.MapClass):
-    def __init__(self, **kwargs):
-        """
-        Filter relevant keys from the event before writing the data to database table (in EndpointUpdate step).
-        Note that in the endpoint table we only keep metadata (function_uri, model_class, etc.) and stats about the
-        average latency and the number of predictions (per 5min and 1hour).
-        :returns: A filtered event as a dictionary which will be written to the endpoint table in the next step.
-        """
-        super().__init__(**kwargs)
-    def do(self, event):
-        # Compute prediction per second
-        event[EventLiveStats.PREDICTIONS_PER_SECOND] = (
-            float(event[EventLiveStats.PREDICTIONS_COUNT_5M]) / 300
-        )
-        # Filter relevant keys
-        e = {
-            k: event[k]
-            for k in [
-                EventFieldType.FUNCTION_URI,
-                EventFieldType.MODEL,
-                EventFieldType.MODEL_CLASS,
-                EventFieldType.ENDPOINT_ID,
-                EventFieldType.LABELS,
-                EventFieldType.FIRST_REQUEST,
-                EventFieldType.LAST_REQUEST,
-                EventFieldType.ERROR_COUNT,
-            ]
-        }
-        # Add generic metrics statistics
-        generic_metrics = {
-            k: event[k]
-            for k in [
-                EventLiveStats.LATENCY_AVG_5M,
-                EventLiveStats.LATENCY_AVG_1H,
-                EventLiveStats.PREDICTIONS_PER_SECOND,
-                EventLiveStats.PREDICTIONS_COUNT_5M,
-                EventLiveStats.PREDICTIONS_COUNT_1H,
-            ]
-        }
-        e[EventFieldType.METRICS] = json.dumps(
-            {EventKeyMetrics.GENERIC: generic_metrics}
-        )
-        # Write labels as json string as required by the DB format
-        e[EventFieldType.LABELS] = json.dumps(e[EventFieldType.LABELS])
-        return e
-class ExtractEndpointID(mlrun.feature_store.steps.MapClass):
-    def __init__(self, **kwargs) -> None:
-        """
-        Generate the model endpoint ID based on the event parameters and attach it to the event.
-        """
-        super().__init__(**kwargs)
-    def do(self, full_event) -> typing.Union[storey.Event, None]:
-        # Getting model version and function uri from event
-        # and use them for retrieving the endpoint_id
-        function_uri = full_event.body.get(EventFieldType.FUNCTION_URI)
-        if not is_not_none(function_uri, [EventFieldType.FUNCTION_URI]):
-            return None
-        model = full_event.body.get(EventFieldType.MODEL)
-        if not is_not_none(model, [EventFieldType.MODEL]):
-            return None
-        version = full_event.body.get(EventFieldType.VERSION)
-        versioned_model = f"{model}:{version}" if version else f"{model}:latest"
-        endpoint_id = mlrun.common.model_monitoring.create_model_endpoint_uid(
-            function_uri=function_uri,
-            versioned_model=versioned_model,
-        )
-        endpoint_id = str(endpoint_id)
-        full_event.body[EventFieldType.ENDPOINT_ID] = endpoint_id
-        full_event.body[EventFieldType.VERSIONED_MODEL] = versioned_model
-        return full_event
 class ProcessBeforeParquet(mlrun.feature_store.steps.MapClass):
     def __init__(self, **kwargs):
         """
@@ -498,20 +321,27 @@ class ProcessEndpointEvent(mlrun.feature_store.steps.MapClass):
     def do(self, full_event):
         event = full_event.body
+        # Getting model version and function uri from event
+        # and use them for retrieving the endpoint_id
+        function_uri = full_event.body.get(EventFieldType.FUNCTION_URI)
+        if not is_not_none(function_uri, [EventFieldType.FUNCTION_URI]):
+            return None
+        model = full_event.body.get(EventFieldType.MODEL)
+        if not is_not_none(model, [EventFieldType.MODEL]):
+            return None
-        versioned_model = event[EventFieldType.VERSIONED_MODEL]
+        version = full_event.body.get(EventFieldType.VERSION)
+        versioned_model = f"{model}:{version}" if version else f"{model}:latest"
+        full_event.body[EventFieldType.VERSIONED_MODEL] = versioned_model
         endpoint_id = event[EventFieldType.ENDPOINT_ID]
-        function_uri = event[EventFieldType.FUNCTION_URI]
         # In case this process fails, resume state from existing record
-        self.resume_state(endpoint_id)
-        # If error key has been found in the current event,
-        # increase the error counter by 1 and raise the error description
-        error = event.get("error")
-        if error:  # TODO: delete this in ML-7456
-            self.error_count[endpoint_id] += 1
-            raise mlrun.errors.MLRunInvalidArgumentError(str(error))
+        self.resume_state(
+            endpoint_id,
+            full_event.body.get(EventFieldType.MODEL),
+        )
         # Validate event fields
         model_class = event.get("model_class") or event.get("class")
@@ -535,11 +365,6 @@ class ProcessEndpointEvent(mlrun.feature_store.steps.MapClass):
             # Set time for the first request of the current endpoint
             self.first_request[endpoint_id] = timestamp
-        # Validate that the request time of the current event is later than the previous request time
-        self._validate_last_request_timestamp(
-            endpoint_id=endpoint_id, timestamp=timestamp
-        )
         # Set time for the last reqeust of the current endpoint
         self.last_request[endpoint_id] = timestamp
@@ -609,6 +434,7 @@ class ProcessEndpointEvent(mlrun.feature_store.steps.MapClass):
                 {
                     EventFieldType.FUNCTION_URI: function_uri,
                     EventFieldType.MODEL: versioned_model,
+                    EventFieldType.ENDPOINT_NAME: event.get(EventFieldType.MODEL),
                     EventFieldType.MODEL_CLASS: model_class,
                     EventFieldType.TIMESTAMP: timestamp,
                     EventFieldType.ENDPOINT_ID: endpoint_id,
@@ -635,33 +461,19 @@ class ProcessEndpointEvent(mlrun.feature_store.steps.MapClass):
         storey_event = storey.Event(body=events, key=endpoint_id)
         return storey_event
-    def _validate_last_request_timestamp(self, endpoint_id: str, timestamp: str):
-        """Validate that the request time of the current event is later than the previous request time that has
-        already been processed.
-        :param endpoint_id: The unique id of the model endpoint.
-        :param timestamp:   Event request time as a string.
-        :raise MLRunPreconditionFailedError: If the request time of the current is later than the previous request time.
-        """
-        if (
-            endpoint_id in self.last_request
-            and self.last_request[endpoint_id] > timestamp
-        ):
-            logger.error(
-                f"current event request time {timestamp} is earlier than the last request time "
-                f"{self.last_request[endpoint_id]} - write to TSDB will be rejected"
-            )
-    def resume_state(self, endpoint_id):
+    def resume_state(self, endpoint_id, endpoint_name):
         # Make sure process is resumable, if process fails for any reason, be able to pick things up close to where we
         # left them
         if endpoint_id not in self.endpoints:
             logger.info("Trying to resume state", endpoint_id=endpoint_id)
-            endpoint_record = mlrun.model_monitoring.helpers.get_endpoint_record(
-                project=self.project,
-                endpoint_id=endpoint_id,
+            endpoint_record = (
+                mlrun.db.get_run_db()
+                .get_model_endpoint(
+                    project=self.project,
+                    endpoint_id=endpoint_id,
+                    name=endpoint_name,
+                )
+                .flat_dict()
             )
             # If model endpoint found, get first_request, last_request and error_count values
@@ -735,6 +547,7 @@ class MapFeatureNames(mlrun.feature_store.steps.MapClass):
         # and labels columns were not found in the current event
         self.feature_names = {}
         self.label_columns = {}
+        self.first_request = {}
         # Dictionary to manage the model endpoint types - important for the V3IO TSDB
         self.endpoint_type = {}
@@ -766,23 +579,29 @@ class MapFeatureNames(mlrun.feature_store.steps.MapClass):
             if isinstance(feature_value, int):
                 feature_values[index] = float(feature_value)
+        attributes_to_update = {}
+        endpoint_record = None
         # Get feature names and label columns
         if endpoint_id not in self.feature_names:
-            endpoint_record = mlrun.model_monitoring.helpers.get_endpoint_record(
-                project=self.project,
-                endpoint_id=endpoint_id,
+            endpoint_record = (
+                mlrun.db.get_run_db()
+                .get_model_endpoint(
+                    project=self.project,
+                    endpoint_id=endpoint_id,
+                    name=event[EventFieldType.ENDPOINT_NAME],
+                )
+                .flat_dict()
             )
             feature_names = endpoint_record.get(EventFieldType.FEATURE_NAMES)
-            feature_names = json.loads(feature_names) if feature_names else None
             label_columns = endpoint_record.get(EventFieldType.LABEL_NAMES)
-            label_columns = json.loads(label_columns) if label_columns else None
             # If feature names were not found,
             # try to retrieve them from the previous events of the current process
             if not feature_names and self._infer_columns_from_data:
                 feature_names = self._infer_feature_names_from_data(event)
+            endpoint_type = int(endpoint_record.get(EventFieldType.ENDPOINT_TYPE))
             if not feature_names:
                 logger.warn(
                     "Feature names are not initialized, they will be automatically generated",
@@ -793,19 +612,14 @@ class MapFeatureNames(mlrun.feature_store.steps.MapClass):
                 ]
                 # Update the endpoint record with the generated features
-                update_endpoint_record(
-                    project=self.project,
-                    endpoint_id=endpoint_id,
-                    attributes={
-                        EventFieldType.FEATURE_NAMES: json.dumps(feature_names)
-                    },
-                )
+                attributes_to_update[EventFieldType.FEATURE_NAMES] = feature_names
-                update_monitoring_feature_set(
-                    endpoint_record=endpoint_record,
-                    feature_names=feature_names,
-                    feature_values=feature_values,
-                )
+                if endpoint_type != EndpointType.ROUTER.value:
+                    update_monitoring_feature_set(
+                        endpoint_record=endpoint_record,
+                        feature_names=feature_names,
+                        feature_values=feature_values,
+                    )
             # Similar process with label columns
             if not label_columns and self._infer_columns_from_data:
@@ -819,17 +633,13 @@ class MapFeatureNames(mlrun.feature_store.steps.MapClass):
                 label_columns = [
                     f"p{i}" for i, _ in enumerate(event[EventFieldType.PREDICTION])
                 ]
-                update_endpoint_record(
-                    project=self.project,
-                    endpoint_id=endpoint_id,
-                    attributes={EventFieldType.LABEL_NAMES: json.dumps(label_columns)},
-                )
-                update_monitoring_feature_set(
-                    endpoint_record=endpoint_record,
-                    feature_names=label_columns,
-                    feature_values=label_values,
-                )
+                attributes_to_update[EventFieldType.LABEL_NAMES] = label_columns
+                if endpoint_type != EndpointType.ROUTER.value:
+                    update_monitoring_feature_set(
+                        endpoint_record=endpoint_record,
+                        feature_names=label_columns,
+                        feature_values=label_values,
+                    )
             self.label_columns[endpoint_id] = label_columns
             self.feature_names[endpoint_id] = feature_names
@@ -842,9 +652,39 @@ class MapFeatureNames(mlrun.feature_store.steps.MapClass):
             )
             # Update the endpoint type within the endpoint types dictionary
-            endpoint_type = int(endpoint_record.get(EventFieldType.ENDPOINT_TYPE))
             self.endpoint_type[endpoint_id] = endpoint_type
+        # Update the first request time in the endpoint record
+        if endpoint_id not in self.first_request:
+            endpoint_record = endpoint_record or (
+                mlrun.db.get_run_db()
+                .get_model_endpoint(
+                    project=self.project,
+                    endpoint_id=endpoint_id,
+                    name=event[EventFieldType.ENDPOINT_NAME],
+                )
+                .flat_dict()
+            )
+            if not endpoint_record.get(EventFieldType.FIRST_REQUEST):
+                attributes_to_update[EventFieldType.FIRST_REQUEST] = (
+                    mlrun.utils.enrich_datetime_with_tz_info(
+                        event[EventFieldType.FIRST_REQUEST]
+                    )
+                )
+            self.first_request[endpoint_id] = True
+        if attributes_to_update:
+            logger.info(
+                "Updating endpoint record",
+                endpoint_id=endpoint_id,
+                attributes=attributes_to_update,
+            )
+            update_endpoint_record(
+                project=self.project,
+                endpoint_id=endpoint_id,
+                attributes=attributes_to_update,
+                endpoint_name=event[EventFieldType.ENDPOINT_NAME],
+            )
         # Add feature_name:value pairs along with a mapping dictionary of all of these pairs
         feature_names = self.feature_names[endpoint_id]
         self._map_dictionary_values(
@@ -895,30 +735,6 @@ class MapFeatureNames(mlrun.feature_store.steps.MapClass):
             event[mapping_dictionary][name] = value
-class UpdateEndpoint(mlrun.feature_store.steps.MapClass):
-    def __init__(self, project: str, **kwargs):
-        """
-        Update the model endpoint record in the DB. Note that the event at this point includes metadata and stats about
-        the average latency and the amount of predictions over time. This data will be used in the monitoring dashboards
-        such as "Model Monitoring - Performance" which can be found in Grafana.
-        :returns: Event as a dictionary (without any changes) for the next step (InferSchema).
-        """
-        super().__init__(**kwargs)
-        self.project = project
-    def do(self, event: dict):
-        # Remove labels from the event
-        event.pop(EventFieldType.LABELS)
-        update_endpoint_record(
-            project=self.project,
-            endpoint_id=event.pop(EventFieldType.ENDPOINT_ID),
-            attributes=event,
-        )
-        return event
 class InferSchema(mlrun.feature_store.steps.MapClass):
     def __init__(
         self,
@@ -963,14 +779,14 @@ class InferSchema(mlrun.feature_store.steps.MapClass):
 def update_endpoint_record(
     project: str,
     endpoint_id: str,
+    endpoint_name: str,
     attributes: dict,
 ):
-    model_endpoint_store = mlrun.model_monitoring.get_store_object(
+    mlrun.db.get_run_db().patch_model_endpoint(
         project=project,
-    )
-    model_endpoint_store.update_model_endpoint(
-        endpoint_id=endpoint_id, attributes=attributes
+        endpoint_id=endpoint_id,
+        attributes=attributes,
+        name=endpoint_name,
     )

mlrun/model_monitoring/tracking_policy.py CHANGED Viewed

@@ -13,7 +13,7 @@
 # limitations under the License.
 import warnings
-from typing import Union
+from typing import Optional, Union
 import mlrun.common.schemas.schedule
 import mlrun.model
@@ -74,7 +74,9 @@ class TrackingPolicy(mlrun.model.ModelObj):
         self.default_controller_image = default_controller_image
     @classmethod
-    def from_dict(cls, struct=None, fields=None, deprecated_fields: dict = None):
+    def from_dict(
+        cls, struct=None, fields=None, deprecated_fields: Optional[dict] = None
+    ):
         new_obj = super().from_dict(
             struct, fields=cls._dict_fields, deprecated_fields=deprecated_fields
         )
@@ -102,7 +104,12 @@ class TrackingPolicy(mlrun.model.ModelObj):
                 )
         return new_obj
-    def to_dict(self, fields: list = None, exclude: list = None, strip: bool = False):
+    def to_dict(
+        self,
+        fields: Optional[list] = None,
+        exclude: Optional[list] = None,
+        strip: bool = False,
+    ):
         struct = super().to_dict(
             fields,
             exclude=[

mlrun 1.7.1rc10__py3-none-any.whl → 1.8.0rc8__py3-none-any.whl

Potentially problematic release.

mlrun 1.7.1rc10__py3-none-any.whl → 1.8.0rc8__py3-none-any.whl