PyPI - mlrun - Versions diffs - 1.7.0rc14__py3-none-any.whl → 1.7.0rc22__py3-none-any.whl - Mend

mlrun 1.7.0rc14__py3-none-any.whl → 1.7.0rc22__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of mlrun might be problematic. Click here for more details.

Files changed (160)

mlrun/__init__.py +10 -1
mlrun/__main__.py +23 -111
mlrun/alerts/__init__.py +15 -0
mlrun/alerts/alert.py +169 -0
mlrun/api/schemas/__init__.py +4 -3
mlrun/artifacts/__init__.py +8 -3
mlrun/artifacts/base.py +36 -253
mlrun/artifacts/dataset.py +9 -190
mlrun/artifacts/manager.py +46 -42
mlrun/artifacts/model.py +9 -141
mlrun/artifacts/plots.py +14 -375
mlrun/common/constants.py +65 -3
mlrun/common/formatters/__init__.py +19 -0
mlrun/{runtimes/mpijob/v1alpha1.py → common/formatters/artifact.py} +6 -14
mlrun/common/formatters/base.py +113 -0
mlrun/common/formatters/function.py +46 -0
mlrun/common/formatters/pipeline.py +53 -0
mlrun/common/formatters/project.py +51 -0
mlrun/{runtimes → common/runtimes}/constants.py +32 -4
mlrun/common/schemas/__init__.py +10 -5
mlrun/common/schemas/alert.py +92 -11
mlrun/common/schemas/api_gateway.py +56 -0
mlrun/common/schemas/artifact.py +15 -5
mlrun/common/schemas/auth.py +2 -0
mlrun/common/schemas/client_spec.py +1 -0
mlrun/common/schemas/frontend_spec.py +1 -0
mlrun/common/schemas/function.py +4 -0
mlrun/common/schemas/model_monitoring/__init__.py +15 -3
mlrun/common/schemas/model_monitoring/constants.py +58 -7
mlrun/common/schemas/model_monitoring/grafana.py +9 -5
mlrun/common/schemas/model_monitoring/model_endpoints.py +86 -2
mlrun/common/schemas/pipeline.py +0 -9
mlrun/common/schemas/project.py +5 -11
mlrun/common/types.py +1 -0
mlrun/config.py +30 -9
mlrun/data_types/to_pandas.py +9 -9
mlrun/datastore/base.py +41 -9
mlrun/datastore/datastore.py +6 -2
mlrun/datastore/datastore_profile.py +56 -4
mlrun/datastore/inmem.py +2 -2
mlrun/datastore/redis.py +2 -2
mlrun/datastore/s3.py +5 -0
mlrun/datastore/sources.py +147 -7
mlrun/datastore/store_resources.py +7 -7
mlrun/datastore/targets.py +110 -42
mlrun/datastore/utils.py +42 -0
mlrun/db/base.py +54 -10
mlrun/db/httpdb.py +282 -79
mlrun/db/nopdb.py +52 -10
mlrun/errors.py +11 -0
mlrun/execution.py +26 -9
mlrun/feature_store/__init__.py +0 -2
mlrun/feature_store/api.py +12 -47
mlrun/feature_store/feature_set.py +9 -0
mlrun/feature_store/feature_vector.py +8 -0
mlrun/feature_store/ingestion.py +7 -6
mlrun/feature_store/retrieval/base.py +9 -4
mlrun/feature_store/retrieval/conversion.py +9 -9
mlrun/feature_store/retrieval/dask_merger.py +2 -0
mlrun/feature_store/retrieval/job.py +9 -3
mlrun/feature_store/retrieval/local_merger.py +2 -0
mlrun/feature_store/retrieval/spark_merger.py +16 -0
mlrun/frameworks/__init__.py +6 -0
mlrun/frameworks/_dl_common/loggers/tensorboard_logger.py +7 -12
mlrun/frameworks/parallel_coordinates.py +2 -1
mlrun/frameworks/tf_keras/__init__.py +4 -1
mlrun/k8s_utils.py +10 -11
mlrun/launcher/base.py +4 -3
mlrun/launcher/client.py +5 -3
mlrun/launcher/local.py +12 -2
mlrun/launcher/remote.py +9 -2
mlrun/lists.py +6 -2
mlrun/model.py +47 -21
mlrun/model_monitoring/__init__.py +1 -1
mlrun/model_monitoring/api.py +42 -18
mlrun/model_monitoring/application.py +5 -305
mlrun/model_monitoring/applications/__init__.py +11 -0
mlrun/model_monitoring/applications/_application_steps.py +157 -0
mlrun/model_monitoring/applications/base.py +280 -0
mlrun/model_monitoring/applications/context.py +214 -0
mlrun/model_monitoring/applications/evidently_base.py +211 -0
mlrun/model_monitoring/applications/histogram_data_drift.py +132 -91
mlrun/model_monitoring/applications/results.py +99 -0
mlrun/model_monitoring/controller.py +3 -1
mlrun/model_monitoring/db/__init__.py +2 -0
mlrun/model_monitoring/db/stores/__init__.py +0 -2
mlrun/model_monitoring/db/stores/base/store.py +22 -37
mlrun/model_monitoring/db/stores/sqldb/models/__init__.py +43 -21
mlrun/model_monitoring/db/stores/sqldb/models/base.py +39 -8
mlrun/model_monitoring/db/stores/sqldb/models/mysql.py +27 -7
mlrun/model_monitoring/db/stores/sqldb/models/sqlite.py +5 -0
mlrun/model_monitoring/db/stores/sqldb/sql_store.py +246 -224
mlrun/model_monitoring/db/stores/v3io_kv/kv_store.py +232 -216
mlrun/model_monitoring/db/tsdb/__init__.py +100 -0
mlrun/model_monitoring/db/tsdb/base.py +316 -0
mlrun/model_monitoring/db/tsdb/helpers.py +30 -0
mlrun/model_monitoring/db/tsdb/tdengine/__init__.py +15 -0
mlrun/model_monitoring/db/tsdb/tdengine/schemas.py +240 -0
mlrun/model_monitoring/db/tsdb/tdengine/stream_graph_steps.py +45 -0
mlrun/model_monitoring/db/tsdb/tdengine/tdengine_connector.py +401 -0
mlrun/model_monitoring/db/tsdb/v3io/__init__.py +15 -0
mlrun/model_monitoring/db/tsdb/v3io/stream_graph_steps.py +117 -0
mlrun/model_monitoring/db/tsdb/v3io/v3io_connector.py +658 -0
mlrun/model_monitoring/evidently_application.py +6 -118
mlrun/model_monitoring/helpers.py +63 -1
mlrun/model_monitoring/model_endpoint.py +3 -2
mlrun/model_monitoring/stream_processing.py +57 -216
mlrun/model_monitoring/writer.py +134 -124
mlrun/package/__init__.py +13 -1
mlrun/package/packagers/__init__.py +6 -1
mlrun/package/utils/_formatter.py +2 -2
mlrun/platforms/__init__.py +10 -9
mlrun/platforms/iguazio.py +21 -202
mlrun/projects/operations.py +24 -12
mlrun/projects/pipelines.py +79 -102
mlrun/projects/project.py +271 -103
mlrun/render.py +15 -14
mlrun/run.py +16 -46
mlrun/runtimes/__init__.py +6 -3
mlrun/runtimes/base.py +14 -7
mlrun/runtimes/daskjob.py +1 -0
mlrun/runtimes/databricks_job/databricks_runtime.py +1 -0
mlrun/runtimes/databricks_job/databricks_wrapper.py +1 -1
mlrun/runtimes/funcdoc.py +0 -28
mlrun/runtimes/kubejob.py +2 -1
mlrun/runtimes/local.py +12 -3
mlrun/runtimes/mpijob/__init__.py +0 -20
mlrun/runtimes/mpijob/v1.py +1 -1
mlrun/runtimes/nuclio/api_gateway.py +194 -84
mlrun/runtimes/nuclio/application/application.py +170 -8
mlrun/runtimes/nuclio/function.py +39 -49
mlrun/runtimes/pod.py +16 -36
mlrun/runtimes/remotesparkjob.py +9 -3
mlrun/runtimes/sparkjob/spark3job.py +1 -1
mlrun/runtimes/utils.py +6 -45
mlrun/serving/__init__.py +8 -1
mlrun/serving/server.py +2 -1
mlrun/serving/states.py +51 -8
mlrun/serving/utils.py +19 -11
mlrun/serving/v2_serving.py +5 -1
mlrun/track/tracker.py +2 -1
mlrun/utils/async_http.py +25 -5
mlrun/utils/helpers.py +157 -83
mlrun/utils/logger.py +39 -7
mlrun/utils/notifications/notification/__init__.py +14 -9
mlrun/utils/notifications/notification/base.py +1 -1
mlrun/utils/notifications/notification/slack.py +34 -7
mlrun/utils/notifications/notification/webhook.py +1 -1
mlrun/utils/notifications/notification_pusher.py +147 -16
mlrun/utils/regex.py +9 -0
mlrun/utils/v3io_clients.py +0 -1
mlrun/utils/version/version.json +2 -2
{mlrun-1.7.0rc14.dist-info → mlrun-1.7.0rc22.dist-info}/METADATA +14 -6
{mlrun-1.7.0rc14.dist-info → mlrun-1.7.0rc22.dist-info}/RECORD +158 -138
mlrun/kfpops.py +0 -865
mlrun/platforms/other.py +0 -305
{mlrun-1.7.0rc14.dist-info → mlrun-1.7.0rc22.dist-info}/LICENSE +0 -0
{mlrun-1.7.0rc14.dist-info → mlrun-1.7.0rc22.dist-info}/WHEEL +0 -0
{mlrun-1.7.0rc14.dist-info → mlrun-1.7.0rc22.dist-info}/entry_points.txt +0 -0
{mlrun-1.7.0rc14.dist-info → mlrun-1.7.0rc22.dist-info}/top_level.txt +0 -0

mlrun/model_monitoring/applications/evidently_base.py ADDED Viewed

@@ -0,0 +1,211 @@
+# Copyright 2023 Iguazio
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import uuid
+import warnings
+from typing import Union
+import pandas as pd
+import semver
+import mlrun.model_monitoring.applications.base as mm_base
+import mlrun.model_monitoring.applications.context as mm_context
+from mlrun.errors import MLRunIncompatibleVersionError
+SUPPORTED_EVIDENTLY_VERSION = semver.Version.parse("0.4.11")
+def _check_evidently_version(*, cur: semver.Version, ref: semver.Version) -> None:
+    if ref.is_compatible(cur) or (
+        cur.major == ref.major == 0 and cur.minor == ref.minor and cur.patch > ref.patch
+    ):
+        return
+    if cur.major == ref.major == 0 and cur.minor > ref.minor:
+        warnings.warn(
+            f"Evidently version {cur} is not compatible with the tested "
+            f"version {ref}, use at your own risk."
+        )
+    else:
+        raise MLRunIncompatibleVersionError(
+            f"Evidently version {cur} is not supported, please change to "
+            f"{ref} (or another compatible version)."
+        )
+_HAS_EVIDENTLY = False
+try:
+    import evidently  # noqa: F401
+    _check_evidently_version(
+        cur=semver.Version.parse(evidently.__version__),
+        ref=SUPPORTED_EVIDENTLY_VERSION,
+    )
+    _HAS_EVIDENTLY = True
+except ModuleNotFoundError:
+    pass
+if _HAS_EVIDENTLY:
+    from evidently.renderers.notebook_utils import determine_template
+    from evidently.report.report import Report
+    from evidently.suite.base_suite import Suite
+    from evidently.ui.type_aliases import STR_UUID
+    from evidently.ui.workspace import Workspace
+    from evidently.utils.dashboard import TemplateParams
+class EvidentlyModelMonitoringApplicationBase(mm_base.ModelMonitoringApplicationBase):
+    def __init__(
+        self, evidently_workspace_path: str, evidently_project_id: "STR_UUID"
+    ) -> None:
+        """
+        A class for integrating Evidently for mlrun model monitoring within a monitoring application.
+        Note: evidently is not installed by default in the mlrun/mlrun image.
+        It must be installed separately to use this class.
+        :param evidently_workspace_path:    (str) The path to the Evidently workspace.
+        :param evidently_project_id:        (str) The ID of the Evidently project.
+        """
+        if not _HAS_EVIDENTLY:
+            raise ModuleNotFoundError("Evidently is not installed - the app cannot run")
+        self.evidently_workspace = Workspace.create(evidently_workspace_path)
+        self.evidently_project_id = evidently_project_id
+        self.evidently_project = self.evidently_workspace.get_project(
+            evidently_project_id
+        )
+    def log_evidently_object(
+        self, evidently_object: Union["Report", "Suite"], artifact_name: str
+    ):
+        """
+         Logs an Evidently report or suite as an artifact.
+        :param evidently_object:    (Union[Report, Suite]) The Evidently report or suite object.
+        :param artifact_name:       (str) The name for the logged artifact.
+        """
+        evidently_object_html = evidently_object.get_html()
+        self.context.log_artifact(
+            artifact_name, body=evidently_object_html.encode("utf-8"), format="html"
+        )
+    def log_project_dashboard(
+        self,
+        timestamp_start: pd.Timestamp,
+        timestamp_end: pd.Timestamp,
+        artifact_name: str = "dashboard",
+    ):
+        """
+        Logs an Evidently project dashboard.
+        :param timestamp_start: (pd.Timestamp) The start timestamp for the dashboard data.
+        :param timestamp_end:   (pd.Timestamp) The end timestamp for the dashboard data.
+        :param artifact_name:   (str) The name for the logged artifact.
+        """
+        dashboard_info = self.evidently_project.build_dashboard_info(
+            timestamp_start, timestamp_end
+        )
+        template_params = TemplateParams(
+            dashboard_id="pd_" + str(uuid.uuid4()).replace("-", ""),
+            dashboard_info=dashboard_info,
+            additional_graphs={},
+        )
+        dashboard_html = self._render(determine_template("inline"), template_params)
+        self.context.log_artifact(
+            artifact_name, body=dashboard_html.encode("utf-8"), format="html"
+        )
+    @staticmethod
+    def _render(temple_func, template_params: "TemplateParams"):
+        return temple_func(params=template_params)
+class EvidentlyModelMonitoringApplicationBaseV2(
+    mm_base.ModelMonitoringApplicationBaseV2
+):
+    def __init__(
+        self, evidently_workspace_path: str, evidently_project_id: "STR_UUID"
+    ) -> None:
+        """
+        A class for integrating Evidently for mlrun model monitoring within a monitoring application.
+        Note: evidently is not installed by default in the mlrun/mlrun image.
+        It must be installed separately to use this class.
+        :param evidently_workspace_path:    (str) The path to the Evidently workspace.
+        :param evidently_project_id:        (str) The ID of the Evidently project.
+        """
+        # TODO : more then one project (mep -> project)
+        if not _HAS_EVIDENTLY:
+            raise ModuleNotFoundError("Evidently is not installed - the app cannot run")
+        self.evidently_workspace = Workspace.create(evidently_workspace_path)
+        self.evidently_project_id = evidently_project_id
+        self.evidently_project = self.evidently_workspace.get_project(
+            evidently_project_id
+        )
+    @staticmethod
+    def log_evidently_object(
+        monitoring_context: mm_context.MonitoringApplicationContext,
+        evidently_object: Union["Report", "Suite"],
+        artifact_name: str,
+    ):
+        """
+         Logs an Evidently report or suite as an artifact.
+        :param monitoring_context:  (MonitoringApplicationContext) The monitoring context to process.
+        :param evidently_object:    (Union[Report, Suite]) The Evidently report or suite object.
+        :param artifact_name:       (str) The name for the logged artifact.
+        """
+        evidently_object_html = evidently_object.get_html()
+        monitoring_context.log_artifact(
+            artifact_name, body=evidently_object_html.encode("utf-8"), format="html"
+        )
+    def log_project_dashboard(
+        self,
+        monitoring_context: mm_context.MonitoringApplicationContext,
+        timestamp_start: pd.Timestamp,
+        timestamp_end: pd.Timestamp,
+        artifact_name: str = "dashboard",
+    ):
+        """
+        Logs an Evidently project dashboard.
+        :param monitoring_context:  (MonitoringApplicationContext) The monitoring context to process.
+        :param timestamp_start:     (pd.Timestamp) The start timestamp for the dashboard data.
+        :param timestamp_end:       (pd.Timestamp) The end timestamp for the dashboard data.
+        :param artifact_name:       (str) The name for the logged artifact.
+        """
+        dashboard_info = self.evidently_project.build_dashboard_info(
+            timestamp_start, timestamp_end
+        )
+        template_params = TemplateParams(
+            dashboard_id="pd_" + str(uuid.uuid4()).replace("-", ""),
+            dashboard_info=dashboard_info,
+            additional_graphs={},
+        )
+        dashboard_html = self._render(determine_template("inline"), template_params)
+        monitoring_context.log_artifact(
+            artifact_name, body=dashboard_html.encode("utf-8"), format="html"
+        )
+    @staticmethod
+    def _render(temple_func, template_params: "TemplateParams"):
+        return temple_func(params=template_params)

mlrun/model_monitoring/applications/histogram_data_drift.py CHANGED Viewed

@@ -12,14 +12,17 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
+import json
 from dataclasses import dataclass
-from typing import Final, Optional, Protocol, cast
+from typing import Final, Optional, Protocol, Union, cast
 import numpy as np
-from pandas import DataFrame, Series, Timestamp
+from pandas import DataFrame, Series
 import mlrun.artifacts
 import mlrun.common.model_monitoring.helpers
+import mlrun.model_monitoring.applications.context as mm_context
+import mlrun.model_monitoring.applications.results as mm_results
 import mlrun.model_monitoring.features_drift_table as mm_drift_table
 from mlrun.common.schemas.model_monitoring.constants import (
     EventFieldType,
@@ -27,9 +30,8 @@ from mlrun.common.schemas.model_monitoring.constants import (
     ResultKindApp,
     ResultStatusApp,
 )
-from mlrun.model_monitoring.application import (
-    ModelMonitoringApplicationBase,
-    ModelMonitoringApplicationResult,
+from mlrun.model_monitoring.applications import (
+    ModelMonitoringApplicationBaseV2,
 )
 from mlrun.model_monitoring.metrics.histogram_distance import (
     HellingerDistance,
@@ -85,17 +87,34 @@ class DataDriftClassifier:
         return ResultStatusApp.no_detection
-class HistogramDataDriftApplication(ModelMonitoringApplicationBase):
+class HistogramDataDriftApplication(ModelMonitoringApplicationBaseV2):
     """
     MLRun's default data drift application for model monitoring.
-    The application calculates the metrics over the features' histograms.
-    Each metric is calculated over all the features, the mean is taken,
-    and the status is returned.
+    The application expects tabular numerical data, and calculates three metrics over the features' histograms.
+    The three metrics are:
+    * Hellinger distance.
+    * Total variance distance.
+    * Kullback-Leibler divergence.
+    Each metric is calculated over all the features individually and the mean is taken as the metric value.
+    The average of Hellinger and total variance distance is taken as the result.
+    The application logs two artifacts:
+    * A JSON with the general drift per feature.
+    * A plotly table different metrics per feature.
+    This application is deployed by default when calling:
+    .. code-block:: python
+        project.enable_model_monitoring()
     """
     NAME: Final[str] = HistogramDataDriftApplicationConstants.NAME
-    METRIC_KIND: Final[ResultKindApp] = ResultKindApp.data_drift
     _REQUIRED_METRICS = {HellingerDistance, TotalVarianceDistance}
@@ -107,8 +126,6 @@ class HistogramDataDriftApplication(ModelMonitoringApplicationBase):
     def __init__(self, value_classifier: Optional[ValueClassifier] = None) -> None:
         """
-        Initialize the data drift application.
         :param value_classifier: Classifier object that adheres to the `ValueClassifier` protocol.
                                  If not provided, the default `DataDriftClassifier()` is used.
         """
@@ -118,17 +135,22 @@ class HistogramDataDriftApplication(ModelMonitoringApplicationBase):
         ), "TVD and Hellinger distance are required for the general data drift result"
     def _compute_metrics_per_feature(
-        self, sample_df_stats: DataFrame, feature_stats: DataFrame
+        self, monitoring_context: mm_context.MonitoringApplicationContext
     ) -> DataFrame:
         """Compute the metrics for the different features and labels"""
         metrics_per_feature = DataFrame(
             columns=[metric_class.NAME for metric_class in self.metrics]
         )
+        feature_stats = monitoring_context.dict_to_histogram(
+            monitoring_context.feature_stats
+        )
+        sample_df_stats = monitoring_context.dict_to_histogram(
+            monitoring_context.sample_df_stats
+        )
         for feature_name in feature_stats:
             sample_hist = np.asarray(sample_df_stats[feature_name])
             reference_hist = np.asarray(feature_stats[feature_name])
-            self.context.logger.info(
+            monitoring_context.logger.info(
                 "Computing metrics for feature", feature_name=feature_name
             )
             metrics_per_feature.loc[feature_name] = {  # pyright: ignore[reportCallIssue,reportArgumentType]
@@ -137,62 +159,65 @@ class HistogramDataDriftApplication(ModelMonitoringApplicationBase):
                 ).compute()
                 for metric in self.metrics
             }
-        self.context.logger.info("Finished computing the metrics")
+        monitoring_context.logger.info("Finished computing the metrics")
         return metrics_per_feature
-    def _add_general_drift_result(
-        self, results: list[ModelMonitoringApplicationResult], value: float
-    ) -> None:
-        """Add the general drift result to the results list and log it"""
+    def _get_general_drift_result(
+        self,
+        metrics: list[mm_results.ModelMonitoringApplicationMetric],
+        monitoring_context: mm_context.MonitoringApplicationContext,
+        metrics_per_feature: DataFrame,
+    ) -> mm_results.ModelMonitoringApplicationResult:
+        """Get the general drift result from the metrics list"""
+        value = cast(
+            float,
+            np.mean(
+                [
+                    metric.value
+                    for metric in metrics
+                    if metric.name
+                    in [
+                        f"{HellingerDistance.NAME}_mean",
+                        f"{TotalVarianceDistance.NAME}_mean",
+                    ]
+                ]
+            ),
+        )
         status = self._value_classifier.value_to_status(value)
-        results.append(
-            ModelMonitoringApplicationResult(
-                name=HistogramDataDriftApplicationConstants.GENERAL_RESULT_NAME,
-                value=value,
-                kind=self.METRIC_KIND,
-                status=status,
-            )
+        return mm_results.ModelMonitoringApplicationResult(
+            name=HistogramDataDriftApplicationConstants.GENERAL_RESULT_NAME,
+            value=value,
+            kind=ResultKindApp.data_drift,
+            status=status,
+            extra_data={
+                EventFieldType.CURRENT_STATS: json.dumps(
+                    monitoring_context.feature_stats
+                ),
+                EventFieldType.DRIFT_MEASURES: metrics_per_feature.T.to_json(),
+                EventFieldType.DRIFT_STATUS: status.value,
+            },
         )
-    def _get_results(
-        self, metrics_per_feature: DataFrame
-    ) -> list[ModelMonitoringApplicationResult]:
+    @staticmethod
+    def _get_metrics(
+        metrics_per_feature: DataFrame,
+    ) -> list[mm_results.ModelMonitoringApplicationMetric]:
         """Average the metrics over the features and add the status"""
-        results: list[ModelMonitoringApplicationResult] = []
+        metrics: list[mm_results.ModelMonitoringApplicationMetric] = []
-        self.context.logger.debug("Averaging metrics over the features")
         metrics_mean = metrics_per_feature.mean().to_dict()
-        self.context.logger.debug("Creating the results")
         for name, value in metrics_mean.items():
-            if name == KullbackLeiblerDivergence.NAME:
-                # This metric is not bounded from above [0, inf).
-                # No status is currently reported for KL divergence
-                status = ResultStatusApp.irrelevant
-            else:
-                status = self._value_classifier.value_to_status(value)
-            results.append(
-                ModelMonitoringApplicationResult(
+            metrics.append(
+                mm_results.ModelMonitoringApplicationMetric(
                     name=f"{name}_mean",
                     value=value,
-                    kind=self.METRIC_KIND,
-                    status=status,
                 )
             )
-        self._add_general_drift_result(
-            results=results,
-            value=np.mean(
-                [
-                    metrics_mean[HellingerDistance.NAME],
-                    metrics_mean[TotalVarianceDistance.NAME],
-                ]
-            ),
-        )
-        self.context.logger.info("Finished with the results")
-        return results
+        return metrics
     @staticmethod
     def _remove_timestamp_feature(
@@ -209,17 +234,21 @@ class HistogramDataDriftApplication(ModelMonitoringApplicationBase):
             del sample_set_statistics[EventFieldType.TIMESTAMP]
         return sample_set_statistics
-    def _log_json_artifact(self, drift_per_feature_values: Series) -> None:
+    @staticmethod
+    def _log_json_artifact(
+        drift_per_feature_values: Series,
+        monitoring_context: mm_context.MonitoringApplicationContext,
+    ) -> None:
         """Log the drift values as a JSON artifact"""
-        self.context.logger.debug("Logging drift value per feature JSON artifact")
-        self.context.log_artifact(
+        monitoring_context.logger.debug("Logging drift value per feature JSON artifact")
+        monitoring_context.log_artifact(
             mlrun.artifacts.Artifact(
                 body=drift_per_feature_values.to_json(),
                 format="json",
                 key="features_drift_results",
             )
         )
-        self.context.logger.debug("Logged JSON artifact successfully")
+        monitoring_context.logger.debug("Logged JSON artifact successfully")
     def _log_plotly_table_artifact(
         self,
@@ -227,34 +256,34 @@ class HistogramDataDriftApplication(ModelMonitoringApplicationBase):
         inputs_statistics: mlrun.common.model_monitoring.helpers.FeatureStats,
         metrics_per_feature: DataFrame,
         drift_per_feature_values: Series,
+        monitoring_context: mm_context.MonitoringApplicationContext,
     ) -> None:
         """Log the Plotly drift table artifact"""
-        self.context.logger.debug(
+        monitoring_context.logger.debug(
             "Feature stats",
             sample_set_statistics=sample_set_statistics,
             inputs_statistics=inputs_statistics,
         )
-        self.context.logger.debug("Computing drift results per feature")
+        monitoring_context.logger.debug("Computing drift results per feature")
         drift_results = {
             cast(str, key): (self._value_classifier.value_to_status(value), value)
             for key, value in drift_per_feature_values.items()
         }
-        self.context.logger.debug("Logging plotly artifact")
-        self.context.log_artifact(
+        monitoring_context.logger.debug("Logging plotly artifact")
+        monitoring_context.log_artifact(
             mm_drift_table.FeaturesDriftTablePlot().produce(
                 sample_set_statistics=sample_set_statistics,
                 inputs_statistics=inputs_statistics,
-                metrics=metrics_per_feature.T.to_dict(),
+                metrics=metrics_per_feature.T.to_dict(),  # pyright: ignore[reportArgumentType]
                 drift_results=drift_results,
             )
         )
-        self.context.logger.debug("Logged plotly artifact successfully")
+        monitoring_context.logger.debug("Logged plotly artifact successfully")
     def _log_drift_artifacts(
         self,
-        sample_set_statistics: mlrun.common.model_monitoring.helpers.FeatureStats,
-        inputs_statistics: mlrun.common.model_monitoring.helpers.FeatureStats,
+        monitoring_context: mm_context.MonitoringApplicationContext,
         metrics_per_feature: DataFrame,
         log_json_artifact: bool = True,
     ) -> None:
@@ -264,45 +293,57 @@ class HistogramDataDriftApplication(ModelMonitoringApplicationBase):
         ].mean(axis=1)
         if log_json_artifact:
-            self._log_json_artifact(drift_per_feature_values)
+            self._log_json_artifact(drift_per_feature_values, monitoring_context)
         self._log_plotly_table_artifact(
-            sample_set_statistics=self._remove_timestamp_feature(sample_set_statistics),
-            inputs_statistics=inputs_statistics,
+            sample_set_statistics=self._remove_timestamp_feature(
+                monitoring_context.sample_df_stats
+            ),
+            inputs_statistics=monitoring_context.feature_stats,
             metrics_per_feature=metrics_per_feature,
             drift_per_feature_values=drift_per_feature_values,
+            monitoring_context=monitoring_context,
         )
     def do_tracking(
         self,
-        application_name: str,
-        sample_df_stats: mlrun.common.model_monitoring.helpers.FeatureStats,
-        feature_stats: mlrun.common.model_monitoring.helpers.FeatureStats,
-        sample_df: DataFrame,
-        start_infer_time: Timestamp,
-        end_infer_time: Timestamp,
-        latest_request: Timestamp,
-        endpoint_id: str,
-        output_stream_uri: str,
-    ) -> list[ModelMonitoringApplicationResult]:
+        monitoring_context: mm_context.MonitoringApplicationContext,
+    ) -> list[
+        Union[
+            mm_results.ModelMonitoringApplicationResult,
+            mm_results.ModelMonitoringApplicationMetric,
+        ]
+    ]:
         """
         Calculate and return the data drift metrics, averaged over the features.
-        Refer to `ModelMonitoringApplicationBase` for the meaning of the
+        Refer to `ModelMonitoringApplicationBaseV2` for the meaning of the
         function arguments.
         """
-        self.context.logger.debug("Starting to run the application")
+        monitoring_context.logger.debug("Starting to run the application")
+        if not monitoring_context.feature_stats:
+            monitoring_context.logger.info(
+                "No feature statistics found, skipping the application. \n"
+                "In order to run the application, training set must be provided when logging the model."
+            )
+            return []
         metrics_per_feature = self._compute_metrics_per_feature(
-            sample_df_stats=self.dict_to_histogram(sample_df_stats),
-            feature_stats=self.dict_to_histogram(feature_stats),
+            monitoring_context=monitoring_context
         )
-        self.context.logger.debug("Saving artifacts")
+        monitoring_context.logger.debug("Saving artifacts")
         self._log_drift_artifacts(
-            inputs_statistics=feature_stats,
-            sample_set_statistics=sample_df_stats,
+            monitoring_context=monitoring_context,
+            metrics_per_feature=metrics_per_feature,
+        )
+        monitoring_context.logger.debug("Computing average per metric")
+        metrics = self._get_metrics(metrics_per_feature)
+        result = self._get_general_drift_result(
+            metrics=metrics,
+            monitoring_context=monitoring_context,
             metrics_per_feature=metrics_per_feature,
         )
-        self.context.logger.debug("Computing average per metric")
-        results = self._get_results(metrics_per_feature)
-        self.context.logger.debug("Finished running the application", results=results)
-        return results
+        metrics_and_result = metrics + [result]
+        monitoring_context.logger.debug(
+            "Finished running the application", results=metrics_and_result
+        )
+        return metrics_and_result

mlrun/model_monitoring/applications/results.py ADDED Viewed

@@ -0,0 +1,99 @@
+# Copyright 2023 Iguazio
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import dataclasses
+import json
+import re
+from abc import ABC, abstractmethod
+import mlrun.common.helpers
+import mlrun.common.model_monitoring.helpers
+import mlrun.common.schemas.model_monitoring.constants as mm_constant
+import mlrun.utils.v3io_clients
+class _ModelMonitoringApplicationDataRes(ABC):
+    name: str
+    def __post_init__(self):
+        pat = re.compile(r"[a-zA-Z_][a-zA-Z0-9_]*")
+        if not re.fullmatch(pat, self.name):
+            raise mlrun.errors.MLRunInvalidArgumentError(
+                "Attribute name must be of the format [a-zA-Z_][a-zA-Z0-9_]*"
+            )
+    @abstractmethod
+    def to_dict(self):
+        raise NotImplementedError
+@dataclasses.dataclass
+class ModelMonitoringApplicationResult(_ModelMonitoringApplicationDataRes):
+    """
+    Class representing the result of a custom model monitoring application.
+    :param name:           (str) Name of the application result. This name must be
+                            unique for each metric in a single application
+                            (name must be of the format [a-zA-Z_][a-zA-Z0-9_]*).
+    :param value:          (float) Value of the application result.
+    :param kind:           (ResultKindApp) Kind of application result.
+    :param status:         (ResultStatusApp) Status of the application result.
+    :param extra_data:     (dict) Extra data associated with the application result.
+    """
+    name: str
+    value: float
+    kind: mm_constant.ResultKindApp
+    status: mm_constant.ResultStatusApp
+    extra_data: dict = dataclasses.field(default_factory=dict)
+    def to_dict(self):
+        """
+        Convert the object to a dictionary format suitable for writing.
+        :returns:    (dict) Dictionary representation of the result.
+        """
+        return {
+            mm_constant.ResultData.RESULT_NAME: self.name,
+            mm_constant.ResultData.RESULT_VALUE: self.value,
+            mm_constant.ResultData.RESULT_KIND: self.kind.value,
+            mm_constant.ResultData.RESULT_STATUS: self.status.value,
+            mm_constant.ResultData.RESULT_EXTRA_DATA: json.dumps(self.extra_data),
+        }
+@dataclasses.dataclass
+class ModelMonitoringApplicationMetric(_ModelMonitoringApplicationDataRes):
+    """
+    Class representing a single metric of a custom model monitoring application.
+    :param name:           (str) Name of the application metric. This name must be
+                            unique for each metric in a single application
+                            (name must be of the format [a-zA-Z_][a-zA-Z0-9_]*).
+    :param value:          (float) Value of the application metric.
+    """
+    name: str
+    value: float
+    def to_dict(self):
+        """
+        Convert the object to a dictionary format suitable for writing.
+        :returns:    (dict) Dictionary representation of the result.
+        """
+        return {
+            mm_constant.MetricData.METRIC_NAME: self.name,
+            mm_constant.MetricData.METRIC_VALUE: self.value,
+        }

mlrun 1.7.0rc14__py3-none-any.whl → 1.7.0rc22__py3-none-any.whl

Potentially problematic release.

mlrun 1.7.0rc14__py3-none-any.whl → 1.7.0rc22__py3-none-any.whl