PyPI - mlrun - Versions diffs - 1.7.0rc13__py3-none-any.whl → 1.7.0rc21__py3-none-any.whl - Mend

mlrun 1.7.0rc13__py3-none-any.whl → 1.7.0rc21__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of mlrun might be problematic. Click here for more details.

Files changed (156) hide show

mlrun/__init__.py +10 -1
mlrun/__main__.py +23 -111
mlrun/alerts/__init__.py +15 -0
mlrun/alerts/alert.py +144 -0
mlrun/api/schemas/__init__.py +4 -3
mlrun/artifacts/__init__.py +8 -3
mlrun/artifacts/base.py +36 -253
mlrun/artifacts/dataset.py +9 -190
mlrun/artifacts/manager.py +46 -42
mlrun/artifacts/model.py +9 -141
mlrun/artifacts/plots.py +14 -375
mlrun/common/constants.py +65 -3
mlrun/common/formatters/__init__.py +19 -0
mlrun/{runtimes/mpijob/v1alpha1.py → common/formatters/artifact.py} +6 -14
mlrun/common/formatters/base.py +113 -0
mlrun/common/formatters/function.py +46 -0
mlrun/common/formatters/pipeline.py +53 -0
mlrun/common/formatters/project.py +51 -0
mlrun/{runtimes → common/runtimes}/constants.py +32 -4
mlrun/common/schemas/__init__.py +10 -5
mlrun/common/schemas/alert.py +92 -11
mlrun/common/schemas/api_gateway.py +56 -0
mlrun/common/schemas/artifact.py +15 -5
mlrun/common/schemas/auth.py +2 -0
mlrun/common/schemas/client_spec.py +1 -0
mlrun/common/schemas/frontend_spec.py +1 -0
mlrun/common/schemas/function.py +4 -0
mlrun/common/schemas/model_monitoring/__init__.py +15 -3
mlrun/common/schemas/model_monitoring/constants.py +58 -7
mlrun/common/schemas/model_monitoring/grafana.py +9 -5
mlrun/common/schemas/model_monitoring/model_endpoints.py +86 -2
mlrun/common/schemas/pipeline.py +0 -9
mlrun/common/schemas/project.py +6 -11
mlrun/common/types.py +1 -0
mlrun/config.py +36 -8
mlrun/data_types/to_pandas.py +9 -9
mlrun/datastore/base.py +41 -9
mlrun/datastore/datastore.py +6 -2
mlrun/datastore/datastore_profile.py +56 -4
mlrun/datastore/hdfs.py +5 -0
mlrun/datastore/inmem.py +2 -2
mlrun/datastore/redis.py +2 -2
mlrun/datastore/s3.py +5 -0
mlrun/datastore/sources.py +147 -7
mlrun/datastore/store_resources.py +7 -7
mlrun/datastore/targets.py +129 -9
mlrun/datastore/utils.py +42 -0
mlrun/datastore/v3io.py +1 -1
mlrun/db/auth_utils.py +152 -0
mlrun/db/base.py +55 -11
mlrun/db/httpdb.py +346 -107
mlrun/db/nopdb.py +52 -10
mlrun/errors.py +11 -0
mlrun/execution.py +24 -9
mlrun/feature_store/__init__.py +0 -2
mlrun/feature_store/api.py +12 -47
mlrun/feature_store/feature_set.py +9 -0
mlrun/feature_store/feature_vector.py +8 -0
mlrun/feature_store/ingestion.py +7 -6
mlrun/feature_store/retrieval/base.py +9 -4
mlrun/feature_store/retrieval/conversion.py +9 -9
mlrun/feature_store/retrieval/dask_merger.py +2 -0
mlrun/feature_store/retrieval/job.py +9 -3
mlrun/feature_store/retrieval/local_merger.py +2 -0
mlrun/feature_store/retrieval/spark_merger.py +16 -0
mlrun/frameworks/_dl_common/loggers/tensorboard_logger.py +7 -12
mlrun/frameworks/parallel_coordinates.py +2 -1
mlrun/frameworks/tf_keras/__init__.py +4 -1
mlrun/k8s_utils.py +10 -11
mlrun/launcher/base.py +4 -3
mlrun/launcher/client.py +5 -3
mlrun/launcher/local.py +8 -2
mlrun/launcher/remote.py +8 -2
mlrun/lists.py +6 -2
mlrun/model.py +62 -20
mlrun/model_monitoring/__init__.py +1 -1
mlrun/model_monitoring/api.py +41 -18
mlrun/model_monitoring/application.py +5 -305
mlrun/model_monitoring/applications/__init__.py +11 -0
mlrun/model_monitoring/applications/_application_steps.py +157 -0
mlrun/model_monitoring/applications/base.py +280 -0
mlrun/model_monitoring/applications/context.py +214 -0
mlrun/model_monitoring/applications/evidently_base.py +211 -0
mlrun/model_monitoring/applications/histogram_data_drift.py +132 -91
mlrun/model_monitoring/applications/results.py +99 -0
mlrun/model_monitoring/controller.py +3 -1
mlrun/model_monitoring/db/__init__.py +2 -0
mlrun/model_monitoring/db/stores/__init__.py +0 -2
mlrun/model_monitoring/db/stores/base/store.py +22 -37
mlrun/model_monitoring/db/stores/sqldb/models/__init__.py +43 -21
mlrun/model_monitoring/db/stores/sqldb/models/base.py +39 -8
mlrun/model_monitoring/db/stores/sqldb/models/mysql.py +27 -7
mlrun/model_monitoring/db/stores/sqldb/models/sqlite.py +5 -0
mlrun/model_monitoring/db/stores/sqldb/sql_store.py +246 -224
mlrun/model_monitoring/db/stores/v3io_kv/kv_store.py +232 -216
mlrun/model_monitoring/db/tsdb/__init__.py +100 -0
mlrun/model_monitoring/db/tsdb/base.py +329 -0
mlrun/model_monitoring/db/tsdb/helpers.py +30 -0
mlrun/model_monitoring/db/tsdb/tdengine/__init__.py +15 -0
mlrun/model_monitoring/db/tsdb/tdengine/schemas.py +240 -0
mlrun/model_monitoring/db/tsdb/tdengine/stream_graph_steps.py +45 -0
mlrun/model_monitoring/db/tsdb/tdengine/tdengine_connector.py +397 -0
mlrun/model_monitoring/db/tsdb/v3io/__init__.py +15 -0
mlrun/model_monitoring/db/tsdb/v3io/stream_graph_steps.py +117 -0
mlrun/model_monitoring/db/tsdb/v3io/v3io_connector.py +636 -0
mlrun/model_monitoring/evidently_application.py +6 -118
mlrun/model_monitoring/helpers.py +46 -1
mlrun/model_monitoring/model_endpoint.py +3 -2
mlrun/model_monitoring/stream_processing.py +57 -216
mlrun/model_monitoring/writer.py +134 -124
mlrun/package/utils/_formatter.py +2 -2
mlrun/platforms/__init__.py +10 -9
mlrun/platforms/iguazio.py +21 -202
mlrun/projects/operations.py +19 -12
mlrun/projects/pipelines.py +103 -109
mlrun/projects/project.py +377 -137
mlrun/render.py +15 -14
mlrun/run.py +16 -47
mlrun/runtimes/__init__.py +6 -3
mlrun/runtimes/base.py +8 -7
mlrun/runtimes/databricks_job/databricks_wrapper.py +1 -1
mlrun/runtimes/funcdoc.py +0 -28
mlrun/runtimes/kubejob.py +2 -1
mlrun/runtimes/local.py +5 -2
mlrun/runtimes/mpijob/__init__.py +0 -20
mlrun/runtimes/mpijob/v1.py +1 -1
mlrun/runtimes/nuclio/api_gateway.py +440 -208
mlrun/runtimes/nuclio/application/application.py +170 -8
mlrun/runtimes/nuclio/function.py +39 -49
mlrun/runtimes/pod.py +21 -41
mlrun/runtimes/remotesparkjob.py +9 -3
mlrun/runtimes/sparkjob/spark3job.py +1 -1
mlrun/runtimes/utils.py +6 -45
mlrun/serving/server.py +2 -1
mlrun/serving/states.py +53 -2
mlrun/serving/v2_serving.py +5 -1
mlrun/track/tracker.py +2 -1
mlrun/utils/async_http.py +25 -5
mlrun/utils/helpers.py +107 -75
mlrun/utils/logger.py +39 -7
mlrun/utils/notifications/notification/__init__.py +14 -9
mlrun/utils/notifications/notification/base.py +1 -1
mlrun/utils/notifications/notification/slack.py +61 -13
mlrun/utils/notifications/notification/webhook.py +1 -1
mlrun/utils/notifications/notification_pusher.py +147 -16
mlrun/utils/regex.py +9 -0
mlrun/utils/v3io_clients.py +0 -1
mlrun/utils/version/version.json +2 -2
{mlrun-1.7.0rc13.dist-info → mlrun-1.7.0rc21.dist-info}/METADATA +14 -6
{mlrun-1.7.0rc13.dist-info → mlrun-1.7.0rc21.dist-info}/RECORD +154 -133
mlrun/kfpops.py +0 -865
mlrun/platforms/other.py +0 -305
{mlrun-1.7.0rc13.dist-info → mlrun-1.7.0rc21.dist-info}/LICENSE +0 -0
{mlrun-1.7.0rc13.dist-info → mlrun-1.7.0rc21.dist-info}/WHEEL +0 -0
{mlrun-1.7.0rc13.dist-info → mlrun-1.7.0rc21.dist-info}/entry_points.txt +0 -0
{mlrun-1.7.0rc13.dist-info → mlrun-1.7.0rc21.dist-info}/top_level.txt +0 -0

mlrun/model_monitoring/api.py CHANGED Viewed

@@ -22,9 +22,10 @@ import pandas as pd
 import mlrun.artifacts
 import mlrun.common.helpers
-import mlrun.common.schemas.model_monitoring.constants as mm_consts
+import mlrun.common.schemas.model_monitoring.constants as mm_constants
 import mlrun.feature_store
 import mlrun.model_monitoring.application
+import mlrun.model_monitoring.applications as mm_app
 import mlrun.serving
 from mlrun.data_types.infer import InferOptions, get_df_stats
 from mlrun.utils import datetime_now, logger
@@ -48,7 +49,7 @@ def get_or_create_model_endpoint(
     sample_set_statistics: dict[str, typing.Any] = None,
     drift_threshold: float = None,
     possible_drift_threshold: float = None,
-    monitoring_mode: mm_consts.ModelMonitoringMode = mm_consts.ModelMonitoringMode.disabled,
+    monitoring_mode: mm_constants.ModelMonitoringMode = mm_constants.ModelMonitoringMode.disabled,
     db_session=None,
 ) -> ModelEndpoint:
     """
@@ -128,7 +129,7 @@ def record_results(
     context: typing.Optional[mlrun.MLClientCtx] = None,
     infer_results_df: typing.Optional[pd.DataFrame] = None,
     sample_set_statistics: typing.Optional[dict[str, typing.Any]] = None,
-    monitoring_mode: mm_consts.ModelMonitoringMode = mm_consts.ModelMonitoringMode.enabled,
+    monitoring_mode: mm_constants.ModelMonitoringMode = mm_constants.ModelMonitoringMode.enabled,
     # Deprecated arguments:
     drift_threshold: typing.Optional[float] = None,
     possible_drift_threshold: typing.Optional[float] = None,
@@ -282,7 +283,7 @@ def _model_endpoint_validations(
     # drift and possible drift thresholds
     if drift_threshold:
         current_drift_threshold = model_endpoint.spec.monitor_configuration.get(
-            mm_consts.EventFieldType.DRIFT_DETECTED_THRESHOLD,
+            mm_constants.EventFieldType.DRIFT_DETECTED_THRESHOLD,
             mlrun.mlconf.model_endpoint_monitoring.drift_thresholds.default.drift_detected,
         )
         if current_drift_threshold != drift_threshold:
@@ -293,7 +294,7 @@ def _model_endpoint_validations(
     if possible_drift_threshold:
         current_possible_drift_threshold = model_endpoint.spec.monitor_configuration.get(
-            mm_consts.EventFieldType.POSSIBLE_DRIFT_THRESHOLD,
+            mm_constants.EventFieldType.POSSIBLE_DRIFT_THRESHOLD,
             mlrun.mlconf.model_endpoint_monitoring.drift_thresholds.default.possible_drift,
         )
         if current_possible_drift_threshold != possible_drift_threshold:
@@ -332,14 +333,14 @@ def write_monitoring_df(
         )
     # Modify the DataFrame to the required structure that will be used later by the monitoring batch job
-    if mm_consts.EventFieldType.TIMESTAMP not in infer_results_df.columns:
+    if mm_constants.EventFieldType.TIMESTAMP not in infer_results_df.columns:
         # Initialize timestamp column with the current time
-        infer_results_df[mm_consts.EventFieldType.TIMESTAMP] = infer_datetime
+        infer_results_df[mm_constants.EventFieldType.TIMESTAMP] = infer_datetime
     # `endpoint_id` is the monitoring feature set entity and therefore it should be defined as the df index before
     # the ingest process
-    infer_results_df[mm_consts.EventFieldType.ENDPOINT_ID] = endpoint_id
-    infer_results_df.set_index(mm_consts.EventFieldType.ENDPOINT_ID, inplace=True)
+    infer_results_df[mm_constants.EventFieldType.ENDPOINT_ID] = endpoint_id
+    infer_results_df.set_index(mm_constants.EventFieldType.ENDPOINT_ID, inplace=True)
     monitoring_feature_set.ingest(source=infer_results_df, overwrite=False)
@@ -355,7 +356,7 @@ def _generate_model_endpoint(
     sample_set_statistics: dict[str, typing.Any],
     drift_threshold: float,
     possible_drift_threshold: float,
-    monitoring_mode: mm_consts.ModelMonitoringMode = mm_consts.ModelMonitoringMode.disabled,
+    monitoring_mode: mm_constants.ModelMonitoringMode = mm_constants.ModelMonitoringMode.disabled,
 ) -> ModelEndpoint:
     """
     Write a new model endpoint record.
@@ -394,11 +395,11 @@ def _generate_model_endpoint(
     model_endpoint.spec.model_class = "drift-analysis"
     if drift_threshold:
         model_endpoint.spec.monitor_configuration[
-            mm_consts.EventFieldType.DRIFT_DETECTED_THRESHOLD
+            mm_constants.EventFieldType.DRIFT_DETECTED_THRESHOLD
         ] = drift_threshold
     if possible_drift_threshold:
         model_endpoint.spec.monitor_configuration[
-            mm_consts.EventFieldType.POSSIBLE_DRIFT_THRESHOLD
+            mm_constants.EventFieldType.POSSIBLE_DRIFT_THRESHOLD
         ] = possible_drift_threshold
     model_endpoint.spec.monitoring_mode = monitoring_mode
@@ -589,7 +590,10 @@ def _create_model_monitoring_function_base(
     project: str,
     func: typing.Union[str, None] = None,
     application_class: typing.Union[
-        str, mlrun.model_monitoring.application.ModelMonitoringApplicationBase, None
+        str,
+        mlrun.model_monitoring.application.ModelMonitoringApplicationBase,
+        mm_app.ModelMonitoringApplicationBaseV2,
+        None,
     ] = None,
     name: typing.Optional[str] = None,
     image: typing.Optional[str] = None,
@@ -602,6 +606,20 @@ def _create_model_monitoring_function_base(
     Note: this is an internal API only.
     This function does not set the labels or mounts v3io.
     """
+    if isinstance(
+        application_class,
+        mlrun.model_monitoring.application.ModelMonitoringApplicationBase,
+    ):
+        warnings.warn(
+            "The `ModelMonitoringApplicationBase` class is deprecated from version 1.7.0, "
+            "please use `ModelMonitoringApplicationBaseV2`. It will be removed in 1.9.0.",
+            FutureWarning,
+        )
+    if name in mm_constants.MonitoringFunctionNames.list():
+        raise mlrun.errors.MLRunInvalidArgumentError(
+            f"An application cannot have the following names: "
+            f"{mm_constants.MonitoringFunctionNames.list()}"
+        )
     if func is None:
         func = ""
     func_obj = typing.cast(
@@ -618,14 +636,19 @@ def _create_model_monitoring_function_base(
         ),
     )
     graph = func_obj.set_topology(mlrun.serving.states.StepKinds.flow)
+    prepare_step = graph.to(
+        class_name="mlrun.model_monitoring.applications._application_steps._PrepareMonitoringEvent",
+        name="PrepareMonitoringEvent",
+        application_name=name,
+    )
     if isinstance(application_class, str):
-        first_step = graph.to(class_name=application_class, **application_kwargs)
+        app_step = prepare_step.to(class_name=application_class, **application_kwargs)
     else:
-        first_step = graph.to(class_name=application_class)
-    first_step.to(
-        class_name="mlrun.model_monitoring.application.PushToMonitoringWriter",
+        app_step = prepare_step.to(class_name=application_class)
+    app_step.to(
+        class_name="mlrun.model_monitoring.applications._application_steps._PushToMonitoringWriter",
         name="PushToMonitoringWriter",
         project=project,
-        writer_application_name=mm_consts.MonitoringFunctionNames.WRITER,
+        writer_application_name=mm_constants.MonitoringFunctionNames.WRITER,
     ).respond()
     return func_obj

mlrun/model_monitoring/application.py CHANGED Viewed

@@ -12,308 +12,8 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
-import dataclasses
-import json
-import re
-from abc import ABC, abstractmethod
-from typing import Any, Optional, Union, cast
-import numpy as np
-import pandas as pd
-import mlrun.common.helpers
-import mlrun.common.model_monitoring.helpers
-import mlrun.common.schemas.model_monitoring.constants as mm_constant
-import mlrun.utils.v3io_clients
-from mlrun.datastore import get_stream_pusher
-from mlrun.datastore.targets import ParquetTarget
-from mlrun.model_monitoring.helpers import get_stream_path
-from mlrun.serving.utils import StepToDict
-from mlrun.utils import logger
-@dataclasses.dataclass
-class ModelMonitoringApplicationResult:
-    """
-    Class representing the result of a custom model monitoring application.
-    :param name:           (str) Name of the application result. This name must be
-                            unique for each metric in a single application
-                            (name must be of the format [a-zA-Z_][a-zA-Z0-9_]*).
-    :param value:          (float) Value of the application result.
-    :param kind:           (ResultKindApp) Kind of application result.
-    :param status:         (ResultStatusApp) Status of the application result.
-    :param extra_data:     (dict) Extra data associated with the application result.
-    """
-    name: str
-    value: float
-    kind: mm_constant.ResultKindApp
-    status: mm_constant.ResultStatusApp
-    extra_data: dict = dataclasses.field(default_factory=dict)
-    def __post_init__(self):
-        pat = re.compile(r"[a-zA-Z_][a-zA-Z0-9_]*")
-        if not re.fullmatch(pat, self.name):
-            raise mlrun.errors.MLRunInvalidArgumentError(
-                "Attribute name must be of the format [a-zA-Z_][a-zA-Z0-9_]*"
-            )
-    def to_dict(self):
-        """
-        Convert the object to a dictionary format suitable for writing.
-        :returns:    (dict) Dictionary representation of the result.
-        """
-        return {
-            mm_constant.WriterEvent.RESULT_NAME: self.name,
-            mm_constant.WriterEvent.RESULT_VALUE: self.value,
-            mm_constant.WriterEvent.RESULT_KIND: self.kind,
-            mm_constant.WriterEvent.RESULT_STATUS: self.status,
-            mm_constant.WriterEvent.RESULT_EXTRA_DATA: json.dumps(self.extra_data),
-        }
-class ModelMonitoringApplicationBase(StepToDict, ABC):
-    """
-    A base class for a model monitoring application.
-    Inherit from this class to create a custom model monitoring application.
-    example for very simple custom application::
-        # mlrun: start-code
-        class MyApp(ApplicationBase):
-            def do_tracking(
-                self,
-                sample_df_stats: mlrun.common.model_monitoring.helpers.FeatureStats,
-                feature_stats: mlrun.common.model_monitoring.helpers.FeatureStats,
-                start_infer_time: pd.Timestamp,
-                end_infer_time: pd.Timestamp,
-                schedule_time: pd.Timestamp,
-                latest_request: pd.Timestamp,
-                endpoint_id: str,
-                output_stream_uri: str,
-            ) -> ModelMonitoringApplicationResult:
-                self.context.log_artifact(
-                    TableArtifact(
-                        "sample_df_stats", df=self.dict_to_histogram(sample_df_stats)
-                    )
-                )
-                return ModelMonitoringApplicationResult(
-                    name="data_drift_test",
-                    value=0.5,
-                    kind=mm_constant.ResultKindApp.data_drift,
-                    status=mm_constant.ResultStatusApp.detected,
-                )
-        # mlrun: end-code
-    """
-    kind = "monitoring_application"
-    def do(
-        self, event: dict[str, Any]
-    ) -> tuple[list[ModelMonitoringApplicationResult], dict]:
-        """
-        Process the monitoring event and return application results.
-        :param event:   (dict) The monitoring event to process.
-        :returns:       (list[ModelMonitoringApplicationResult], dict) The application results
-                        and the original event for the application.
-        """
-        resolved_event = self._resolve_event(event)
-        if not (
-            hasattr(self, "context") and isinstance(self.context, mlrun.MLClientCtx)
-        ):
-            self._lazy_init(app_name=resolved_event[0])
-        results = self.do_tracking(*resolved_event)
-        results = results if isinstance(results, list) else [results]
-        return results, event
-    def _lazy_init(self, app_name: str):
-        self.context = cast(
-            mlrun.MLClientCtx, self._create_context_for_logging(app_name=app_name)
-        )
-    @abstractmethod
-    def do_tracking(
-        self,
-        application_name: str,
-        sample_df_stats: mlrun.common.model_monitoring.helpers.FeatureStats,
-        feature_stats: mlrun.common.model_monitoring.helpers.FeatureStats,
-        sample_df: pd.DataFrame,
-        start_infer_time: pd.Timestamp,
-        end_infer_time: pd.Timestamp,
-        latest_request: pd.Timestamp,
-        endpoint_id: str,
-        output_stream_uri: str,
-    ) -> Union[
-        ModelMonitoringApplicationResult, list[ModelMonitoringApplicationResult]
-    ]:
-        """
-        Implement this method with your custom monitoring logic.
-        :param application_name:         (str) the app name
-        :param sample_df_stats:         (FeatureStats) The new sample distribution dictionary.
-        :param feature_stats:           (FeatureStats) The train sample distribution dictionary.
-        :param sample_df:               (pd.DataFrame) The new sample DataFrame.
-        :param start_infer_time:        (pd.Timestamp) Start time of the monitoring schedule.
-        :param end_infer_time:          (pd.Timestamp) End time of the monitoring schedule.
-        :param latest_request:          (pd.Timestamp) Timestamp of the latest request on this endpoint_id.
-        :param endpoint_id:             (str) ID of the monitored model endpoint
-        :param output_stream_uri:       (str) URI of the output stream for results
-        :returns:                       (ModelMonitoringApplicationResult) or
-                                        (list[ModelMonitoringApplicationResult]) of the application results.
-        """
-        raise NotImplementedError
-    @classmethod
-    def _resolve_event(
-        cls,
-        event: dict[str, Any],
-    ) -> tuple[
-        str,
-        mlrun.common.model_monitoring.helpers.FeatureStats,
-        mlrun.common.model_monitoring.helpers.FeatureStats,
-        pd.DataFrame,
-        pd.Timestamp,
-        pd.Timestamp,
-        pd.Timestamp,
-        str,
-        str,
-    ]:
-        """
-        Converting the event into a single tuple that will be used for passing the event arguments to the running
-        application
-        :param event: dictionary with all the incoming data
-        :return: A tuple of:
-                     [0] = (str) application name
-                     [1] = (dict) current input statistics
-                     [2] = (dict) train statistics
-                     [3] = (pd.DataFrame) current input data
-                     [4] = (pd.Timestamp) start time of the monitoring schedule
-                     [5] = (pd.Timestamp) end time of the monitoring schedule
-                     [6] = (pd.Timestamp) timestamp of the latest request
-                     [7] = (str) endpoint id
-                     [8] = (str) output stream uri
-        """
-        start_time = pd.Timestamp(event[mm_constant.ApplicationEvent.START_INFER_TIME])
-        end_time = pd.Timestamp(event[mm_constant.ApplicationEvent.END_INFER_TIME])
-        return (
-            event[mm_constant.ApplicationEvent.APPLICATION_NAME],
-            json.loads(event[mm_constant.ApplicationEvent.CURRENT_STATS]),
-            json.loads(event[mm_constant.ApplicationEvent.FEATURE_STATS]),
-            ParquetTarget(
-                path=event[mm_constant.ApplicationEvent.SAMPLE_PARQUET_PATH]
-            ).as_df(
-                start_time=start_time,
-                end_time=end_time,
-                time_column=mm_constant.FeatureSetFeatures.time_stamp(),
-            ),
-            start_time,
-            end_time,
-            pd.Timestamp(event[mm_constant.ApplicationEvent.LAST_REQUEST]),
-            event[mm_constant.ApplicationEvent.ENDPOINT_ID],
-            event[mm_constant.ApplicationEvent.OUTPUT_STREAM_URI],
-        )
-    @staticmethod
-    def _create_context_for_logging(app_name: str):
-        context = mlrun.get_or_create_ctx(
-            f"{app_name}-logger",
-            upload_artifacts=True,
-            labels={"workflow": "model-monitoring-app-logger"},
-        )
-        return context
-    @staticmethod
-    def dict_to_histogram(
-        histogram_dict: mlrun.common.model_monitoring.helpers.FeatureStats,
-    ) -> pd.DataFrame:
-        """
-        Convert histogram dictionary to pandas DataFrame with feature histograms as columns
-        :param histogram_dict: Histogram dictionary
-        :returns: Histogram dataframe
-        """
-        # Create a dictionary with feature histograms as values
-        histograms = {}
-        for feature, stats in histogram_dict.items():
-            if "hist" in stats:
-                # Normalize to probability distribution of each feature
-                histograms[feature] = np.array(stats["hist"][0]) / stats["count"]
-        # Convert the dictionary to pandas DataFrame
-        histograms = pd.DataFrame(histograms)
-        return histograms
-class PushToMonitoringWriter(StepToDict):
-    kind = "monitoring_application_stream_pusher"
-    def __init__(
-        self,
-        project: Optional[str] = None,
-        writer_application_name: Optional[str] = None,
-        stream_uri: Optional[str] = None,
-        name: Optional[str] = None,
-    ):
-        """
-        Class for pushing application results to the monitoring writer stream.
-        :param project:                     Project name.
-        :param writer_application_name:     Writer application name.
-        :param stream_uri:                  Stream URI for pushing results.
-        :param name:                        Name of the PushToMonitoringWriter
-                                            instance default to PushToMonitoringWriter.
-        """
-        self.project = project
-        self.application_name_to_push = writer_application_name
-        self.stream_uri = stream_uri or get_stream_path(
-            project=self.project, function_name=self.application_name_to_push
-        )
-        self.output_stream = None
-        self.name = name or "PushToMonitoringWriter"
-    def do(self, event: tuple[list[ModelMonitoringApplicationResult], dict]) -> None:
-        """
-        Push application results to the monitoring writer stream.
-        :param event: Monitoring result(s) to push and the original event from the controller.
-        """
-        self._lazy_init()
-        application_results, application_event = event
-        metadata = {
-            mm_constant.WriterEvent.APPLICATION_NAME: application_event[
-                mm_constant.ApplicationEvent.APPLICATION_NAME
-            ],
-            mm_constant.WriterEvent.ENDPOINT_ID: application_event[
-                mm_constant.ApplicationEvent.ENDPOINT_ID
-            ],
-            mm_constant.WriterEvent.START_INFER_TIME: application_event[
-                mm_constant.ApplicationEvent.START_INFER_TIME
-            ],
-            mm_constant.WriterEvent.END_INFER_TIME: application_event[
-                mm_constant.ApplicationEvent.END_INFER_TIME
-            ],
-            mm_constant.WriterEvent.CURRENT_STATS: json.dumps(
-                application_event[mm_constant.ApplicationEvent.CURRENT_STATS]
-            ),
-        }
-        for result in application_results:
-            data = result.to_dict()
-            data.update(metadata)
-            logger.info(f"Pushing data = {data} \n to stream = {self.stream_uri}")
-            self.output_stream.push([data])
-    def _lazy_init(self):
-        if self.output_stream is None:
-            self.output_stream = get_stream_pusher(
-                self.stream_uri,
-            )
+# TODO : delete this file in 1.9.0
+from mlrun.model_monitoring.applications import (  # noqa: F401
+    ModelMonitoringApplicationBase,
+    ModelMonitoringApplicationResult,
+)

mlrun/model_monitoring/applications/__init__.py CHANGED Viewed

@@ -11,3 +11,14 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
+#
+from .base import ModelMonitoringApplicationBase, ModelMonitoringApplicationBaseV2
+from .context import MonitoringApplicationContext
+from .evidently_base import (
+    _HAS_EVIDENTLY,
+    SUPPORTED_EVIDENTLY_VERSION,
+    EvidentlyModelMonitoringApplicationBase,
+    EvidentlyModelMonitoringApplicationBaseV2,
+)
+from .results import ModelMonitoringApplicationMetric, ModelMonitoringApplicationResult

mlrun/model_monitoring/applications/_application_steps.py ADDED Viewed

@@ -0,0 +1,157 @@
+# Copyright 2024 Iguazio
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import json
+import typing
+from typing import Optional
+import mlrun.common.helpers
+import mlrun.common.model_monitoring.helpers
+import mlrun.common.schemas.model_monitoring.constants as mm_constant
+import mlrun.datastore
+import mlrun.utils.v3io_clients
+from mlrun.model_monitoring.helpers import get_stream_path
+from mlrun.serving.utils import StepToDict
+from mlrun.utils import logger
+from .context import MonitoringApplicationContext
+from .results import ModelMonitoringApplicationMetric, ModelMonitoringApplicationResult
+class _PushToMonitoringWriter(StepToDict):
+    kind = "monitoring_application_stream_pusher"
+    def __init__(
+        self,
+        project: Optional[str] = None,
+        writer_application_name: Optional[str] = None,
+        stream_uri: Optional[str] = None,
+        name: Optional[str] = None,
+    ):
+        """
+        Class for pushing application results to the monitoring writer stream.
+        :param project:                     Project name.
+        :param writer_application_name:     Writer application name.
+        :param stream_uri:                  Stream URI for pushing results.
+        :param name:                        Name of the PushToMonitoringWriter
+                                            instance default to PushToMonitoringWriter.
+        """
+        self.project = project
+        self.application_name_to_push = writer_application_name
+        self.stream_uri = stream_uri or get_stream_path(
+            project=self.project, function_name=self.application_name_to_push
+        )
+        self.output_stream = None
+        self.name = name or "PushToMonitoringWriter"
+    def do(
+        self,
+        event: tuple[
+            list[
+                typing.Union[
+                    ModelMonitoringApplicationResult, ModelMonitoringApplicationMetric
+                ]
+            ],
+            MonitoringApplicationContext,
+        ],
+    ) -> None:
+        """
+        Push application results to the monitoring writer stream.
+        :param event: Monitoring result(s) to push and the original event from the controller.
+        """
+        self._lazy_init()
+        application_results, application_context = event
+        writer_event = {
+            mm_constant.WriterEvent.APPLICATION_NAME: application_context.application_name,
+            mm_constant.WriterEvent.ENDPOINT_ID: application_context.endpoint_id,
+            mm_constant.WriterEvent.START_INFER_TIME: application_context.start_infer_time.isoformat(
+                sep=" ", timespec="microseconds"
+            ),
+            mm_constant.WriterEvent.END_INFER_TIME: application_context.end_infer_time.isoformat(
+                sep=" ", timespec="microseconds"
+            ),
+        }
+        for result in application_results:
+            data = result.to_dict()
+            if isinstance(result, ModelMonitoringApplicationResult):
+                writer_event[mm_constant.WriterEvent.EVENT_KIND] = (
+                    mm_constant.WriterEventKind.RESULT
+                )
+                data[mm_constant.ResultData.CURRENT_STATS] = json.dumps(
+                    application_context.sample_df_stats
+                )
+                writer_event[mm_constant.WriterEvent.DATA] = json.dumps(data)
+            else:
+                writer_event[mm_constant.WriterEvent.EVENT_KIND] = (
+                    mm_constant.WriterEventKind.METRIC
+                )
+                writer_event[mm_constant.WriterEvent.DATA] = json.dumps(data)
+            writer_event[mm_constant.WriterEvent.EVENT_KIND] = (
+                mm_constant.WriterEventKind.RESULT
+                if isinstance(result, ModelMonitoringApplicationResult)
+                else mm_constant.WriterEventKind.METRIC
+            )
+            logger.info(
+                f"Pushing data = {writer_event} \n to stream = {self.stream_uri}"
+            )
+            self.output_stream.push([writer_event])
+    def _lazy_init(self):
+        if self.output_stream is None:
+            self.output_stream = mlrun.datastore.get_stream_pusher(
+                self.stream_uri,
+            )
+class _PrepareMonitoringEvent(StepToDict):
+    def __init__(self, application_name: str):
+        """
+        Class for preparing the application event for the application step.
+        :param application_name: Application name.
+        """
+        self.context = self._create_mlrun_context(application_name)
+        self.model_endpoints = {}
+    def do(self, event: dict[str, dict]) -> MonitoringApplicationContext:
+        """
+        Prepare the application event for the application step.
+        :param event: Application event.
+        :return: Application event.
+        """
+        if not event.get("mlrun_context"):
+            application_context = MonitoringApplicationContext().from_dict(
+                event,
+                context=self.context,
+                model_endpoint_dict=self.model_endpoints,
+            )
+        else:
+            application_context = MonitoringApplicationContext().from_dict(event)
+        self.model_endpoints.setdefault(
+            application_context.endpoint_id, application_context.model_endpoint
+        )
+        return application_context
+    @staticmethod
+    def _create_mlrun_context(app_name: str):
+        context = mlrun.get_or_create_ctx(
+            f"{app_name}-logger",
+            upload_artifacts=True,
+        )
+        context.__class__ = MonitoringApplicationContext
+        return context

mlrun 1.7.0rc13__py3-none-any.whl → 1.7.0rc21__py3-none-any.whl

Potentially problematic release.

mlrun 1.7.0rc13__py3-none-any.whl → 1.7.0rc21__py3-none-any.whl