mlrun 1.7.0rc5__py3-none-any.whl → 1.7.0rc7__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- mlrun/artifacts/base.py +2 -1
- mlrun/artifacts/plots.py +9 -5
- mlrun/common/constants.py +6 -0
- mlrun/common/schemas/__init__.py +2 -0
- mlrun/common/schemas/model_monitoring/__init__.py +4 -0
- mlrun/common/schemas/model_monitoring/constants.py +35 -18
- mlrun/common/schemas/project.py +1 -0
- mlrun/common/types.py +7 -1
- mlrun/config.py +19 -6
- mlrun/data_types/data_types.py +4 -0
- mlrun/datastore/alibaba_oss.py +130 -0
- mlrun/datastore/azure_blob.py +4 -5
- mlrun/datastore/base.py +22 -16
- mlrun/datastore/datastore.py +4 -0
- mlrun/datastore/google_cloud_storage.py +1 -1
- mlrun/datastore/sources.py +7 -7
- mlrun/db/base.py +14 -6
- mlrun/db/factory.py +1 -1
- mlrun/db/httpdb.py +61 -56
- mlrun/db/nopdb.py +3 -0
- mlrun/launcher/__init__.py +1 -1
- mlrun/launcher/base.py +1 -1
- mlrun/launcher/client.py +1 -1
- mlrun/launcher/factory.py +1 -1
- mlrun/launcher/local.py +1 -1
- mlrun/launcher/remote.py +1 -1
- mlrun/model.py +1 -0
- mlrun/model_monitoring/__init__.py +1 -1
- mlrun/model_monitoring/api.py +104 -301
- mlrun/model_monitoring/application.py +21 -21
- mlrun/model_monitoring/applications/histogram_data_drift.py +130 -40
- mlrun/model_monitoring/controller.py +26 -33
- mlrun/model_monitoring/db/__init__.py +16 -0
- mlrun/model_monitoring/{stores → db/stores}/__init__.py +43 -34
- mlrun/model_monitoring/db/stores/base/__init__.py +15 -0
- mlrun/model_monitoring/{stores/model_endpoint_store.py → db/stores/base/store.py} +47 -6
- mlrun/model_monitoring/db/stores/sqldb/__init__.py +13 -0
- mlrun/model_monitoring/db/stores/sqldb/models/__init__.py +49 -0
- mlrun/model_monitoring/{stores → db/stores/sqldb}/models/base.py +76 -3
- mlrun/model_monitoring/db/stores/sqldb/models/mysql.py +68 -0
- mlrun/model_monitoring/{stores → db/stores/sqldb}/models/sqlite.py +13 -1
- mlrun/model_monitoring/db/stores/sqldb/sql_store.py +662 -0
- mlrun/model_monitoring/db/stores/v3io_kv/__init__.py +13 -0
- mlrun/model_monitoring/{stores/kv_model_endpoint_store.py → db/stores/v3io_kv/kv_store.py} +134 -3
- mlrun/model_monitoring/features_drift_table.py +34 -22
- mlrun/model_monitoring/helpers.py +45 -6
- mlrun/model_monitoring/stream_processing.py +43 -9
- mlrun/model_monitoring/tracking_policy.py +7 -1
- mlrun/model_monitoring/writer.py +4 -36
- mlrun/projects/pipelines.py +13 -1
- mlrun/projects/project.py +279 -117
- mlrun/run.py +72 -74
- mlrun/runtimes/__init__.py +35 -0
- mlrun/runtimes/base.py +7 -1
- mlrun/runtimes/nuclio/api_gateway.py +188 -61
- mlrun/runtimes/nuclio/application/__init__.py +15 -0
- mlrun/runtimes/nuclio/application/application.py +283 -0
- mlrun/runtimes/nuclio/application/reverse_proxy.go +87 -0
- mlrun/runtimes/nuclio/function.py +53 -1
- mlrun/runtimes/nuclio/serving.py +28 -32
- mlrun/runtimes/pod.py +27 -1
- mlrun/serving/server.py +4 -6
- mlrun/serving/states.py +41 -33
- mlrun/utils/helpers.py +34 -0
- mlrun/utils/version/version.json +2 -2
- {mlrun-1.7.0rc5.dist-info → mlrun-1.7.0rc7.dist-info}/METADATA +14 -5
- {mlrun-1.7.0rc5.dist-info → mlrun-1.7.0rc7.dist-info}/RECORD +71 -64
- mlrun/model_monitoring/batch.py +0 -974
- mlrun/model_monitoring/stores/models/__init__.py +0 -27
- mlrun/model_monitoring/stores/models/mysql.py +0 -34
- mlrun/model_monitoring/stores/sql_model_endpoint_store.py +0 -382
- {mlrun-1.7.0rc5.dist-info → mlrun-1.7.0rc7.dist-info}/LICENSE +0 -0
- {mlrun-1.7.0rc5.dist-info → mlrun-1.7.0rc7.dist-info}/WHEEL +0 -0
- {mlrun-1.7.0rc5.dist-info → mlrun-1.7.0rc7.dist-info}/entry_points.txt +0 -0
- {mlrun-1.7.0rc5.dist-info → mlrun-1.7.0rc7.dist-info}/top_level.txt +0 -0
mlrun/model_monitoring/api.py
CHANGED
@@ -13,8 +13,8 @@
 # limitations under the License.
 
 import hashlib
-import json
 import typing
+import warnings
 from datetime import datetime
 
 import numpy as np
@@ -22,13 +22,13 @@ import pandas as pd
 
 import mlrun.artifacts
 import mlrun.common.helpers
+import mlrun.common.schemas.model_monitoring.constants as mm_consts
 import mlrun.feature_store
-
+import mlrun.model_monitoring.application
+import mlrun.serving
 from mlrun.data_types.infer import InferOptions, get_df_stats
 from mlrun.utils import datetime_now, logger
 
-from .batch import VirtualDrift
-from .features_drift_table import FeaturesDriftTablePlot
 from .helpers import update_model_endpoint_last_request
 from .model_endpoint import ModelEndpoint
 
@@ -48,7 +48,7 @@ def get_or_create_model_endpoint(
     sample_set_statistics: dict[str, typing.Any] = None,
     drift_threshold: float = None,
     possible_drift_threshold: float = None,
-    monitoring_mode: ModelMonitoringMode = ModelMonitoringMode.disabled,
+    monitoring_mode: mm_consts.ModelMonitoringMode = mm_consts.ModelMonitoringMode.disabled,
     db_session=None,
 ) -> ModelEndpoint:
     """
@@ -128,20 +128,19 @@ def record_results(
     context: typing.Optional[mlrun.MLClientCtx] = None,
     infer_results_df: typing.Optional[pd.DataFrame] = None,
     sample_set_statistics: typing.Optional[dict[str, typing.Any]] = None,
-    monitoring_mode: ModelMonitoringMode = ModelMonitoringMode.enabled,
+    monitoring_mode: mm_consts.ModelMonitoringMode = mm_consts.ModelMonitoringMode.enabled,
+    # Deprecated arguments:
     drift_threshold: typing.Optional[float] = None,
     possible_drift_threshold: typing.Optional[float] = None,
     trigger_monitoring_job: bool = False,
     artifacts_tag: str = "",
-    default_batch_image="mlrun/mlrun",
+    default_batch_image: str = "mlrun/mlrun",
 ) -> ModelEndpoint:
     """
     Write a provided inference dataset to model endpoint parquet target. If not exist, generate a new model endpoint
     record and use the provided sample set statistics as feature stats that will be used later for the drift analysis.
-    To
-
-    input data (along with the outputs). The drift rule is the value per-feature mean of the TVD and Hellinger scores
-    according to the provided thresholds.
+    To activate model monitoring, run `project.enable_model_monitoring()`. The model monitoring applications will be
+    triggered with the recorded data according to a periodic schedule.
 
     :param project: Project name.
     :param model_path: The model Store path.
@@ -160,17 +159,47 @@ def record_results(
         the current model endpoint.
     :param monitoring_mode: If enabled, apply model monitoring features on the provided endpoint id. Enabled
         by default.
-    :param drift_threshold: The threshold of which to mark drifts.
-    :param possible_drift_threshold: The threshold of which to mark possible drifts.
-    :param trigger_monitoring_job: If true, run the batch drift job. If not exists, the monitoring
-        will be registered through MLRun API with the provided image.
-    :param artifacts_tag: Tag to use for all the artifacts resulted from the function.
-        only if the monitoring batch job has been triggered.
-
-
+    :param drift_threshold: (deprecated) The threshold of which to mark drifts.
+    :param possible_drift_threshold: (deprecated) The threshold of which to mark possible drifts.
+    :param trigger_monitoring_job: (deprecated) If true, run the batch drift job. If not exists, the monitoring
+        batch function will be registered through MLRun API with the provided image.
+    :param artifacts_tag: (deprecated) Tag to use for all the artifacts resulted from the function.
+        Will be relevant only if the monitoring batch job has been triggered.
+    :param default_batch_image: (deprecated) The image that will be used when registering the model monitoring
+        batch job.
 
     :return: A ModelEndpoint object
     """
+
+    if drift_threshold is not None or possible_drift_threshold is not None:
+        warnings.warn(
+            "Custom drift threshold arguments are deprecated since version "
+            "1.7.0 and have no effect. They will be removed in version 1.9.0.\n"
+            "To enable the default histogram data drift application, run:\n"
+            "`project.enable_model_monitoring()`.",
+            FutureWarning,
+        )
+    if trigger_monitoring_job is not False:
+        warnings.warn(
+            "`trigger_monitoring_job` argument is deprecated since version "
+            "1.7.0 and has no effect. It will be removed in version 1.9.0.\n"
+            "To enable the default histogram data drift application, run:\n"
+            "`project.enable_model_monitoring()`.",
+            FutureWarning,
+        )
+    if artifacts_tag != "":
+        warnings.warn(
+            "`artifacts_tag` argument is deprecated since version "
+            "1.7.0 and has no effect. It will be removed in version 1.9.0.",
+            FutureWarning,
+        )
+    if default_batch_image != "mlrun/mlrun":
+        warnings.warn(
+            "`default_batch_image` argument is deprecated since version "
+            "1.7.0 and has no effect. It will be removed in version 1.9.0.",
+            FutureWarning,
+        )
+
     db = mlrun.get_run_db()
 
     model_endpoint = get_or_create_model_endpoint(
@@ -181,8 +210,6 @@ def record_results(
         function_name=function_name,
         context=context,
         sample_set_statistics=sample_set_statistics,
-        drift_threshold=drift_threshold,
-        possible_drift_threshold=possible_drift_threshold,
         monitoring_mode=monitoring_mode,
         db_session=db,
     )
@@ -206,33 +233,6 @@ def record_results(
         db=db,
     )
 
-    if trigger_monitoring_job:
-        # Run the monitoring batch drift job
-        trigger_drift_batch_job(
-            project=project,
-            default_batch_image=default_batch_image,
-            model_endpoints_ids=[model_endpoint.metadata.uid],
-            db_session=db,
-        )
-
-        # Getting drift thresholds if not provided
-        drift_threshold, possible_drift_threshold = get_drift_thresholds_if_not_none(
-            model_endpoint=model_endpoint,
-            drift_threshold=drift_threshold,
-            possible_drift_threshold=possible_drift_threshold,
-        )
-
-        perform_drift_analysis(
-            project=project,
-            context=context,
-            sample_set_statistics=model_endpoint.status.feature_stats,
-            drift_threshold=drift_threshold,
-            possible_drift_threshold=possible_drift_threshold,
-            artifacts_tag=artifacts_tag,
-            endpoint_id=model_endpoint.metadata.uid,
-            db_session=db,
-        )
-
     return model_endpoint
 
 
@@ -282,7 +282,7 @@ def _model_endpoint_validations(
     # drift and possible drift thresholds
     if drift_threshold:
         current_drift_threshold = model_endpoint.spec.monitor_configuration.get(
-            EventFieldType.DRIFT_DETECTED_THRESHOLD,
+            mm_consts.EventFieldType.DRIFT_DETECTED_THRESHOLD,
             mlrun.mlconf.model_endpoint_monitoring.drift_thresholds.default.drift_detected,
         )
         if current_drift_threshold != drift_threshold:
@@ -293,7 +293,7 @@ def _model_endpoint_validations(
 
     if possible_drift_threshold:
         current_possible_drift_threshold = model_endpoint.spec.monitor_configuration.get(
-            EventFieldType.POSSIBLE_DRIFT_THRESHOLD,
+            mm_consts.EventFieldType.POSSIBLE_DRIFT_THRESHOLD,
             mlrun.mlconf.model_endpoint_monitoring.drift_thresholds.default.possible_drift,
         )
         if current_possible_drift_threshold != possible_drift_threshold:
@@ -303,40 +303,6 @@ def _model_endpoint_validations(
             )
 
 
-def get_drift_thresholds_if_not_none(
-    model_endpoint: ModelEndpoint,
-    drift_threshold: float = None,
-    possible_drift_threshold: float = None,
-) -> tuple[float, float]:
-    """
-    Get drift and possible drift thresholds. If one of the thresholds is missing, will try to retrieve
-    it from the `ModelEndpoint` object. If not defined under the `ModelEndpoint` as well, will retrieve it from
-    the default mlrun configuration.
-
-    :param model_endpoint: `ModelEndpoint` object.
-    :param drift_threshold: The threshold of which to mark drifts.
-    :param possible_drift_threshold: The threshold of which to mark possible drifts.
-
-    :return: A Tuple of:
-        [0] drift threshold as a float
-        [1] possible drift threshold as a float
-    """
-    if not drift_threshold:
-        # Getting drift threshold value from either model endpoint or monitoring default configurations
-        drift_threshold = model_endpoint.spec.monitor_configuration.get(
-            EventFieldType.DRIFT_DETECTED_THRESHOLD,
-            mlrun.mlconf.model_endpoint_monitoring.drift_thresholds.default.drift_detected,
-        )
-    if not possible_drift_threshold:
-        # Getting possible drift threshold value from either model endpoint or monitoring default configurations
-        possible_drift_threshold = model_endpoint.spec.monitor_configuration.get(
-            EventFieldType.POSSIBLE_DRIFT_THRESHOLD,
-            mlrun.mlconf.model_endpoint_monitoring.drift_thresholds.default.possible_drift,
-        )
-
-    return drift_threshold, possible_drift_threshold
-
-
 def write_monitoring_df(
     endpoint_id: str,
     infer_results_df: pd.DataFrame,
@@ -366,14 +332,14 @@ def write_monitoring_df(
         )
 
     # Modify the DataFrame to the required structure that will be used later by the monitoring batch job
-    if EventFieldType.TIMESTAMP not in infer_results_df.columns:
+    if mm_consts.EventFieldType.TIMESTAMP not in infer_results_df.columns:
         # Initialize timestamp column with the current time
-        infer_results_df[EventFieldType.TIMESTAMP] = infer_datetime
+        infer_results_df[mm_consts.EventFieldType.TIMESTAMP] = infer_datetime
 
     # `endpoint_id` is the monitoring feature set entity and therefore it should be defined as the df index before
     # the ingest process
-    infer_results_df[EventFieldType.ENDPOINT_ID] = endpoint_id
-    infer_results_df.set_index(EventFieldType.ENDPOINT_ID, inplace=True)
+    infer_results_df[mm_consts.EventFieldType.ENDPOINT_ID] = endpoint_id
+    infer_results_df.set_index(mm_consts.EventFieldType.ENDPOINT_ID, inplace=True)
 
     monitoring_feature_set.ingest(source=infer_results_df, overwrite=False)
 
@@ -389,7 +355,7 @@ def _generate_model_endpoint(
     sample_set_statistics: dict[str, typing.Any],
     drift_threshold: float,
     possible_drift_threshold: float,
-    monitoring_mode: ModelMonitoringMode = ModelMonitoringMode.disabled,
+    monitoring_mode: mm_consts.ModelMonitoringMode = mm_consts.ModelMonitoringMode.disabled,
 ) -> ModelEndpoint:
     """
     Write a new model endpoint record.
@@ -428,11 +394,11 @@ def _generate_model_endpoint(
     model_endpoint.spec.model_class = "drift-analysis"
     if drift_threshold:
         model_endpoint.spec.monitor_configuration[
-            EventFieldType.DRIFT_DETECTED_THRESHOLD
+            mm_consts.EventFieldType.DRIFT_DETECTED_THRESHOLD
         ] = drift_threshold
     if possible_drift_threshold:
         model_endpoint.spec.monitor_configuration[
-            EventFieldType.POSSIBLE_DRIFT_THRESHOLD
+            mm_consts.EventFieldType.POSSIBLE_DRIFT_THRESHOLD
        ] = possible_drift_threshold
 
     model_endpoint.spec.monitoring_mode = monitoring_mode
@@ -449,71 +415,6 @@ def _generate_model_endpoint(
     return db_session.get_model_endpoint(project=project, endpoint_id=endpoint_id)
 
 
-def trigger_drift_batch_job(
-    project: str,
-    default_batch_image="mlrun/mlrun",
-    model_endpoints_ids: list[str] = None,
-    batch_intervals_dict: dict[str, float] = None,
-    db_session=None,
-):
-    """
-    Run model monitoring drift analysis job. If not exists, the monitoring batch function will be registered through
-    MLRun API with the provided image.
-
-    :param project: Project name.
-    :param default_batch_image: The image that will be used when registering the model monitoring batch job.
-    :param model_endpoints_ids: List of model endpoints to include in the current run.
-    :param batch_intervals_dict: Batch interval range (days, hours, minutes). By default, the batch interval is
-        configured to run through the last hour.
-    :param db_session: A runtime session that manages the current dialog with the database.
-
-    """
-    if not model_endpoints_ids:
-        raise mlrun.errors.MLRunNotFoundError(
-            "No model endpoints provided",
-        )
-    if not db_session:
-        db_session = mlrun.get_run_db()
-
-    # Register the monitoring batch job (do nothing if already exist) and get the job function as a dictionary
-    batch_function_dict: dict[str, typing.Any] = db_session.deploy_monitoring_batch_job(
-        project=project,
-        default_batch_image=default_batch_image,
-    )
-
-    # Prepare current run params
-    job_params = _generate_job_params(
-        model_endpoints_ids=model_endpoints_ids,
-        batch_intervals_dict=batch_intervals_dict,
-    )
-
-    # Generate runtime and trigger the job function
-    batch_function = mlrun.new_function(runtime=batch_function_dict)
-    batch_function.run(name="model-monitoring-batch", params=job_params, watch=True)
-
-
-def _generate_job_params(
-    model_endpoints_ids: list[str],
-    batch_intervals_dict: dict[str, float] = None,
-):
-    """
-    Generate the required params for the model monitoring batch job function.
-
-    :param model_endpoints_ids: List of model endpoints to include in the current run.
-    :param batch_intervals_dict: Batch interval range (days, hours, minutes). By default, the batch interval is
-        configured to run through the last hour.
-
-    """
-    if not batch_intervals_dict:
-        # Generate default batch intervals dict
-        batch_intervals_dict = {"minutes": 0, "hours": 1, "days": 0}
-
-    return {
-        "model_endpoints": model_endpoints_ids,
-        "batch_intervals_dict": batch_intervals_dict,
-    }
-
-
 def get_sample_set_statistics(
     sample_set: DatasetType = None,
     model_artifact_feature_stats: dict = None,
@@ -659,151 +560,6 @@ def read_dataset_as_dataframe(
     return dataset, label_columns
 
 
-def perform_drift_analysis(
-    project: str,
-    endpoint_id: str,
-    context: mlrun.MLClientCtx,
-    sample_set_statistics: dict,
-    drift_threshold: float,
-    possible_drift_threshold: float,
-    artifacts_tag: str = "",
-    db_session=None,
-) -> None:
-    """
-    Calculate drift per feature and produce the drift table artifact for logging post prediction. Note that most of
-    the calculations were already made through the monitoring batch job.
-
-    :param project: Project name.
-    :param endpoint_id: Model endpoint unique ID.
-    :param context: MLRun context. Will log the artifacts.
-    :param sample_set_statistics: The statistics of the sample set logged along a model.
-    :param drift_threshold: The threshold of which to mark drifts.
-    :param possible_drift_threshold: The threshold of which to mark possible drifts.
-    :param artifacts_tag: Tag to use for all the artifacts resulted from the function.
-    :param db_session: A runtime session that manages the current dialog with the database.
-
-    """
-    if not db_session:
-        db_session = mlrun.get_run_db()
-
-    model_endpoint = db_session.get_model_endpoint(
-        project=project, endpoint_id=endpoint_id
-    )
-
-    # Get the drift metrics results along with the feature statistics from the latest batch
-    metrics = model_endpoint.status.drift_measures
-    inputs_statistics = model_endpoint.status.current_stats
-
-    inputs_statistics.pop(EventFieldType.TIMESTAMP, None)
-
-    # Calculate drift for each feature
-    virtual_drift = VirtualDrift()
-    drift_results = virtual_drift.check_for_drift_per_feature(
-        metrics_results_dictionary=metrics,
-        possible_drift_threshold=possible_drift_threshold,
-        drift_detected_threshold=drift_threshold,
-    )
-
-    # Drift table plot
-    html_plot = FeaturesDriftTablePlot().produce(
-        sample_set_statistics=sample_set_statistics,
-        inputs_statistics=inputs_statistics,
-        metrics=metrics,
-        drift_results=drift_results,
-    )
-
-    # Prepare drift result per feature dictionary
-    metrics_per_feature = {
-        feature: _get_drift_result(
-            tvd=metric_dictionary["tvd"],
-            hellinger=metric_dictionary["hellinger"],
-            threshold=drift_threshold,
-        )[1]
-        for feature, metric_dictionary in metrics.items()
-        if isinstance(metric_dictionary, dict)
-    }
-
-    # Calculate the final analysis result as well
-    drift_status, drift_metric = _get_drift_result(
-        tvd=metrics["tvd_mean"],
-        hellinger=metrics["hellinger_mean"],
-        threshold=drift_threshold,
-    )
-    # Log the different artifacts
-    _log_drift_artifacts(
-        context=context,
-        html_plot=html_plot,
-        metrics_per_feature=metrics_per_feature,
-        drift_status=drift_status,
-        drift_metric=drift_metric,
-        artifacts_tag=artifacts_tag,
-    )
-
-
-def _log_drift_artifacts(
-    context: mlrun.MLClientCtx,
-    html_plot: str,
-    metrics_per_feature: dict[str, float],
-    drift_status: bool,
-    drift_metric: float,
-    artifacts_tag: str,
-):
-    """
-    Log the following artifacts/results:
-    1 - Drift table plot which includes a detailed drift analysis per feature
-    2 - Drift result per feature in a JSON format
-    3 - Results of the total drift analysis
-
-    :param context: MLRun context. Will log the artifacts.
-    :param html_plot: Body of the html file of the plot.
-    :param metrics_per_feature: Dictionary in which the key is a feature name and the value is the drift numerical
-        result.
-    :param drift_status: Boolean value that represents the final drift analysis result.
-    :param drift_metric: The final drift numerical result.
-    :param artifacts_tag: Tag to use for all the artifacts resulted from the function.
-
-    """
-    context.log_artifact(
-        mlrun.artifacts.Artifact(
-            body=html_plot.encode("utf-8"), format="html", key="drift_table_plot"
-        ),
-        tag=artifacts_tag,
-    )
-    context.log_artifact(
-        mlrun.artifacts.Artifact(
-            body=json.dumps(metrics_per_feature),
-            format="json",
-            key="features_drift_results",
-        ),
-        tag=artifacts_tag,
-    )
-    context.log_results(
-        results={"drift_status": drift_status, "drift_metric": drift_metric}
-    )
-
-
-def _get_drift_result(
-    tvd: float,
-    hellinger: float,
-    threshold: float,
-) -> tuple[bool, float]:
-    """
-    Calculate the drift result by the following equation: (tvd + hellinger) / 2
-
-    :param tvd: The feature's TVD value.
-    :param hellinger: The feature's Hellinger value.
-    :param threshold: The threshold from which the value is considered a drift.
-
-    :returns: A tuple of:
-        [0] = Boolean value as the drift status.
-        [1] = The result.
-    """
-    result = (tvd + hellinger) / 2
-    if result >= threshold:
-        return True, result
-    return False, result
-
-
 def log_result(
     context: mlrun.MLClientCtx,
     result_set_name: str,
@@ -826,3 +582,50 @@ def log_result(
         key="batch_id",
         value=batch_id,
     )
+
+
+def _create_model_monitoring_function_base(
+    *,
+    project: str,
+    func: typing.Union[str, None] = None,
+    application_class: typing.Union[
+        str, mlrun.model_monitoring.application.ModelMonitoringApplicationBase, None
+    ] = None,
+    name: typing.Optional[str] = None,
+    image: typing.Optional[str] = None,
+    tag: typing.Optional[str] = None,
+    requirements: typing.Union[str, list[str], None] = None,
+    requirements_file: str = "",
+    **application_kwargs,
+) -> mlrun.runtimes.ServingRuntime:
+    """
+    Note: this is an internal API only.
+    This function does not set the labels or mounts v3io.
+    """
+    if func is None:
+        func = ""
+    func_obj = typing.cast(
+        mlrun.runtimes.ServingRuntime,
+        mlrun.code_to_function(
+            filename=func,
+            name=name,
+            project=project,
+            tag=tag,
+            kind=mlrun.run.RuntimeKinds.serving,
+            image=image,
+            requirements=requirements,
+            requirements_file=requirements_file,
+        ),
+    )
+    graph = func_obj.set_topology(mlrun.serving.states.StepKinds.flow)
+    if isinstance(application_class, str):
+        first_step = graph.to(class_name=application_class, **application_kwargs)
+    else:
+        first_step = graph.to(class_name=application_class)
+    first_step.to(
+        class_name="mlrun.model_monitoring.application.PushToMonitoringWriter",
+        name="PushToMonitoringWriter",
+        project=project,
+        writer_application_name=mm_consts.MonitoringFunctionNames.WRITER,
+    ).respond()
+    return func_obj
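The api.py changes above mean that `record_results` no longer runs the drift analysis inline: the threshold and batch-job arguments are kept only as deprecated no-ops that emit a `FutureWarning`, and drift is instead handled by the project-level monitoring applications on their schedule. A minimal usage sketch of the new flow follows; the project name, model store path, and the `model_endpoint_name` parameter are illustrative assumptions (the full `record_results` signature is not visible in the hunks), while `project.enable_model_monitoring()` and `infer_results_df` come from the updated docstring and signature.

    import pandas as pd

    import mlrun
    from mlrun.model_monitoring.api import record_results

    # Hypothetical project; all names and paths below are placeholders.
    project = mlrun.get_or_create_project("my-project", context="./")

    # Deploy the monitoring infrastructure once per project; the monitoring
    # applications (including the default histogram data drift app) then
    # process the recorded data on a periodic schedule.
    project.enable_model_monitoring()

    # A toy batch of inference inputs together with the model outputs.
    results_df = pd.DataFrame(
        {"feature_a": [0.1, 0.4], "feature_b": [1.2, 0.7], "prediction": [0, 1]}
    )

    # Record the results. The deprecated drift_threshold / trigger_monitoring_job /
    # artifacts_tag / default_batch_image arguments are omitted; passing them
    # would only emit a FutureWarning.
    endpoint = record_results(
        project=project.name,
        model_path="store://models/my-project/my-model:latest",  # placeholder path
        model_endpoint_name="my-endpoint",  # assumed parameter name
        infer_results_df=results_df,
    )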
mlrun/model_monitoring/application.py
CHANGED
@@ -16,13 +16,13 @@ import dataclasses
 import json
 import re
 from abc import ABC, abstractmethod
-from typing import Any, Optional, Union
+from typing import Any, Optional, Union, cast
 
 import numpy as np
 import pandas as pd
 
 import mlrun.common.helpers
-import mlrun.common.
+import mlrun.common.model_monitoring.helpers
 import mlrun.common.schemas.model_monitoring.constants as mm_constant
 import mlrun.utils.v3io_clients
 from mlrun.datastore import get_stream_pusher
@@ -84,8 +84,8 @@ class ModelMonitoringApplicationBase(StepToDict, ABC):
         class MyApp(ApplicationBase):
             def do_tracking(
                 self,
-                sample_df_stats:
-                feature_stats:
+                sample_df_stats: mlrun.common.model_monitoring.helpers.FeatureStats,
+                feature_stats: mlrun.common.model_monitoring.helpers.FeatureStats,
                 start_infer_time: pd.Timestamp,
                 end_infer_time: pd.Timestamp,
                 schedule_time: pd.Timestamp,
@@ -93,7 +93,7 @@ class ModelMonitoringApplicationBase(StepToDict, ABC):
                 endpoint_id: str,
                 output_stream_uri: str,
             ) -> ModelMonitoringApplicationResult:
-                self.context.log_artifact(TableArtifact("sample_df_stats", df=sample_df_stats))
+                self.context.log_artifact(TableArtifact("sample_df_stats", df=self.dict_to_histogram(sample_df_stats)))
                 return ModelMonitoringApplicationResult(
                     name="data_drift_test",
                     value=0.5,
@@ -126,14 +126,16 @@ class ModelMonitoringApplicationBase(StepToDict, ABC):
         return results, event
 
     def _lazy_init(self, app_name: str):
-        self.context =
+        self.context = cast(
+            mlrun.MLClientCtx, self._create_context_for_logging(app_name=app_name)
+        )
 
     @abstractmethod
     def do_tracking(
         self,
         application_name: str,
-        sample_df_stats:
-        feature_stats:
+        sample_df_stats: mlrun.common.model_monitoring.helpers.FeatureStats,
+        feature_stats: mlrun.common.model_monitoring.helpers.FeatureStats,
         sample_df: pd.DataFrame,
         start_infer_time: pd.Timestamp,
         end_infer_time: pd.Timestamp,
@@ -147,8 +149,8 @@ class ModelMonitoringApplicationBase(StepToDict, ABC):
         Implement this method with your custom monitoring logic.
 
         :param application_name: (str) the app name
-        :param sample_df_stats: (
-        :param feature_stats: (
+        :param sample_df_stats: (FeatureStats) The new sample distribution dictionary.
+        :param feature_stats: (FeatureStats) The train sample distribution dictionary.
         :param sample_df: (pd.DataFrame) The new sample DataFrame.
         :param start_infer_time: (pd.Timestamp) Start time of the monitoring schedule.
         :param end_infer_time: (pd.Timestamp) End time of the monitoring schedule.
@@ -167,8 +169,8 @@ class ModelMonitoringApplicationBase(StepToDict, ABC):
         event: dict[str, Any],
     ) -> tuple[
         str,
-
-
+        mlrun.common.model_monitoring.helpers.FeatureStats,
+        mlrun.common.model_monitoring.helpers.FeatureStats,
         pd.DataFrame,
         pd.Timestamp,
         pd.Timestamp,
@@ -184,8 +186,8 @@ class ModelMonitoringApplicationBase(StepToDict, ABC):
 
         :return: A tuple of:
                  [0] = (str) application name
-                 [1] = (
-                 [2] = (
+                 [1] = (dict) current input statistics
+                 [2] = (dict) train statistics
                  [3] = (pd.DataFrame) current input data
                  [4] = (pd.Timestamp) start time of the monitoring schedule
                  [5] = (pd.Timestamp) end time of the monitoring schedule
@@ -197,12 +199,8 @@ class ModelMonitoringApplicationBase(StepToDict, ABC):
         end_time = pd.Timestamp(event[mm_constant.ApplicationEvent.END_INFER_TIME])
         return (
             event[mm_constant.ApplicationEvent.APPLICATION_NAME],
-
-
-            ),
-            cls._dict_to_histogram(
-                json.loads(event[mm_constant.ApplicationEvent.FEATURE_STATS])
-            ),
+            json.loads(event[mm_constant.ApplicationEvent.CURRENT_STATS]),
+            json.loads(event[mm_constant.ApplicationEvent.FEATURE_STATS]),
             ParquetTarget(
                 path=event[mm_constant.ApplicationEvent.SAMPLE_PARQUET_PATH]
             ).as_df(start_time=start_time, end_time=end_time, time_column="timestamp"),
@@ -223,7 +221,9 @@ class ModelMonitoringApplicationBase(StepToDict, ABC):
         return context
 
     @staticmethod
-    def
+    def dict_to_histogram(
+        histogram_dict: mlrun.common.model_monitoring.helpers.FeatureStats,
+    ) -> pd.DataFrame:
         """
         Convert histogram dictionary to pandas DataFrame with feature histograms as columns
 
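The application.py changes above pass the raw `FeatureStats` dictionaries straight into `do_tracking`, with the histogram conversion moved to the now-public `dict_to_histogram` helper. A sketch of a custom application under the new signature follows, adapted from the docstring example in the diff; the trailing parameters (`latest_request`, `endpoint_id`, `output_stream_uri`) and the `ResultKindApp`/`ResultStatusApp` constants are not visible in the hunks and are assumptions.

    import pandas as pd

    import mlrun.common.model_monitoring.helpers
    import mlrun.common.schemas.model_monitoring.constants as mm_constant
    from mlrun.artifacts import TableArtifact
    from mlrun.model_monitoring.application import (
        ModelMonitoringApplicationBase,
        ModelMonitoringApplicationResult,
    )


    class MyApp(ModelMonitoringApplicationBase):
        # A toy data drift application; the parameter list mirrors the abstract
        # do_tracking signature reconstructed above.
        def do_tracking(
            self,
            application_name: str,
            sample_df_stats: mlrun.common.model_monitoring.helpers.FeatureStats,
            feature_stats: mlrun.common.model_monitoring.helpers.FeatureStats,
            sample_df: pd.DataFrame,
            start_infer_time: pd.Timestamp,
            end_infer_time: pd.Timestamp,
            latest_request: pd.Timestamp,  # assumed, not shown in the hunks
            endpoint_id: str,
            output_stream_uri: str,
        ) -> ModelMonitoringApplicationResult:
            # The stats arrive as plain FeatureStats dictionaries; convert them
            # to a histogram DataFrame with the public dict_to_histogram helper.
            self.context.log_artifact(
                TableArtifact(
                    "sample_df_stats", df=self.dict_to_histogram(sample_df_stats)
                )
            )
            return ModelMonitoringApplicationResult(
                name="data_drift_test",
                value=0.5,
                kind=mm_constant.ResultKindApp.data_drift,  # assumed constant
                status=mm_constant.ResultStatusApp.detected,  # assumed constant
            )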