mlrun 1.6.4rc2__py3-none-any.whl → 1.7.0rc20__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of mlrun might be problematic. Click here for more details.
- mlrun/__init__.py +11 -1
- mlrun/__main__.py +26 -112
- mlrun/alerts/__init__.py +15 -0
- mlrun/alerts/alert.py +144 -0
- mlrun/api/schemas/__init__.py +5 -4
- mlrun/artifacts/__init__.py +8 -3
- mlrun/artifacts/base.py +46 -257
- mlrun/artifacts/dataset.py +11 -192
- mlrun/artifacts/manager.py +47 -48
- mlrun/artifacts/model.py +31 -159
- mlrun/artifacts/plots.py +23 -380
- mlrun/common/constants.py +69 -0
- mlrun/common/db/sql_session.py +2 -3
- mlrun/common/formatters/__init__.py +19 -0
- mlrun/common/formatters/artifact.py +21 -0
- mlrun/common/formatters/base.py +78 -0
- mlrun/common/formatters/function.py +41 -0
- mlrun/common/formatters/pipeline.py +53 -0
- mlrun/common/formatters/project.py +51 -0
- mlrun/common/helpers.py +1 -2
- mlrun/common/model_monitoring/helpers.py +9 -5
- mlrun/{runtimes → common/runtimes}/constants.py +37 -9
- mlrun/common/schemas/__init__.py +24 -4
- mlrun/common/schemas/alert.py +203 -0
- mlrun/common/schemas/api_gateway.py +148 -0
- mlrun/common/schemas/artifact.py +18 -8
- mlrun/common/schemas/auth.py +11 -5
- mlrun/common/schemas/background_task.py +1 -1
- mlrun/common/schemas/client_spec.py +4 -1
- mlrun/common/schemas/feature_store.py +16 -16
- mlrun/common/schemas/frontend_spec.py +8 -7
- mlrun/common/schemas/function.py +5 -1
- mlrun/common/schemas/hub.py +11 -18
- mlrun/common/schemas/memory_reports.py +2 -2
- mlrun/common/schemas/model_monitoring/__init__.py +18 -3
- mlrun/common/schemas/model_monitoring/constants.py +83 -26
- mlrun/common/schemas/model_monitoring/grafana.py +13 -9
- mlrun/common/schemas/model_monitoring/model_endpoints.py +99 -16
- mlrun/common/schemas/notification.py +4 -4
- mlrun/common/schemas/object.py +2 -2
- mlrun/{runtimes/mpijob/v1alpha1.py → common/schemas/pagination.py} +10 -13
- mlrun/common/schemas/pipeline.py +1 -10
- mlrun/common/schemas/project.py +24 -23
- mlrun/common/schemas/runtime_resource.py +8 -12
- mlrun/common/schemas/schedule.py +3 -3
- mlrun/common/schemas/tag.py +1 -2
- mlrun/common/schemas/workflow.py +2 -2
- mlrun/common/types.py +7 -1
- mlrun/config.py +54 -17
- mlrun/data_types/to_pandas.py +10 -12
- mlrun/datastore/__init__.py +5 -8
- mlrun/datastore/alibaba_oss.py +130 -0
- mlrun/datastore/azure_blob.py +17 -5
- mlrun/datastore/base.py +62 -39
- mlrun/datastore/datastore.py +28 -9
- mlrun/datastore/datastore_profile.py +146 -20
- mlrun/datastore/filestore.py +0 -1
- mlrun/datastore/google_cloud_storage.py +6 -2
- mlrun/datastore/hdfs.py +56 -0
- mlrun/datastore/inmem.py +2 -2
- mlrun/datastore/redis.py +6 -2
- mlrun/datastore/s3.py +9 -0
- mlrun/datastore/snowflake_utils.py +43 -0
- mlrun/datastore/sources.py +201 -96
- mlrun/datastore/spark_utils.py +1 -2
- mlrun/datastore/store_resources.py +7 -7
- mlrun/datastore/targets.py +358 -104
- mlrun/datastore/utils.py +72 -58
- mlrun/datastore/v3io.py +5 -1
- mlrun/db/base.py +185 -35
- mlrun/db/factory.py +1 -1
- mlrun/db/httpdb.py +614 -179
- mlrun/db/nopdb.py +210 -26
- mlrun/errors.py +12 -1
- mlrun/execution.py +41 -24
- mlrun/feature_store/__init__.py +0 -2
- mlrun/feature_store/api.py +40 -72
- mlrun/feature_store/common.py +1 -1
- mlrun/feature_store/feature_set.py +76 -55
- mlrun/feature_store/feature_vector.py +28 -30
- mlrun/feature_store/ingestion.py +7 -6
- mlrun/feature_store/retrieval/base.py +16 -11
- mlrun/feature_store/retrieval/conversion.py +11 -13
- mlrun/feature_store/retrieval/dask_merger.py +2 -0
- mlrun/feature_store/retrieval/job.py +9 -3
- mlrun/feature_store/retrieval/local_merger.py +2 -0
- mlrun/feature_store/retrieval/spark_merger.py +34 -24
- mlrun/feature_store/steps.py +37 -34
- mlrun/features.py +9 -20
- mlrun/frameworks/_common/artifacts_library.py +9 -9
- mlrun/frameworks/_common/mlrun_interface.py +5 -5
- mlrun/frameworks/_common/model_handler.py +48 -48
- mlrun/frameworks/_common/plan.py +2 -3
- mlrun/frameworks/_common/producer.py +3 -4
- mlrun/frameworks/_common/utils.py +5 -5
- mlrun/frameworks/_dl_common/loggers/logger.py +6 -7
- mlrun/frameworks/_dl_common/loggers/mlrun_logger.py +9 -9
- mlrun/frameworks/_dl_common/loggers/tensorboard_logger.py +23 -47
- mlrun/frameworks/_ml_common/artifacts_library.py +1 -2
- mlrun/frameworks/_ml_common/loggers/logger.py +3 -4
- mlrun/frameworks/_ml_common/loggers/mlrun_logger.py +4 -5
- mlrun/frameworks/_ml_common/model_handler.py +24 -24
- mlrun/frameworks/_ml_common/pkl_model_server.py +2 -2
- mlrun/frameworks/_ml_common/plan.py +1 -1
- mlrun/frameworks/_ml_common/plans/calibration_curve_plan.py +2 -3
- mlrun/frameworks/_ml_common/plans/confusion_matrix_plan.py +2 -3
- mlrun/frameworks/_ml_common/plans/dataset_plan.py +3 -3
- mlrun/frameworks/_ml_common/plans/feature_importance_plan.py +3 -3
- mlrun/frameworks/_ml_common/plans/roc_curve_plan.py +4 -4
- mlrun/frameworks/_ml_common/utils.py +4 -4
- mlrun/frameworks/auto_mlrun/auto_mlrun.py +9 -9
- mlrun/frameworks/huggingface/model_server.py +4 -4
- mlrun/frameworks/lgbm/__init__.py +33 -33
- mlrun/frameworks/lgbm/callbacks/callback.py +2 -4
- mlrun/frameworks/lgbm/callbacks/logging_callback.py +4 -5
- mlrun/frameworks/lgbm/callbacks/mlrun_logging_callback.py +4 -5
- mlrun/frameworks/lgbm/mlrun_interfaces/booster_mlrun_interface.py +1 -3
- mlrun/frameworks/lgbm/mlrun_interfaces/mlrun_interface.py +6 -6
- mlrun/frameworks/lgbm/model_handler.py +10 -10
- mlrun/frameworks/lgbm/model_server.py +6 -6
- mlrun/frameworks/lgbm/utils.py +5 -5
- mlrun/frameworks/onnx/dataset.py +8 -8
- mlrun/frameworks/onnx/mlrun_interface.py +3 -3
- mlrun/frameworks/onnx/model_handler.py +6 -6
- mlrun/frameworks/onnx/model_server.py +7 -7
- mlrun/frameworks/parallel_coordinates.py +4 -3
- mlrun/frameworks/pytorch/__init__.py +18 -18
- mlrun/frameworks/pytorch/callbacks/callback.py +4 -5
- mlrun/frameworks/pytorch/callbacks/logging_callback.py +17 -17
- mlrun/frameworks/pytorch/callbacks/mlrun_logging_callback.py +11 -11
- mlrun/frameworks/pytorch/callbacks/tensorboard_logging_callback.py +23 -29
- mlrun/frameworks/pytorch/callbacks_handler.py +38 -38
- mlrun/frameworks/pytorch/mlrun_interface.py +20 -20
- mlrun/frameworks/pytorch/model_handler.py +17 -17
- mlrun/frameworks/pytorch/model_server.py +7 -7
- mlrun/frameworks/sklearn/__init__.py +13 -13
- mlrun/frameworks/sklearn/estimator.py +4 -4
- mlrun/frameworks/sklearn/metrics_library.py +14 -14
- mlrun/frameworks/sklearn/mlrun_interface.py +3 -6
- mlrun/frameworks/sklearn/model_handler.py +2 -2
- mlrun/frameworks/tf_keras/__init__.py +10 -7
- mlrun/frameworks/tf_keras/callbacks/logging_callback.py +15 -15
- mlrun/frameworks/tf_keras/callbacks/mlrun_logging_callback.py +11 -11
- mlrun/frameworks/tf_keras/callbacks/tensorboard_logging_callback.py +19 -23
- mlrun/frameworks/tf_keras/mlrun_interface.py +9 -11
- mlrun/frameworks/tf_keras/model_handler.py +14 -14
- mlrun/frameworks/tf_keras/model_server.py +6 -6
- mlrun/frameworks/xgboost/__init__.py +13 -13
- mlrun/frameworks/xgboost/model_handler.py +6 -6
- mlrun/k8s_utils.py +14 -16
- mlrun/launcher/__init__.py +1 -1
- mlrun/launcher/base.py +16 -15
- mlrun/launcher/client.py +8 -6
- mlrun/launcher/factory.py +1 -1
- mlrun/launcher/local.py +17 -11
- mlrun/launcher/remote.py +16 -10
- mlrun/lists.py +7 -6
- mlrun/model.py +238 -73
- mlrun/model_monitoring/__init__.py +1 -1
- mlrun/model_monitoring/api.py +138 -315
- mlrun/model_monitoring/application.py +5 -296
- mlrun/model_monitoring/applications/__init__.py +24 -0
- mlrun/model_monitoring/applications/_application_steps.py +157 -0
- mlrun/model_monitoring/applications/base.py +282 -0
- mlrun/model_monitoring/applications/context.py +214 -0
- mlrun/model_monitoring/applications/evidently_base.py +211 -0
- mlrun/model_monitoring/applications/histogram_data_drift.py +349 -0
- mlrun/model_monitoring/applications/results.py +99 -0
- mlrun/model_monitoring/controller.py +104 -84
- mlrun/model_monitoring/controller_handler.py +13 -5
- mlrun/model_monitoring/db/__init__.py +18 -0
- mlrun/model_monitoring/{stores → db/stores}/__init__.py +43 -36
- mlrun/model_monitoring/db/stores/base/__init__.py +15 -0
- mlrun/model_monitoring/{stores/model_endpoint_store.py → db/stores/base/store.py} +64 -40
- mlrun/model_monitoring/db/stores/sqldb/__init__.py +13 -0
- mlrun/model_monitoring/db/stores/sqldb/models/__init__.py +71 -0
- mlrun/model_monitoring/{stores → db/stores/sqldb}/models/base.py +109 -5
- mlrun/model_monitoring/db/stores/sqldb/models/mysql.py +88 -0
- mlrun/model_monitoring/{stores/models/mysql.py → db/stores/sqldb/models/sqlite.py} +19 -13
- mlrun/model_monitoring/db/stores/sqldb/sql_store.py +684 -0
- mlrun/model_monitoring/db/stores/v3io_kv/__init__.py +13 -0
- mlrun/model_monitoring/{stores/kv_model_endpoint_store.py → db/stores/v3io_kv/kv_store.py} +310 -165
- mlrun/model_monitoring/db/tsdb/__init__.py +100 -0
- mlrun/model_monitoring/db/tsdb/base.py +329 -0
- mlrun/model_monitoring/db/tsdb/helpers.py +30 -0
- mlrun/model_monitoring/db/tsdb/tdengine/__init__.py +15 -0
- mlrun/model_monitoring/db/tsdb/tdengine/schemas.py +240 -0
- mlrun/model_monitoring/db/tsdb/tdengine/stream_graph_steps.py +45 -0
- mlrun/model_monitoring/db/tsdb/tdengine/tdengine_connector.py +397 -0
- mlrun/model_monitoring/db/tsdb/v3io/__init__.py +15 -0
- mlrun/model_monitoring/db/tsdb/v3io/stream_graph_steps.py +117 -0
- mlrun/model_monitoring/db/tsdb/v3io/v3io_connector.py +630 -0
- mlrun/model_monitoring/evidently_application.py +6 -118
- mlrun/model_monitoring/features_drift_table.py +134 -106
- mlrun/model_monitoring/helpers.py +127 -28
- mlrun/model_monitoring/metrics/__init__.py +13 -0
- mlrun/model_monitoring/metrics/histogram_distance.py +127 -0
- mlrun/model_monitoring/model_endpoint.py +3 -2
- mlrun/model_monitoring/prometheus.py +1 -4
- mlrun/model_monitoring/stream_processing.py +62 -231
- mlrun/model_monitoring/tracking_policy.py +9 -2
- mlrun/model_monitoring/writer.py +152 -124
- mlrun/package/__init__.py +6 -6
- mlrun/package/context_handler.py +5 -5
- mlrun/package/packager.py +7 -7
- mlrun/package/packagers/default_packager.py +6 -6
- mlrun/package/packagers/numpy_packagers.py +15 -15
- mlrun/package/packagers/pandas_packagers.py +5 -5
- mlrun/package/packagers/python_standard_library_packagers.py +10 -10
- mlrun/package/packagers_manager.py +19 -23
- mlrun/package/utils/_formatter.py +6 -6
- mlrun/package/utils/_pickler.py +2 -2
- mlrun/package/utils/_supported_format.py +4 -4
- mlrun/package/utils/log_hint_utils.py +2 -2
- mlrun/package/utils/type_hint_utils.py +4 -9
- mlrun/platforms/__init__.py +11 -10
- mlrun/platforms/iguazio.py +24 -203
- mlrun/projects/operations.py +35 -21
- mlrun/projects/pipelines.py +68 -99
- mlrun/projects/project.py +830 -266
- mlrun/render.py +3 -11
- mlrun/run.py +162 -166
- mlrun/runtimes/__init__.py +62 -7
- mlrun/runtimes/base.py +39 -32
- mlrun/runtimes/daskjob.py +8 -8
- mlrun/runtimes/databricks_job/databricks_cancel_task.py +1 -1
- mlrun/runtimes/databricks_job/databricks_runtime.py +7 -7
- mlrun/runtimes/databricks_job/databricks_wrapper.py +1 -1
- mlrun/runtimes/funcdoc.py +0 -28
- mlrun/runtimes/function_reference.py +1 -1
- mlrun/runtimes/kubejob.py +28 -122
- mlrun/runtimes/local.py +6 -3
- mlrun/runtimes/mpijob/__init__.py +0 -20
- mlrun/runtimes/mpijob/abstract.py +9 -10
- mlrun/runtimes/mpijob/v1.py +1 -1
- mlrun/{model_monitoring/stores/models/sqlite.py → runtimes/nuclio/__init__.py} +7 -9
- mlrun/runtimes/nuclio/api_gateway.py +709 -0
- mlrun/runtimes/nuclio/application/__init__.py +15 -0
- mlrun/runtimes/nuclio/application/application.py +523 -0
- mlrun/runtimes/nuclio/application/reverse_proxy.go +95 -0
- mlrun/runtimes/{function.py → nuclio/function.py} +112 -73
- mlrun/runtimes/{nuclio.py → nuclio/nuclio.py} +6 -6
- mlrun/runtimes/{serving.py → nuclio/serving.py} +45 -51
- mlrun/runtimes/pod.py +286 -88
- mlrun/runtimes/remotesparkjob.py +2 -2
- mlrun/runtimes/sparkjob/spark3job.py +51 -34
- mlrun/runtimes/utils.py +7 -75
- mlrun/secrets.py +9 -5
- mlrun/serving/remote.py +2 -7
- mlrun/serving/routers.py +13 -10
- mlrun/serving/server.py +22 -26
- mlrun/serving/states.py +99 -25
- mlrun/serving/utils.py +3 -3
- mlrun/serving/v1_serving.py +6 -7
- mlrun/serving/v2_serving.py +59 -20
- mlrun/track/tracker.py +2 -1
- mlrun/track/tracker_manager.py +3 -3
- mlrun/track/trackers/mlflow_tracker.py +1 -2
- mlrun/utils/async_http.py +5 -7
- mlrun/utils/azure_vault.py +1 -1
- mlrun/utils/clones.py +1 -2
- mlrun/utils/condition_evaluator.py +3 -3
- mlrun/utils/db.py +3 -3
- mlrun/utils/helpers.py +183 -197
- mlrun/utils/http.py +2 -5
- mlrun/utils/logger.py +76 -14
- mlrun/utils/notifications/notification/__init__.py +17 -12
- mlrun/utils/notifications/notification/base.py +14 -2
- mlrun/utils/notifications/notification/console.py +2 -0
- mlrun/utils/notifications/notification/git.py +3 -1
- mlrun/utils/notifications/notification/ipython.py +3 -1
- mlrun/utils/notifications/notification/slack.py +101 -21
- mlrun/utils/notifications/notification/webhook.py +11 -1
- mlrun/utils/notifications/notification_pusher.py +155 -30
- mlrun/utils/retryer.py +208 -0
- mlrun/utils/singleton.py +1 -1
- mlrun/utils/v3io_clients.py +2 -4
- mlrun/utils/version/version.json +2 -2
- mlrun/utils/version/version.py +2 -6
- {mlrun-1.6.4rc2.dist-info → mlrun-1.7.0rc20.dist-info}/METADATA +31 -19
- mlrun-1.7.0rc20.dist-info/RECORD +353 -0
- mlrun/kfpops.py +0 -868
- mlrun/model_monitoring/batch.py +0 -1095
- mlrun/model_monitoring/stores/models/__init__.py +0 -27
- mlrun/model_monitoring/stores/sql_model_endpoint_store.py +0 -384
- mlrun/platforms/other.py +0 -306
- mlrun-1.6.4rc2.dist-info/RECORD +0 -314
- {mlrun-1.6.4rc2.dist-info → mlrun-1.7.0rc20.dist-info}/LICENSE +0 -0
- {mlrun-1.6.4rc2.dist-info → mlrun-1.7.0rc20.dist-info}/WHEEL +0 -0
- {mlrun-1.6.4rc2.dist-info → mlrun-1.7.0rc20.dist-info}/entry_points.txt +0 -0
- {mlrun-1.6.4rc2.dist-info → mlrun-1.7.0rc20.dist-info}/top_level.txt +0 -0
mlrun/model_monitoring/api.py
CHANGED
|
@@ -13,8 +13,8 @@
|
|
|
13
13
|
# limitations under the License.
|
|
14
14
|
|
|
15
15
|
import hashlib
|
|
16
|
-
import json
|
|
17
16
|
import typing
|
|
17
|
+
import warnings
|
|
18
18
|
from datetime import datetime
|
|
19
19
|
|
|
20
20
|
import numpy as np
|
|
@@ -22,13 +22,14 @@ import pandas as pd
|
|
|
22
22
|
|
|
23
23
|
import mlrun.artifacts
|
|
24
24
|
import mlrun.common.helpers
|
|
25
|
+
import mlrun.common.schemas.model_monitoring.constants as mm_constants
|
|
25
26
|
import mlrun.feature_store
|
|
26
|
-
|
|
27
|
+
import mlrun.model_monitoring.application
|
|
28
|
+
import mlrun.model_monitoring.applications as mm_app
|
|
29
|
+
import mlrun.serving
|
|
27
30
|
from mlrun.data_types.infer import InferOptions, get_df_stats
|
|
28
31
|
from mlrun.utils import datetime_now, logger
|
|
29
32
|
|
|
30
|
-
from .batch import VirtualDrift
|
|
31
|
-
from .features_drift_table import FeaturesDriftTablePlot
|
|
32
33
|
from .helpers import update_model_endpoint_last_request
|
|
33
34
|
from .model_endpoint import ModelEndpoint
|
|
34
35
|
|
|
@@ -45,10 +46,10 @@ def get_or_create_model_endpoint(
|
|
|
45
46
|
endpoint_id: str = "",
|
|
46
47
|
function_name: str = "",
|
|
47
48
|
context: mlrun.MLClientCtx = None,
|
|
48
|
-
sample_set_statistics:
|
|
49
|
+
sample_set_statistics: dict[str, typing.Any] = None,
|
|
49
50
|
drift_threshold: float = None,
|
|
50
51
|
possible_drift_threshold: float = None,
|
|
51
|
-
monitoring_mode: ModelMonitoringMode = ModelMonitoringMode.disabled,
|
|
52
|
+
monitoring_mode: mm_constants.ModelMonitoringMode = mm_constants.ModelMonitoringMode.disabled,
|
|
52
53
|
db_session=None,
|
|
53
54
|
) -> ModelEndpoint:
|
|
54
55
|
"""
|
|
@@ -82,7 +83,7 @@ def get_or_create_model_endpoint(
|
|
|
82
83
|
if not endpoint_id:
|
|
83
84
|
# Generate a new model endpoint id based on the project name and model name
|
|
84
85
|
endpoint_id = hashlib.sha1(
|
|
85
|
-
f"{project}_{model_endpoint_name}".encode(
|
|
86
|
+
f"{project}_{model_endpoint_name}".encode()
|
|
86
87
|
).hexdigest()
|
|
87
88
|
|
|
88
89
|
if not db_session:
|
|
@@ -128,20 +129,19 @@ def record_results(
|
|
|
128
129
|
context: typing.Optional[mlrun.MLClientCtx] = None,
|
|
129
130
|
infer_results_df: typing.Optional[pd.DataFrame] = None,
|
|
130
131
|
sample_set_statistics: typing.Optional[dict[str, typing.Any]] = None,
|
|
131
|
-
monitoring_mode: ModelMonitoringMode = ModelMonitoringMode.enabled,
|
|
132
|
+
monitoring_mode: mm_constants.ModelMonitoringMode = mm_constants.ModelMonitoringMode.enabled,
|
|
133
|
+
# Deprecated arguments:
|
|
132
134
|
drift_threshold: typing.Optional[float] = None,
|
|
133
135
|
possible_drift_threshold: typing.Optional[float] = None,
|
|
134
136
|
trigger_monitoring_job: bool = False,
|
|
135
137
|
artifacts_tag: str = "",
|
|
136
|
-
default_batch_image="mlrun/mlrun",
|
|
138
|
+
default_batch_image: str = "mlrun/mlrun",
|
|
137
139
|
) -> ModelEndpoint:
|
|
138
140
|
"""
|
|
139
141
|
Write a provided inference dataset to model endpoint parquet target. If not exist, generate a new model endpoint
|
|
140
142
|
record and use the provided sample set statistics as feature stats that will be used later for the drift analysis.
|
|
141
|
-
To
|
|
142
|
-
|
|
143
|
-
input data (along with the outputs). The drift rule is the value per-feature mean of the TVD and Hellinger scores
|
|
144
|
-
according to the provided thresholds.
|
|
143
|
+
To activate model monitoring, run `project.enable_model_monitoring()`. The model monitoring applications will be
|
|
144
|
+
triggered with the recorded data according to a periodic schedule.
|
|
145
145
|
|
|
146
146
|
:param project: Project name.
|
|
147
147
|
:param model_path: The model Store path.
|
|
@@ -160,17 +160,47 @@ def record_results(
|
|
|
160
160
|
the current model endpoint.
|
|
161
161
|
:param monitoring_mode: If enabled, apply model monitoring features on the provided endpoint id. Enabled
|
|
162
162
|
by default.
|
|
163
|
-
:param drift_threshold: The threshold of which to mark drifts.
|
|
164
|
-
:param possible_drift_threshold: The threshold of which to mark possible drifts.
|
|
165
|
-
:param trigger_monitoring_job: If true, run the batch drift job. If not exists, the monitoring
|
|
166
|
-
will be registered through MLRun API with the provided image.
|
|
167
|
-
:param artifacts_tag: Tag to use for all the artifacts resulted from the function.
|
|
168
|
-
only if the monitoring batch job has been triggered.
|
|
169
|
-
|
|
170
|
-
|
|
163
|
+
:param drift_threshold: (deprecated) The threshold of which to mark drifts.
|
|
164
|
+
:param possible_drift_threshold: (deprecated) The threshold of which to mark possible drifts.
|
|
165
|
+
:param trigger_monitoring_job: (deprecated) If true, run the batch drift job. If not exists, the monitoring
|
|
166
|
+
batch function will be registered through MLRun API with the provided image.
|
|
167
|
+
:param artifacts_tag: (deprecated) Tag to use for all the artifacts resulted from the function.
|
|
168
|
+
Will be relevant only if the monitoring batch job has been triggered.
|
|
169
|
+
:param default_batch_image: (deprecated) The image that will be used when registering the model monitoring
|
|
170
|
+
batch job.
|
|
171
171
|
|
|
172
172
|
:return: A ModelEndpoint object
|
|
173
173
|
"""
|
|
174
|
+
|
|
175
|
+
if drift_threshold is not None or possible_drift_threshold is not None:
|
|
176
|
+
warnings.warn(
|
|
177
|
+
"Custom drift threshold arguments are deprecated since version "
|
|
178
|
+
"1.7.0 and have no effect. They will be removed in version 1.9.0.\n"
|
|
179
|
+
"To enable the default histogram data drift application, run:\n"
|
|
180
|
+
"`project.enable_model_monitoring()`.",
|
|
181
|
+
FutureWarning,
|
|
182
|
+
)
|
|
183
|
+
if trigger_monitoring_job is not False:
|
|
184
|
+
warnings.warn(
|
|
185
|
+
"`trigger_monitoring_job` argument is deprecated since version "
|
|
186
|
+
"1.7.0 and has no effect. It will be removed in version 1.9.0.\n"
|
|
187
|
+
"To enable the default histogram data drift application, run:\n"
|
|
188
|
+
"`project.enable_model_monitoring()`.",
|
|
189
|
+
FutureWarning,
|
|
190
|
+
)
|
|
191
|
+
if artifacts_tag != "":
|
|
192
|
+
warnings.warn(
|
|
193
|
+
"`artifacts_tag` argument is deprecated since version "
|
|
194
|
+
"1.7.0 and has no effect. It will be removed in version 1.9.0.",
|
|
195
|
+
FutureWarning,
|
|
196
|
+
)
|
|
197
|
+
if default_batch_image != "mlrun/mlrun":
|
|
198
|
+
warnings.warn(
|
|
199
|
+
"`default_batch_image` argument is deprecated since version "
|
|
200
|
+
"1.7.0 and has no effect. It will be removed in version 1.9.0.",
|
|
201
|
+
FutureWarning,
|
|
202
|
+
)
|
|
203
|
+
|
|
174
204
|
db = mlrun.get_run_db()
|
|
175
205
|
|
|
176
206
|
model_endpoint = get_or_create_model_endpoint(
|
|
@@ -181,8 +211,6 @@ def record_results(
|
|
|
181
211
|
function_name=function_name,
|
|
182
212
|
context=context,
|
|
183
213
|
sample_set_statistics=sample_set_statistics,
|
|
184
|
-
drift_threshold=drift_threshold,
|
|
185
|
-
possible_drift_threshold=possible_drift_threshold,
|
|
186
214
|
monitoring_mode=monitoring_mode,
|
|
187
215
|
db_session=db,
|
|
188
216
|
)
|
|
@@ -206,40 +234,13 @@ def record_results(
|
|
|
206
234
|
db=db,
|
|
207
235
|
)
|
|
208
236
|
|
|
209
|
-
if trigger_monitoring_job:
|
|
210
|
-
# Run the monitoring batch drift job
|
|
211
|
-
trigger_drift_batch_job(
|
|
212
|
-
project=project,
|
|
213
|
-
default_batch_image=default_batch_image,
|
|
214
|
-
model_endpoints_ids=[model_endpoint.metadata.uid],
|
|
215
|
-
db_session=db,
|
|
216
|
-
)
|
|
217
|
-
|
|
218
|
-
# Getting drift thresholds if not provided
|
|
219
|
-
drift_threshold, possible_drift_threshold = get_drift_thresholds_if_not_none(
|
|
220
|
-
model_endpoint=model_endpoint,
|
|
221
|
-
drift_threshold=drift_threshold,
|
|
222
|
-
possible_drift_threshold=possible_drift_threshold,
|
|
223
|
-
)
|
|
224
|
-
|
|
225
|
-
perform_drift_analysis(
|
|
226
|
-
project=project,
|
|
227
|
-
context=context,
|
|
228
|
-
sample_set_statistics=model_endpoint.status.feature_stats,
|
|
229
|
-
drift_threshold=drift_threshold,
|
|
230
|
-
possible_drift_threshold=possible_drift_threshold,
|
|
231
|
-
artifacts_tag=artifacts_tag,
|
|
232
|
-
endpoint_id=model_endpoint.metadata.uid,
|
|
233
|
-
db_session=db,
|
|
234
|
-
)
|
|
235
|
-
|
|
236
237
|
return model_endpoint
|
|
237
238
|
|
|
238
239
|
|
|
239
240
|
def _model_endpoint_validations(
|
|
240
241
|
model_endpoint: ModelEndpoint,
|
|
241
242
|
model_path: str = "",
|
|
242
|
-
sample_set_statistics:
|
|
243
|
+
sample_set_statistics: dict[str, typing.Any] = None,
|
|
243
244
|
drift_threshold: float = None,
|
|
244
245
|
possible_drift_threshold: float = None,
|
|
245
246
|
):
|
|
@@ -282,7 +283,7 @@ def _model_endpoint_validations(
|
|
|
282
283
|
# drift and possible drift thresholds
|
|
283
284
|
if drift_threshold:
|
|
284
285
|
current_drift_threshold = model_endpoint.spec.monitor_configuration.get(
|
|
285
|
-
EventFieldType.DRIFT_DETECTED_THRESHOLD,
|
|
286
|
+
mm_constants.EventFieldType.DRIFT_DETECTED_THRESHOLD,
|
|
286
287
|
mlrun.mlconf.model_endpoint_monitoring.drift_thresholds.default.drift_detected,
|
|
287
288
|
)
|
|
288
289
|
if current_drift_threshold != drift_threshold:
|
|
@@ -293,7 +294,7 @@ def _model_endpoint_validations(
|
|
|
293
294
|
|
|
294
295
|
if possible_drift_threshold:
|
|
295
296
|
current_possible_drift_threshold = model_endpoint.spec.monitor_configuration.get(
|
|
296
|
-
EventFieldType.POSSIBLE_DRIFT_THRESHOLD,
|
|
297
|
+
mm_constants.EventFieldType.POSSIBLE_DRIFT_THRESHOLD,
|
|
297
298
|
mlrun.mlconf.model_endpoint_monitoring.drift_thresholds.default.possible_drift,
|
|
298
299
|
)
|
|
299
300
|
if current_possible_drift_threshold != possible_drift_threshold:
|
|
@@ -303,40 +304,6 @@ def _model_endpoint_validations(
|
|
|
303
304
|
)
|
|
304
305
|
|
|
305
306
|
|
|
306
|
-
def get_drift_thresholds_if_not_none(
|
|
307
|
-
model_endpoint: ModelEndpoint,
|
|
308
|
-
drift_threshold: float = None,
|
|
309
|
-
possible_drift_threshold: float = None,
|
|
310
|
-
) -> typing.Tuple[float, float]:
|
|
311
|
-
"""
|
|
312
|
-
Get drift and possible drift thresholds. If one of the thresholds is missing, will try to retrieve
|
|
313
|
-
it from the `ModelEndpoint` object. If not defined under the `ModelEndpoint` as well, will retrieve it from
|
|
314
|
-
the default mlrun configuration.
|
|
315
|
-
|
|
316
|
-
:param model_endpoint: `ModelEndpoint` object.
|
|
317
|
-
:param drift_threshold: The threshold of which to mark drifts.
|
|
318
|
-
:param possible_drift_threshold: The threshold of which to mark possible drifts.
|
|
319
|
-
|
|
320
|
-
:return: A Tuple of:
|
|
321
|
-
[0] drift threshold as a float
|
|
322
|
-
[1] possible drift threshold as a float
|
|
323
|
-
"""
|
|
324
|
-
if not drift_threshold:
|
|
325
|
-
# Getting drift threshold value from either model endpoint or monitoring default configurations
|
|
326
|
-
drift_threshold = model_endpoint.spec.monitor_configuration.get(
|
|
327
|
-
EventFieldType.DRIFT_DETECTED_THRESHOLD,
|
|
328
|
-
mlrun.mlconf.model_endpoint_monitoring.drift_thresholds.default.drift_detected,
|
|
329
|
-
)
|
|
330
|
-
if not possible_drift_threshold:
|
|
331
|
-
# Getting possible drift threshold value from either model endpoint or monitoring default configurations
|
|
332
|
-
possible_drift_threshold = model_endpoint.spec.monitor_configuration.get(
|
|
333
|
-
EventFieldType.POSSIBLE_DRIFT_THRESHOLD,
|
|
334
|
-
mlrun.mlconf.model_endpoint_monitoring.drift_thresholds.default.possible_drift,
|
|
335
|
-
)
|
|
336
|
-
|
|
337
|
-
return drift_threshold, possible_drift_threshold
|
|
338
|
-
|
|
339
|
-
|
|
340
307
|
def write_monitoring_df(
|
|
341
308
|
endpoint_id: str,
|
|
342
309
|
infer_results_df: pd.DataFrame,
|
|
@@ -366,14 +333,14 @@ def write_monitoring_df(
|
|
|
366
333
|
)
|
|
367
334
|
|
|
368
335
|
# Modify the DataFrame to the required structure that will be used later by the monitoring batch job
|
|
369
|
-
if EventFieldType.TIMESTAMP not in infer_results_df.columns:
|
|
336
|
+
if mm_constants.EventFieldType.TIMESTAMP not in infer_results_df.columns:
|
|
370
337
|
# Initialize timestamp column with the current time
|
|
371
|
-
infer_results_df[EventFieldType.TIMESTAMP] = infer_datetime
|
|
338
|
+
infer_results_df[mm_constants.EventFieldType.TIMESTAMP] = infer_datetime
|
|
372
339
|
|
|
373
340
|
# `endpoint_id` is the monitoring feature set entity and therefore it should be defined as the df index before
|
|
374
341
|
# the ingest process
|
|
375
|
-
infer_results_df[EventFieldType.ENDPOINT_ID] = endpoint_id
|
|
376
|
-
infer_results_df.set_index(EventFieldType.ENDPOINT_ID, inplace=True)
|
|
342
|
+
infer_results_df[mm_constants.EventFieldType.ENDPOINT_ID] = endpoint_id
|
|
343
|
+
infer_results_df.set_index(mm_constants.EventFieldType.ENDPOINT_ID, inplace=True)
|
|
377
344
|
|
|
378
345
|
monitoring_feature_set.ingest(source=infer_results_df, overwrite=False)
|
|
379
346
|
|
|
@@ -386,10 +353,10 @@ def _generate_model_endpoint(
|
|
|
386
353
|
model_endpoint_name: str,
|
|
387
354
|
function_name: str,
|
|
388
355
|
context: mlrun.MLClientCtx,
|
|
389
|
-
sample_set_statistics:
|
|
356
|
+
sample_set_statistics: dict[str, typing.Any],
|
|
390
357
|
drift_threshold: float,
|
|
391
358
|
possible_drift_threshold: float,
|
|
392
|
-
monitoring_mode: ModelMonitoringMode = ModelMonitoringMode.disabled,
|
|
359
|
+
monitoring_mode: mm_constants.ModelMonitoringMode = mm_constants.ModelMonitoringMode.disabled,
|
|
393
360
|
) -> ModelEndpoint:
|
|
394
361
|
"""
|
|
395
362
|
Write a new model endpoint record.
|
|
@@ -428,11 +395,11 @@ def _generate_model_endpoint(
|
|
|
428
395
|
model_endpoint.spec.model_class = "drift-analysis"
|
|
429
396
|
if drift_threshold:
|
|
430
397
|
model_endpoint.spec.monitor_configuration[
|
|
431
|
-
EventFieldType.DRIFT_DETECTED_THRESHOLD
|
|
398
|
+
mm_constants.EventFieldType.DRIFT_DETECTED_THRESHOLD
|
|
432
399
|
] = drift_threshold
|
|
433
400
|
if possible_drift_threshold:
|
|
434
401
|
model_endpoint.spec.monitor_configuration[
|
|
435
|
-
EventFieldType.POSSIBLE_DRIFT_THRESHOLD
|
|
402
|
+
mm_constants.EventFieldType.POSSIBLE_DRIFT_THRESHOLD
|
|
436
403
|
] = possible_drift_threshold
|
|
437
404
|
|
|
438
405
|
model_endpoint.spec.monitoring_mode = monitoring_mode
|
|
@@ -449,79 +416,12 @@ def _generate_model_endpoint(
|
|
|
449
416
|
return db_session.get_model_endpoint(project=project, endpoint_id=endpoint_id)
|
|
450
417
|
|
|
451
418
|
|
|
452
|
-
def trigger_drift_batch_job(
|
|
453
|
-
project: str,
|
|
454
|
-
default_batch_image="mlrun/mlrun",
|
|
455
|
-
model_endpoints_ids: typing.List[str] = None,
|
|
456
|
-
batch_intervals_dict: typing.Dict[str, float] = None,
|
|
457
|
-
db_session=None,
|
|
458
|
-
):
|
|
459
|
-
"""
|
|
460
|
-
Run model monitoring drift analysis job. If not exists, the monitoring batch function will be registered through
|
|
461
|
-
MLRun API with the provided image.
|
|
462
|
-
|
|
463
|
-
:param project: Project name.
|
|
464
|
-
:param default_batch_image: The image that will be used when registering the model monitoring batch job.
|
|
465
|
-
:param model_endpoints_ids: List of model endpoints to include in the current run.
|
|
466
|
-
:param batch_intervals_dict: Batch interval range (days, hours, minutes). By default, the batch interval is
|
|
467
|
-
configured to run through the last hour.
|
|
468
|
-
:param db_session: A runtime session that manages the current dialog with the database.
|
|
469
|
-
|
|
470
|
-
"""
|
|
471
|
-
if not model_endpoints_ids:
|
|
472
|
-
raise mlrun.errors.MLRunNotFoundError(
|
|
473
|
-
"No model endpoints provided",
|
|
474
|
-
)
|
|
475
|
-
if not db_session:
|
|
476
|
-
db_session = mlrun.get_run_db()
|
|
477
|
-
|
|
478
|
-
# Register the monitoring batch job (do nothing if already exist) and get the job function as a dictionary
|
|
479
|
-
batch_function_dict: typing.Dict[str, typing.Any] = (
|
|
480
|
-
db_session.deploy_monitoring_batch_job(
|
|
481
|
-
project=project,
|
|
482
|
-
default_batch_image=default_batch_image,
|
|
483
|
-
)
|
|
484
|
-
)
|
|
485
|
-
|
|
486
|
-
# Prepare current run params
|
|
487
|
-
job_params = _generate_job_params(
|
|
488
|
-
model_endpoints_ids=model_endpoints_ids,
|
|
489
|
-
batch_intervals_dict=batch_intervals_dict,
|
|
490
|
-
)
|
|
491
|
-
|
|
492
|
-
# Generate runtime and trigger the job function
|
|
493
|
-
batch_function = mlrun.new_function(runtime=batch_function_dict)
|
|
494
|
-
batch_function.run(name="model-monitoring-batch", params=job_params, watch=True)
|
|
495
|
-
|
|
496
|
-
|
|
497
|
-
def _generate_job_params(
|
|
498
|
-
model_endpoints_ids: typing.List[str],
|
|
499
|
-
batch_intervals_dict: typing.Dict[str, float] = None,
|
|
500
|
-
):
|
|
501
|
-
"""
|
|
502
|
-
Generate the required params for the model monitoring batch job function.
|
|
503
|
-
|
|
504
|
-
:param model_endpoints_ids: List of model endpoints to include in the current run.
|
|
505
|
-
:param batch_intervals_dict: Batch interval range (days, hours, minutes). By default, the batch interval is
|
|
506
|
-
configured to run through the last hour.
|
|
507
|
-
|
|
508
|
-
"""
|
|
509
|
-
if not batch_intervals_dict:
|
|
510
|
-
# Generate default batch intervals dict
|
|
511
|
-
batch_intervals_dict = {"minutes": 0, "hours": 1, "days": 0}
|
|
512
|
-
|
|
513
|
-
return {
|
|
514
|
-
"model_endpoints": model_endpoints_ids,
|
|
515
|
-
"batch_intervals_dict": batch_intervals_dict,
|
|
516
|
-
}
|
|
517
|
-
|
|
518
|
-
|
|
519
419
|
def get_sample_set_statistics(
|
|
520
420
|
sample_set: DatasetType = None,
|
|
521
421
|
model_artifact_feature_stats: dict = None,
|
|
522
|
-
sample_set_columns: typing.Optional[
|
|
523
|
-
sample_set_drop_columns: typing.Optional[
|
|
524
|
-
sample_set_label_columns: typing.Optional[
|
|
422
|
+
sample_set_columns: typing.Optional[list] = None,
|
|
423
|
+
sample_set_drop_columns: typing.Optional[list] = None,
|
|
424
|
+
sample_set_label_columns: typing.Optional[list] = None,
|
|
525
425
|
) -> dict:
|
|
526
426
|
"""
|
|
527
427
|
Get the sample set statistics either from the given sample set or the statistics logged with the model while
|
|
@@ -576,10 +476,10 @@ def get_sample_set_statistics(
|
|
|
576
476
|
|
|
577
477
|
def read_dataset_as_dataframe(
|
|
578
478
|
dataset: DatasetType,
|
|
579
|
-
feature_columns: typing.Union[str,
|
|
580
|
-
label_columns: typing.Union[str,
|
|
581
|
-
drop_columns: typing.Union[str,
|
|
582
|
-
) ->
|
|
479
|
+
feature_columns: typing.Union[str, list[str]] = None,
|
|
480
|
+
label_columns: typing.Union[str, list[str]] = None,
|
|
481
|
+
drop_columns: typing.Union[str, list[str], int, list[int]] = None,
|
|
482
|
+
) -> tuple[pd.DataFrame, list[str]]:
|
|
583
483
|
"""
|
|
584
484
|
Parse the given dataset into a DataFrame and drop the columns accordingly. In addition, the label columns will be
|
|
585
485
|
parsed and validated as well.
|
|
@@ -661,152 +561,6 @@ def read_dataset_as_dataframe(
|
|
|
661
561
|
return dataset, label_columns
|
|
662
562
|
|
|
663
563
|
|
|
664
|
-
def perform_drift_analysis(
|
|
665
|
-
project: str,
|
|
666
|
-
endpoint_id: str,
|
|
667
|
-
context: mlrun.MLClientCtx,
|
|
668
|
-
sample_set_statistics: dict,
|
|
669
|
-
drift_threshold: float,
|
|
670
|
-
possible_drift_threshold: float,
|
|
671
|
-
artifacts_tag: str = "",
|
|
672
|
-
db_session=None,
|
|
673
|
-
):
|
|
674
|
-
"""
|
|
675
|
-
Calculate drift per feature and produce the drift table artifact for logging post prediction. Note that most of
|
|
676
|
-
the calculations were already made through the monitoring batch job.
|
|
677
|
-
|
|
678
|
-
:param project: Project name.
|
|
679
|
-
:param endpoint_id: Model endpoint unique ID.
|
|
680
|
-
:param context: MLRun context. Will log the artifacts.
|
|
681
|
-
:param sample_set_statistics: The statistics of the sample set logged along a model.
|
|
682
|
-
:param drift_threshold: The threshold of which to mark drifts.
|
|
683
|
-
:param possible_drift_threshold: The threshold of which to mark possible drifts.
|
|
684
|
-
:param artifacts_tag: Tag to use for all the artifacts resulted from the function.
|
|
685
|
-
:param db_session: A runtime session that manages the current dialog with the database.
|
|
686
|
-
|
|
687
|
-
"""
|
|
688
|
-
if not db_session:
|
|
689
|
-
db_session = mlrun.get_run_db()
|
|
690
|
-
|
|
691
|
-
model_endpoint = db_session.get_model_endpoint(
|
|
692
|
-
project=project, endpoint_id=endpoint_id
|
|
693
|
-
)
|
|
694
|
-
|
|
695
|
-
# Get the drift metrics results along with the feature statistics from the latest batch
|
|
696
|
-
metrics = model_endpoint.status.drift_measures
|
|
697
|
-
inputs_statistics = model_endpoint.status.current_stats
|
|
698
|
-
|
|
699
|
-
inputs_statistics.pop("timestamp", None)
|
|
700
|
-
|
|
701
|
-
# Calculate drift for each feature
|
|
702
|
-
virtual_drift = VirtualDrift()
|
|
703
|
-
drift_results = virtual_drift.check_for_drift_per_feature(
|
|
704
|
-
metrics_results_dictionary=metrics,
|
|
705
|
-
possible_drift_threshold=possible_drift_threshold,
|
|
706
|
-
drift_detected_threshold=drift_threshold,
|
|
707
|
-
)
|
|
708
|
-
|
|
709
|
-
# Drift table plot
|
|
710
|
-
html_plot = FeaturesDriftTablePlot().produce(
|
|
711
|
-
features=list(inputs_statistics.keys()),
|
|
712
|
-
sample_set_statistics=sample_set_statistics,
|
|
713
|
-
inputs_statistics=inputs_statistics,
|
|
714
|
-
metrics=metrics,
|
|
715
|
-
drift_results=drift_results,
|
|
716
|
-
)
|
|
717
|
-
|
|
718
|
-
# Prepare drift result per feature dictionary
|
|
719
|
-
metrics_per_feature = {
|
|
720
|
-
feature: _get_drift_result(
|
|
721
|
-
tvd=metric_dictionary["tvd"],
|
|
722
|
-
hellinger=metric_dictionary["hellinger"],
|
|
723
|
-
threshold=drift_threshold,
|
|
724
|
-
)[1]
|
|
725
|
-
for feature, metric_dictionary in metrics.items()
|
|
726
|
-
if isinstance(metric_dictionary, dict)
|
|
727
|
-
}
|
|
728
|
-
|
|
729
|
-
# Calculate the final analysis result as well
|
|
730
|
-
drift_status, drift_metric = _get_drift_result(
|
|
731
|
-
tvd=metrics["tvd_mean"],
|
|
732
|
-
hellinger=metrics["hellinger_mean"],
|
|
733
|
-
threshold=drift_threshold,
|
|
734
|
-
)
|
|
735
|
-
# Log the different artifacts
|
|
736
|
-
_log_drift_artifacts(
|
|
737
|
-
context=context,
|
|
738
|
-
html_plot=html_plot,
|
|
739
|
-
metrics_per_feature=metrics_per_feature,
|
|
740
|
-
drift_status=drift_status,
|
|
741
|
-
drift_metric=drift_metric,
|
|
742
|
-
artifacts_tag=artifacts_tag,
|
|
743
|
-
)
|
|
744
|
-
|
|
745
|
-
|
|
746
|
-
def _log_drift_artifacts(
|
|
747
|
-
context: mlrun.MLClientCtx,
|
|
748
|
-
html_plot: str,
|
|
749
|
-
metrics_per_feature: typing.Dict[str, float],
|
|
750
|
-
drift_status: bool,
|
|
751
|
-
drift_metric: float,
|
|
752
|
-
artifacts_tag: str,
|
|
753
|
-
):
|
|
754
|
-
"""
|
|
755
|
-
Log the following artifacts/results:
|
|
756
|
-
1 - Drift table plot which includes a detailed drift analysis per feature
|
|
757
|
-
2 - Drift result per feature in a JSON format
|
|
758
|
-
3 - Results of the total drift analysis
|
|
759
|
-
|
|
760
|
-
:param context: MLRun context. Will log the artifacts.
|
|
761
|
-
:param html_plot: Body of the html file of the plot.
|
|
762
|
-
:param metrics_per_feature: Dictionary in which the key is a feature name and the value is the drift numerical
|
|
763
|
-
result.
|
|
764
|
-
:param drift_status: Boolean value that represents the final drift analysis result.
|
|
765
|
-
:param drift_metric: The final drift numerical result.
|
|
766
|
-
:param artifacts_tag: Tag to use for all the artifacts resulted from the function.
|
|
767
|
-
|
|
768
|
-
"""
|
|
769
|
-
context.log_artifact(
|
|
770
|
-
mlrun.artifacts.Artifact(
|
|
771
|
-
body=html_plot.encode("utf-8"), format="html", key="drift_table_plot"
|
|
772
|
-
),
|
|
773
|
-
tag=artifacts_tag,
|
|
774
|
-
)
|
|
775
|
-
context.log_artifact(
|
|
776
|
-
mlrun.artifacts.Artifact(
|
|
777
|
-
body=json.dumps(metrics_per_feature),
|
|
778
|
-
format="json",
|
|
779
|
-
key="features_drift_results",
|
|
780
|
-
),
|
|
781
|
-
tag=artifacts_tag,
|
|
782
|
-
)
|
|
783
|
-
context.log_results(
|
|
784
|
-
results={"drift_status": drift_status, "drift_metric": drift_metric}
|
|
785
|
-
)
|
|
786
|
-
|
|
787
|
-
|
|
788
|
-
def _get_drift_result(
|
|
789
|
-
tvd: float,
|
|
790
|
-
hellinger: float,
|
|
791
|
-
threshold: float,
|
|
792
|
-
) -> typing.Tuple[bool, float]:
|
|
793
|
-
"""
|
|
794
|
-
Calculate the drift result by the following equation: (tvd + hellinger) / 2
|
|
795
|
-
|
|
796
|
-
:param tvd: The feature's TVD value.
|
|
797
|
-
:param hellinger: The feature's Hellinger value.
|
|
798
|
-
:param threshold: The threshold from which the value is considered a drift.
|
|
799
|
-
|
|
800
|
-
:returns: A tuple of:
|
|
801
|
-
[0] = Boolean value as the drift status.
|
|
802
|
-
[1] = The result.
|
|
803
|
-
"""
|
|
804
|
-
result = (tvd + hellinger) / 2
|
|
805
|
-
if result >= threshold:
|
|
806
|
-
return True, result
|
|
807
|
-
return False, result
|
|
808
|
-
|
|
809
|
-
|
|
810
564
|
def log_result(
|
|
811
565
|
context: mlrun.MLClientCtx,
|
|
812
566
|
result_set_name: str,
|
|
@@ -829,3 +583,72 @@ def log_result(
|
|
|
829
583
|
key="batch_id",
|
|
830
584
|
value=batch_id,
|
|
831
585
|
)
|
|
586
|
+
|
|
587
|
+
|
|
588
|
+
def _create_model_monitoring_function_base(
|
|
589
|
+
*,
|
|
590
|
+
project: str,
|
|
591
|
+
func: typing.Union[str, None] = None,
|
|
592
|
+
application_class: typing.Union[
|
|
593
|
+
str,
|
|
594
|
+
mlrun.model_monitoring.application.ModelMonitoringApplicationBase,
|
|
595
|
+
mm_app.ModelMonitoringApplicationBaseV2,
|
|
596
|
+
None,
|
|
597
|
+
] = None,
|
|
598
|
+
name: typing.Optional[str] = None,
|
|
599
|
+
image: typing.Optional[str] = None,
|
|
600
|
+
tag: typing.Optional[str] = None,
|
|
601
|
+
requirements: typing.Union[str, list[str], None] = None,
|
|
602
|
+
requirements_file: str = "",
|
|
603
|
+
**application_kwargs,
|
|
604
|
+
) -> mlrun.runtimes.ServingRuntime:
|
|
605
|
+
"""
|
|
606
|
+
Note: this is an internal API only.
|
|
607
|
+
This function does not set the labels or mounts v3io.
|
|
608
|
+
"""
|
|
609
|
+
if isinstance(
|
|
610
|
+
application_class,
|
|
611
|
+
mlrun.model_monitoring.application.ModelMonitoringApplicationBase,
|
|
612
|
+
):
|
|
613
|
+
warnings.warn(
|
|
614
|
+
"The `ModelMonitoringApplicationBase` class is deprecated from version 1.7.0, "
|
|
615
|
+
"please use `ModelMonitoringApplicationBaseV2`. It will be removed in 1.9.0.",
|
|
616
|
+
FutureWarning,
|
|
617
|
+
)
|
|
618
|
+
if name in mm_constants.MonitoringFunctionNames.list():
|
|
619
|
+
raise mlrun.errors.MLRunInvalidArgumentError(
|
|
620
|
+
f"An application cannot have the following names: "
|
|
621
|
+
f"{mm_constants.MonitoringFunctionNames.list()}"
|
|
622
|
+
)
|
|
623
|
+
if func is None:
|
|
624
|
+
func = ""
|
|
625
|
+
func_obj = typing.cast(
|
|
626
|
+
mlrun.runtimes.ServingRuntime,
|
|
627
|
+
mlrun.code_to_function(
|
|
628
|
+
filename=func,
|
|
629
|
+
name=name,
|
|
630
|
+
project=project,
|
|
631
|
+
tag=tag,
|
|
632
|
+
kind=mlrun.run.RuntimeKinds.serving,
|
|
633
|
+
image=image,
|
|
634
|
+
requirements=requirements,
|
|
635
|
+
requirements_file=requirements_file,
|
|
636
|
+
),
|
|
637
|
+
)
|
|
638
|
+
graph = func_obj.set_topology(mlrun.serving.states.StepKinds.flow)
|
|
639
|
+
prepare_step = graph.to(
|
|
640
|
+
class_name="mlrun.model_monitoring.applications._application_steps._PrepareMonitoringEvent",
|
|
641
|
+
name="PrepareMonitoringEvent",
|
|
642
|
+
application_name=name,
|
|
643
|
+
)
|
|
644
|
+
if isinstance(application_class, str):
|
|
645
|
+
app_step = prepare_step.to(class_name=application_class, **application_kwargs)
|
|
646
|
+
else:
|
|
647
|
+
app_step = prepare_step.to(class_name=application_class)
|
|
648
|
+
app_step.to(
|
|
649
|
+
class_name="mlrun.model_monitoring.applications._application_steps._PushToMonitoringWriter",
|
|
650
|
+
name="PushToMonitoringWriter",
|
|
651
|
+
project=project,
|
|
652
|
+
writer_application_name=mm_constants.MonitoringFunctionNames.WRITER,
|
|
653
|
+
).respond()
|
|
654
|
+
return func_obj
|