mlrun 1.6.4rc2__py3-none-any.whl → 1.7.0rc20__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of mlrun might be problematic. Click here for more details.
- mlrun/__init__.py +11 -1
- mlrun/__main__.py +26 -112
- mlrun/alerts/__init__.py +15 -0
- mlrun/alerts/alert.py +144 -0
- mlrun/api/schemas/__init__.py +5 -4
- mlrun/artifacts/__init__.py +8 -3
- mlrun/artifacts/base.py +46 -257
- mlrun/artifacts/dataset.py +11 -192
- mlrun/artifacts/manager.py +47 -48
- mlrun/artifacts/model.py +31 -159
- mlrun/artifacts/plots.py +23 -380
- mlrun/common/constants.py +69 -0
- mlrun/common/db/sql_session.py +2 -3
- mlrun/common/formatters/__init__.py +19 -0
- mlrun/common/formatters/artifact.py +21 -0
- mlrun/common/formatters/base.py +78 -0
- mlrun/common/formatters/function.py +41 -0
- mlrun/common/formatters/pipeline.py +53 -0
- mlrun/common/formatters/project.py +51 -0
- mlrun/common/helpers.py +1 -2
- mlrun/common/model_monitoring/helpers.py +9 -5
- mlrun/{runtimes → common/runtimes}/constants.py +37 -9
- mlrun/common/schemas/__init__.py +24 -4
- mlrun/common/schemas/alert.py +203 -0
- mlrun/common/schemas/api_gateway.py +148 -0
- mlrun/common/schemas/artifact.py +18 -8
- mlrun/common/schemas/auth.py +11 -5
- mlrun/common/schemas/background_task.py +1 -1
- mlrun/common/schemas/client_spec.py +4 -1
- mlrun/common/schemas/feature_store.py +16 -16
- mlrun/common/schemas/frontend_spec.py +8 -7
- mlrun/common/schemas/function.py +5 -1
- mlrun/common/schemas/hub.py +11 -18
- mlrun/common/schemas/memory_reports.py +2 -2
- mlrun/common/schemas/model_monitoring/__init__.py +18 -3
- mlrun/common/schemas/model_monitoring/constants.py +83 -26
- mlrun/common/schemas/model_monitoring/grafana.py +13 -9
- mlrun/common/schemas/model_monitoring/model_endpoints.py +99 -16
- mlrun/common/schemas/notification.py +4 -4
- mlrun/common/schemas/object.py +2 -2
- mlrun/{runtimes/mpijob/v1alpha1.py → common/schemas/pagination.py} +10 -13
- mlrun/common/schemas/pipeline.py +1 -10
- mlrun/common/schemas/project.py +24 -23
- mlrun/common/schemas/runtime_resource.py +8 -12
- mlrun/common/schemas/schedule.py +3 -3
- mlrun/common/schemas/tag.py +1 -2
- mlrun/common/schemas/workflow.py +2 -2
- mlrun/common/types.py +7 -1
- mlrun/config.py +54 -17
- mlrun/data_types/to_pandas.py +10 -12
- mlrun/datastore/__init__.py +5 -8
- mlrun/datastore/alibaba_oss.py +130 -0
- mlrun/datastore/azure_blob.py +17 -5
- mlrun/datastore/base.py +62 -39
- mlrun/datastore/datastore.py +28 -9
- mlrun/datastore/datastore_profile.py +146 -20
- mlrun/datastore/filestore.py +0 -1
- mlrun/datastore/google_cloud_storage.py +6 -2
- mlrun/datastore/hdfs.py +56 -0
- mlrun/datastore/inmem.py +2 -2
- mlrun/datastore/redis.py +6 -2
- mlrun/datastore/s3.py +9 -0
- mlrun/datastore/snowflake_utils.py +43 -0
- mlrun/datastore/sources.py +201 -96
- mlrun/datastore/spark_utils.py +1 -2
- mlrun/datastore/store_resources.py +7 -7
- mlrun/datastore/targets.py +358 -104
- mlrun/datastore/utils.py +72 -58
- mlrun/datastore/v3io.py +5 -1
- mlrun/db/base.py +185 -35
- mlrun/db/factory.py +1 -1
- mlrun/db/httpdb.py +614 -179
- mlrun/db/nopdb.py +210 -26
- mlrun/errors.py +12 -1
- mlrun/execution.py +41 -24
- mlrun/feature_store/__init__.py +0 -2
- mlrun/feature_store/api.py +40 -72
- mlrun/feature_store/common.py +1 -1
- mlrun/feature_store/feature_set.py +76 -55
- mlrun/feature_store/feature_vector.py +28 -30
- mlrun/feature_store/ingestion.py +7 -6
- mlrun/feature_store/retrieval/base.py +16 -11
- mlrun/feature_store/retrieval/conversion.py +11 -13
- mlrun/feature_store/retrieval/dask_merger.py +2 -0
- mlrun/feature_store/retrieval/job.py +9 -3
- mlrun/feature_store/retrieval/local_merger.py +2 -0
- mlrun/feature_store/retrieval/spark_merger.py +34 -24
- mlrun/feature_store/steps.py +37 -34
- mlrun/features.py +9 -20
- mlrun/frameworks/_common/artifacts_library.py +9 -9
- mlrun/frameworks/_common/mlrun_interface.py +5 -5
- mlrun/frameworks/_common/model_handler.py +48 -48
- mlrun/frameworks/_common/plan.py +2 -3
- mlrun/frameworks/_common/producer.py +3 -4
- mlrun/frameworks/_common/utils.py +5 -5
- mlrun/frameworks/_dl_common/loggers/logger.py +6 -7
- mlrun/frameworks/_dl_common/loggers/mlrun_logger.py +9 -9
- mlrun/frameworks/_dl_common/loggers/tensorboard_logger.py +23 -47
- mlrun/frameworks/_ml_common/artifacts_library.py +1 -2
- mlrun/frameworks/_ml_common/loggers/logger.py +3 -4
- mlrun/frameworks/_ml_common/loggers/mlrun_logger.py +4 -5
- mlrun/frameworks/_ml_common/model_handler.py +24 -24
- mlrun/frameworks/_ml_common/pkl_model_server.py +2 -2
- mlrun/frameworks/_ml_common/plan.py +1 -1
- mlrun/frameworks/_ml_common/plans/calibration_curve_plan.py +2 -3
- mlrun/frameworks/_ml_common/plans/confusion_matrix_plan.py +2 -3
- mlrun/frameworks/_ml_common/plans/dataset_plan.py +3 -3
- mlrun/frameworks/_ml_common/plans/feature_importance_plan.py +3 -3
- mlrun/frameworks/_ml_common/plans/roc_curve_plan.py +4 -4
- mlrun/frameworks/_ml_common/utils.py +4 -4
- mlrun/frameworks/auto_mlrun/auto_mlrun.py +9 -9
- mlrun/frameworks/huggingface/model_server.py +4 -4
- mlrun/frameworks/lgbm/__init__.py +33 -33
- mlrun/frameworks/lgbm/callbacks/callback.py +2 -4
- mlrun/frameworks/lgbm/callbacks/logging_callback.py +4 -5
- mlrun/frameworks/lgbm/callbacks/mlrun_logging_callback.py +4 -5
- mlrun/frameworks/lgbm/mlrun_interfaces/booster_mlrun_interface.py +1 -3
- mlrun/frameworks/lgbm/mlrun_interfaces/mlrun_interface.py +6 -6
- mlrun/frameworks/lgbm/model_handler.py +10 -10
- mlrun/frameworks/lgbm/model_server.py +6 -6
- mlrun/frameworks/lgbm/utils.py +5 -5
- mlrun/frameworks/onnx/dataset.py +8 -8
- mlrun/frameworks/onnx/mlrun_interface.py +3 -3
- mlrun/frameworks/onnx/model_handler.py +6 -6
- mlrun/frameworks/onnx/model_server.py +7 -7
- mlrun/frameworks/parallel_coordinates.py +4 -3
- mlrun/frameworks/pytorch/__init__.py +18 -18
- mlrun/frameworks/pytorch/callbacks/callback.py +4 -5
- mlrun/frameworks/pytorch/callbacks/logging_callback.py +17 -17
- mlrun/frameworks/pytorch/callbacks/mlrun_logging_callback.py +11 -11
- mlrun/frameworks/pytorch/callbacks/tensorboard_logging_callback.py +23 -29
- mlrun/frameworks/pytorch/callbacks_handler.py +38 -38
- mlrun/frameworks/pytorch/mlrun_interface.py +20 -20
- mlrun/frameworks/pytorch/model_handler.py +17 -17
- mlrun/frameworks/pytorch/model_server.py +7 -7
- mlrun/frameworks/sklearn/__init__.py +13 -13
- mlrun/frameworks/sklearn/estimator.py +4 -4
- mlrun/frameworks/sklearn/metrics_library.py +14 -14
- mlrun/frameworks/sklearn/mlrun_interface.py +3 -6
- mlrun/frameworks/sklearn/model_handler.py +2 -2
- mlrun/frameworks/tf_keras/__init__.py +10 -7
- mlrun/frameworks/tf_keras/callbacks/logging_callback.py +15 -15
- mlrun/frameworks/tf_keras/callbacks/mlrun_logging_callback.py +11 -11
- mlrun/frameworks/tf_keras/callbacks/tensorboard_logging_callback.py +19 -23
- mlrun/frameworks/tf_keras/mlrun_interface.py +9 -11
- mlrun/frameworks/tf_keras/model_handler.py +14 -14
- mlrun/frameworks/tf_keras/model_server.py +6 -6
- mlrun/frameworks/xgboost/__init__.py +13 -13
- mlrun/frameworks/xgboost/model_handler.py +6 -6
- mlrun/k8s_utils.py +14 -16
- mlrun/launcher/__init__.py +1 -1
- mlrun/launcher/base.py +16 -15
- mlrun/launcher/client.py +8 -6
- mlrun/launcher/factory.py +1 -1
- mlrun/launcher/local.py +17 -11
- mlrun/launcher/remote.py +16 -10
- mlrun/lists.py +7 -6
- mlrun/model.py +238 -73
- mlrun/model_monitoring/__init__.py +1 -1
- mlrun/model_monitoring/api.py +138 -315
- mlrun/model_monitoring/application.py +5 -296
- mlrun/model_monitoring/applications/__init__.py +24 -0
- mlrun/model_monitoring/applications/_application_steps.py +157 -0
- mlrun/model_monitoring/applications/base.py +282 -0
- mlrun/model_monitoring/applications/context.py +214 -0
- mlrun/model_monitoring/applications/evidently_base.py +211 -0
- mlrun/model_monitoring/applications/histogram_data_drift.py +349 -0
- mlrun/model_monitoring/applications/results.py +99 -0
- mlrun/model_monitoring/controller.py +104 -84
- mlrun/model_monitoring/controller_handler.py +13 -5
- mlrun/model_monitoring/db/__init__.py +18 -0
- mlrun/model_monitoring/{stores → db/stores}/__init__.py +43 -36
- mlrun/model_monitoring/db/stores/base/__init__.py +15 -0
- mlrun/model_monitoring/{stores/model_endpoint_store.py → db/stores/base/store.py} +64 -40
- mlrun/model_monitoring/db/stores/sqldb/__init__.py +13 -0
- mlrun/model_monitoring/db/stores/sqldb/models/__init__.py +71 -0
- mlrun/model_monitoring/{stores → db/stores/sqldb}/models/base.py +109 -5
- mlrun/model_monitoring/db/stores/sqldb/models/mysql.py +88 -0
- mlrun/model_monitoring/{stores/models/mysql.py → db/stores/sqldb/models/sqlite.py} +19 -13
- mlrun/model_monitoring/db/stores/sqldb/sql_store.py +684 -0
- mlrun/model_monitoring/db/stores/v3io_kv/__init__.py +13 -0
- mlrun/model_monitoring/{stores/kv_model_endpoint_store.py → db/stores/v3io_kv/kv_store.py} +310 -165
- mlrun/model_monitoring/db/tsdb/__init__.py +100 -0
- mlrun/model_monitoring/db/tsdb/base.py +329 -0
- mlrun/model_monitoring/db/tsdb/helpers.py +30 -0
- mlrun/model_monitoring/db/tsdb/tdengine/__init__.py +15 -0
- mlrun/model_monitoring/db/tsdb/tdengine/schemas.py +240 -0
- mlrun/model_monitoring/db/tsdb/tdengine/stream_graph_steps.py +45 -0
- mlrun/model_monitoring/db/tsdb/tdengine/tdengine_connector.py +397 -0
- mlrun/model_monitoring/db/tsdb/v3io/__init__.py +15 -0
- mlrun/model_monitoring/db/tsdb/v3io/stream_graph_steps.py +117 -0
- mlrun/model_monitoring/db/tsdb/v3io/v3io_connector.py +630 -0
- mlrun/model_monitoring/evidently_application.py +6 -118
- mlrun/model_monitoring/features_drift_table.py +134 -106
- mlrun/model_monitoring/helpers.py +127 -28
- mlrun/model_monitoring/metrics/__init__.py +13 -0
- mlrun/model_monitoring/metrics/histogram_distance.py +127 -0
- mlrun/model_monitoring/model_endpoint.py +3 -2
- mlrun/model_monitoring/prometheus.py +1 -4
- mlrun/model_monitoring/stream_processing.py +62 -231
- mlrun/model_monitoring/tracking_policy.py +9 -2
- mlrun/model_monitoring/writer.py +152 -124
- mlrun/package/__init__.py +6 -6
- mlrun/package/context_handler.py +5 -5
- mlrun/package/packager.py +7 -7
- mlrun/package/packagers/default_packager.py +6 -6
- mlrun/package/packagers/numpy_packagers.py +15 -15
- mlrun/package/packagers/pandas_packagers.py +5 -5
- mlrun/package/packagers/python_standard_library_packagers.py +10 -10
- mlrun/package/packagers_manager.py +19 -23
- mlrun/package/utils/_formatter.py +6 -6
- mlrun/package/utils/_pickler.py +2 -2
- mlrun/package/utils/_supported_format.py +4 -4
- mlrun/package/utils/log_hint_utils.py +2 -2
- mlrun/package/utils/type_hint_utils.py +4 -9
- mlrun/platforms/__init__.py +11 -10
- mlrun/platforms/iguazio.py +24 -203
- mlrun/projects/operations.py +35 -21
- mlrun/projects/pipelines.py +68 -99
- mlrun/projects/project.py +830 -266
- mlrun/render.py +3 -11
- mlrun/run.py +162 -166
- mlrun/runtimes/__init__.py +62 -7
- mlrun/runtimes/base.py +39 -32
- mlrun/runtimes/daskjob.py +8 -8
- mlrun/runtimes/databricks_job/databricks_cancel_task.py +1 -1
- mlrun/runtimes/databricks_job/databricks_runtime.py +7 -7
- mlrun/runtimes/databricks_job/databricks_wrapper.py +1 -1
- mlrun/runtimes/funcdoc.py +0 -28
- mlrun/runtimes/function_reference.py +1 -1
- mlrun/runtimes/kubejob.py +28 -122
- mlrun/runtimes/local.py +6 -3
- mlrun/runtimes/mpijob/__init__.py +0 -20
- mlrun/runtimes/mpijob/abstract.py +9 -10
- mlrun/runtimes/mpijob/v1.py +1 -1
- mlrun/{model_monitoring/stores/models/sqlite.py → runtimes/nuclio/__init__.py} +7 -9
- mlrun/runtimes/nuclio/api_gateway.py +709 -0
- mlrun/runtimes/nuclio/application/__init__.py +15 -0
- mlrun/runtimes/nuclio/application/application.py +523 -0
- mlrun/runtimes/nuclio/application/reverse_proxy.go +95 -0
- mlrun/runtimes/{function.py → nuclio/function.py} +112 -73
- mlrun/runtimes/{nuclio.py → nuclio/nuclio.py} +6 -6
- mlrun/runtimes/{serving.py → nuclio/serving.py} +45 -51
- mlrun/runtimes/pod.py +286 -88
- mlrun/runtimes/remotesparkjob.py +2 -2
- mlrun/runtimes/sparkjob/spark3job.py +51 -34
- mlrun/runtimes/utils.py +7 -75
- mlrun/secrets.py +9 -5
- mlrun/serving/remote.py +2 -7
- mlrun/serving/routers.py +13 -10
- mlrun/serving/server.py +22 -26
- mlrun/serving/states.py +99 -25
- mlrun/serving/utils.py +3 -3
- mlrun/serving/v1_serving.py +6 -7
- mlrun/serving/v2_serving.py +59 -20
- mlrun/track/tracker.py +2 -1
- mlrun/track/tracker_manager.py +3 -3
- mlrun/track/trackers/mlflow_tracker.py +1 -2
- mlrun/utils/async_http.py +5 -7
- mlrun/utils/azure_vault.py +1 -1
- mlrun/utils/clones.py +1 -2
- mlrun/utils/condition_evaluator.py +3 -3
- mlrun/utils/db.py +3 -3
- mlrun/utils/helpers.py +183 -197
- mlrun/utils/http.py +2 -5
- mlrun/utils/logger.py +76 -14
- mlrun/utils/notifications/notification/__init__.py +17 -12
- mlrun/utils/notifications/notification/base.py +14 -2
- mlrun/utils/notifications/notification/console.py +2 -0
- mlrun/utils/notifications/notification/git.py +3 -1
- mlrun/utils/notifications/notification/ipython.py +3 -1
- mlrun/utils/notifications/notification/slack.py +101 -21
- mlrun/utils/notifications/notification/webhook.py +11 -1
- mlrun/utils/notifications/notification_pusher.py +155 -30
- mlrun/utils/retryer.py +208 -0
- mlrun/utils/singleton.py +1 -1
- mlrun/utils/v3io_clients.py +2 -4
- mlrun/utils/version/version.json +2 -2
- mlrun/utils/version/version.py +2 -6
- {mlrun-1.6.4rc2.dist-info → mlrun-1.7.0rc20.dist-info}/METADATA +31 -19
- mlrun-1.7.0rc20.dist-info/RECORD +353 -0
- mlrun/kfpops.py +0 -868
- mlrun/model_monitoring/batch.py +0 -1095
- mlrun/model_monitoring/stores/models/__init__.py +0 -27
- mlrun/model_monitoring/stores/sql_model_endpoint_store.py +0 -384
- mlrun/platforms/other.py +0 -306
- mlrun-1.6.4rc2.dist-info/RECORD +0 -314
- {mlrun-1.6.4rc2.dist-info → mlrun-1.7.0rc20.dist-info}/LICENSE +0 -0
- {mlrun-1.6.4rc2.dist-info → mlrun-1.7.0rc20.dist-info}/WHEEL +0 -0
- {mlrun-1.6.4rc2.dist-info → mlrun-1.7.0rc20.dist-info}/entry_points.txt +0 -0
- {mlrun-1.6.4rc2.dist-info → mlrun-1.7.0rc20.dist-info}/top_level.txt +0 -0
|
@@ -15,20 +15,26 @@
|
|
|
15
15
|
import datetime
|
|
16
16
|
import typing
|
|
17
17
|
|
|
18
|
+
import numpy as np
|
|
19
|
+
import pandas as pd
|
|
20
|
+
|
|
18
21
|
import mlrun
|
|
19
22
|
import mlrun.common.model_monitoring.helpers
|
|
20
23
|
import mlrun.common.schemas
|
|
21
24
|
from mlrun.common.schemas.model_monitoring import (
|
|
22
25
|
EventFieldType,
|
|
23
|
-
MonitoringFunctionNames,
|
|
24
26
|
)
|
|
25
|
-
from mlrun.
|
|
27
|
+
from mlrun.common.schemas.model_monitoring.model_endpoints import (
|
|
28
|
+
ModelEndpointMonitoringMetricType,
|
|
29
|
+
_compose_full_name,
|
|
30
|
+
)
|
|
26
31
|
from mlrun.model_monitoring.model_endpoint import ModelEndpoint
|
|
27
32
|
from mlrun.utils import logger
|
|
28
33
|
|
|
29
34
|
if typing.TYPE_CHECKING:
|
|
30
35
|
from mlrun.db.base import RunDBInterface
|
|
31
36
|
from mlrun.projects import MlrunProject
|
|
37
|
+
import mlrun.common.schemas.model_monitoring.constants as mm_constants
|
|
32
38
|
|
|
33
39
|
|
|
34
40
|
class _BatchDict(typing.TypedDict):
|
|
@@ -37,36 +43,32 @@ class _BatchDict(typing.TypedDict):
|
|
|
37
43
|
days: int
|
|
38
44
|
|
|
39
45
|
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
def get_stream_path(project: str = None, application_name: str = None) -> str:
|
|
46
|
+
def get_stream_path(
|
|
47
|
+
project: str = None,
|
|
48
|
+
function_name: str = mm_constants.MonitoringFunctionNames.STREAM,
|
|
49
|
+
) -> str:
|
|
45
50
|
"""
|
|
46
51
|
Get stream path from the project secret. If it wasn't set, take it from the system configurations.
|
|
47
52
|
|
|
48
53
|
:param project: Project name.
|
|
49
|
-
:param
|
|
54
|
+
:param function_name: Application name. Default is model_monitoring_stream.
|
|
50
55
|
|
|
51
56
|
:return: Monitoring stream path to the relevant application.
|
|
52
57
|
"""
|
|
53
58
|
|
|
54
59
|
stream_uri = mlrun.get_secret_or_env(
|
|
55
60
|
mlrun.common.schemas.model_monitoring.ProjectSecretKeys.STREAM_PATH
|
|
56
|
-
if application_name is None
|
|
57
|
-
else ""
|
|
58
61
|
) or mlrun.mlconf.get_model_monitoring_file_target_path(
|
|
59
62
|
project=project,
|
|
60
63
|
kind=mlrun.common.schemas.model_monitoring.FileTargetKind.STREAM,
|
|
61
64
|
target="online",
|
|
62
|
-
|
|
65
|
+
function_name=function_name,
|
|
63
66
|
)
|
|
64
67
|
|
|
65
68
|
if isinstance(stream_uri, list): # ML-6043 - user side gets only the new stream uri
|
|
66
|
-
stream_uri = stream_uri[1]
|
|
67
|
-
|
|
69
|
+
stream_uri = stream_uri[1] # get new stream path, under projects
|
|
68
70
|
return mlrun.common.model_monitoring.helpers.parse_monitoring_stream_path(
|
|
69
|
-
stream_uri=stream_uri, project=project,
|
|
71
|
+
stream_uri=stream_uri, project=project, function_name=function_name
|
|
70
72
|
)
|
|
71
73
|
|
|
72
74
|
|
|
@@ -113,6 +115,24 @@ def get_connection_string(secret_provider: typing.Callable = None) -> str:
|
|
|
113
115
|
)
|
|
114
116
|
|
|
115
117
|
|
|
118
|
+
def get_tsdb_connection_string(
|
|
119
|
+
secret_provider: typing.Optional[typing.Callable] = None,
|
|
120
|
+
) -> str:
|
|
121
|
+
"""Get TSDB connection string from the project secret. If it wasn't set, take it from the system
|
|
122
|
+
configurations.
|
|
123
|
+
:param secret_provider: An optional secret provider to get the connection string secret.
|
|
124
|
+
:return: Valid TSDB connection string.
|
|
125
|
+
"""
|
|
126
|
+
|
|
127
|
+
return (
|
|
128
|
+
mlrun.get_secret_or_env(
|
|
129
|
+
key=mlrun.common.schemas.model_monitoring.ProjectSecretKeys.TSDB_CONNECTION,
|
|
130
|
+
secret_provider=secret_provider,
|
|
131
|
+
)
|
|
132
|
+
or mlrun.mlconf.model_endpoint_monitoring.tsdb_connection
|
|
133
|
+
)
|
|
134
|
+
|
|
135
|
+
|
|
116
136
|
def batch_dict2timedelta(batch_dict: _BatchDict) -> datetime.timedelta:
|
|
117
137
|
"""
|
|
118
138
|
Convert a batch dictionary to timedelta.
|
|
@@ -128,24 +148,31 @@ def _get_monitoring_time_window_from_controller_run(
|
|
|
128
148
|
project: str, db: "RunDBInterface"
|
|
129
149
|
) -> datetime.timedelta:
|
|
130
150
|
"""
|
|
131
|
-
Get
|
|
151
|
+
Get the base period from the controller.
|
|
132
152
|
|
|
133
153
|
:param project: Project name.
|
|
134
154
|
:param db: DB interface.
|
|
135
155
|
|
|
136
156
|
:return: Timedelta for the controller to run.
|
|
157
|
+
:raise: MLRunNotFoundError if the controller isn't deployed yet
|
|
137
158
|
"""
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
raise
|
|
147
|
-
|
|
148
|
-
|
|
159
|
+
|
|
160
|
+
controller = db.get_function(
|
|
161
|
+
name=mm_constants.MonitoringFunctionNames.APPLICATION_CONTROLLER,
|
|
162
|
+
project=project,
|
|
163
|
+
)
|
|
164
|
+
if isinstance(controller, dict):
|
|
165
|
+
controller = mlrun.runtimes.RemoteRuntime.from_dict(controller)
|
|
166
|
+
elif not hasattr(controller, "to_dict"):
|
|
167
|
+
raise mlrun.errors.MLRunNotFoundError()
|
|
168
|
+
base_period = controller.spec.config["spec.triggers.cron_interval"]["attributes"][
|
|
169
|
+
"interval"
|
|
170
|
+
]
|
|
171
|
+
batch_dict = {
|
|
172
|
+
mm_constants.EventFieldType.MINUTES: int(base_period[:-1]),
|
|
173
|
+
mm_constants.EventFieldType.HOURS: 0,
|
|
174
|
+
mm_constants.EventFieldType.DAYS: 0,
|
|
175
|
+
}
|
|
149
176
|
return batch_dict2timedelta(batch_dict)
|
|
150
177
|
|
|
151
178
|
|
|
@@ -180,9 +207,9 @@ def update_model_endpoint_last_request(
|
|
|
180
207
|
else:
|
|
181
208
|
try:
|
|
182
209
|
time_window = _get_monitoring_time_window_from_controller_run(project, db)
|
|
183
|
-
except
|
|
210
|
+
except mlrun.errors.MLRunNotFoundError:
|
|
184
211
|
logger.debug(
|
|
185
|
-
"Not bumping model endpoint last request time -
|
|
212
|
+
"Not bumping model endpoint last request time - the monitoring controller isn't deployed yet"
|
|
186
213
|
)
|
|
187
214
|
return
|
|
188
215
|
|
|
@@ -206,3 +233,75 @@ def update_model_endpoint_last_request(
|
|
|
206
233
|
endpoint_id=model_endpoint.metadata.uid,
|
|
207
234
|
attributes={EventFieldType.LAST_REQUEST: bumped_last_request},
|
|
208
235
|
)
|
|
236
|
+
|
|
237
|
+
|
|
238
|
+
def calculate_inputs_statistics(
|
|
239
|
+
sample_set_statistics: dict, inputs: pd.DataFrame
|
|
240
|
+
) -> mlrun.common.model_monitoring.helpers.FeatureStats:
|
|
241
|
+
"""
|
|
242
|
+
Calculate the inputs data statistics for drift monitoring purpose.
|
|
243
|
+
|
|
244
|
+
:param sample_set_statistics: The sample set (stored end point's dataset to reference) statistics. The bins of the
|
|
245
|
+
histograms of each feature will be used to recalculate the histograms of the inputs.
|
|
246
|
+
:param inputs: The inputs to calculate their statistics and later on - the drift with respect to the
|
|
247
|
+
sample set.
|
|
248
|
+
|
|
249
|
+
:returns: The calculated statistics of the inputs data.
|
|
250
|
+
"""
|
|
251
|
+
|
|
252
|
+
# Use `DFDataInfer` to calculate the statistics over the inputs:
|
|
253
|
+
inputs_statistics = mlrun.data_types.infer.DFDataInfer.get_stats(
|
|
254
|
+
df=inputs,
|
|
255
|
+
options=mlrun.data_types.infer.InferOptions.Histogram,
|
|
256
|
+
)
|
|
257
|
+
|
|
258
|
+
# Recalculate the histograms over the bins that are set in the sample-set of the end point:
|
|
259
|
+
for feature in inputs_statistics.keys():
|
|
260
|
+
if feature in sample_set_statistics:
|
|
261
|
+
counts, bins = np.histogram(
|
|
262
|
+
inputs[feature].to_numpy(),
|
|
263
|
+
bins=sample_set_statistics[feature]["hist"][1],
|
|
264
|
+
)
|
|
265
|
+
inputs_statistics[feature]["hist"] = [
|
|
266
|
+
counts.tolist(),
|
|
267
|
+
bins.tolist(),
|
|
268
|
+
]
|
|
269
|
+
elif "hist" in inputs_statistics[feature]:
|
|
270
|
+
# Comply with the other common features' histogram length
|
|
271
|
+
mlrun.common.model_monitoring.helpers.pad_hist(
|
|
272
|
+
mlrun.common.model_monitoring.helpers.Histogram(
|
|
273
|
+
inputs_statistics[feature]["hist"]
|
|
274
|
+
)
|
|
275
|
+
)
|
|
276
|
+
|
|
277
|
+
return inputs_statistics
|
|
278
|
+
|
|
279
|
+
|
|
280
|
+
def get_endpoint_record(project: str, endpoint_id: str):
|
|
281
|
+
model_endpoint_store = mlrun.model_monitoring.get_store_object(
|
|
282
|
+
project=project,
|
|
283
|
+
)
|
|
284
|
+
return model_endpoint_store.get_model_endpoint(endpoint_id=endpoint_id)
|
|
285
|
+
|
|
286
|
+
|
|
287
|
+
def get_result_instance_fqn(
|
|
288
|
+
model_endpoint_id: str, app_name: str, result_name: str
|
|
289
|
+
) -> str:
|
|
290
|
+
return f"{model_endpoint_id}.{app_name}.result.{result_name}"
|
|
291
|
+
|
|
292
|
+
|
|
293
|
+
def get_default_result_instance_fqn(model_endpoint_id: str) -> str:
|
|
294
|
+
return get_result_instance_fqn(
|
|
295
|
+
model_endpoint_id,
|
|
296
|
+
mm_constants.HistogramDataDriftApplicationConstants.NAME,
|
|
297
|
+
mm_constants.HistogramDataDriftApplicationConstants.GENERAL_RESULT_NAME,
|
|
298
|
+
)
|
|
299
|
+
|
|
300
|
+
|
|
301
|
+
def get_invocations_fqn(project: str) -> str:
|
|
302
|
+
return _compose_full_name(
|
|
303
|
+
project=project,
|
|
304
|
+
app=mm_constants.SpecialApps.MLRUN_INFRA,
|
|
305
|
+
name=mm_constants.PredictionsQueryConstants.INVOCATIONS,
|
|
306
|
+
type=ModelEndpointMonitoringMetricType.METRIC,
|
|
307
|
+
)
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
# Copyright 2024 Iguazio
|
|
2
|
+
#
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
#
|
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
#
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
@@ -0,0 +1,127 @@
|
|
|
1
|
+
# Copyright 2024 Iguazio
|
|
2
|
+
#
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
#
|
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
#
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
14
|
+
|
|
15
|
+
import abc
|
|
16
|
+
import dataclasses
|
|
17
|
+
from typing import ClassVar, Optional
|
|
18
|
+
|
|
19
|
+
import numpy as np
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
@dataclasses.dataclass
|
|
23
|
+
class HistogramDistanceMetric(abc.ABC):
|
|
24
|
+
"""
|
|
25
|
+
An abstract base class for distance metrics between histograms.
|
|
26
|
+
|
|
27
|
+
:args distrib_t: array of distribution t (usually the latest dataset distribution)
|
|
28
|
+
:args distrib_u: array of distribution u (usually the sample dataset distribution)
|
|
29
|
+
|
|
30
|
+
Each distribution must contain nonnegative floats that sum up to 1.0.
|
|
31
|
+
"""
|
|
32
|
+
|
|
33
|
+
distrib_t: np.ndarray
|
|
34
|
+
distrib_u: np.ndarray
|
|
35
|
+
|
|
36
|
+
NAME: ClassVar[str]
|
|
37
|
+
|
|
38
|
+
# noinspection PyMethodOverriding
|
|
39
|
+
def __init_subclass__(cls, *, metric_name: str, **kwargs) -> None:
|
|
40
|
+
super().__init_subclass__(**kwargs)
|
|
41
|
+
cls.NAME = metric_name
|
|
42
|
+
|
|
43
|
+
@abc.abstractmethod
|
|
44
|
+
def compute(self) -> float:
|
|
45
|
+
raise NotImplementedError
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
class TotalVarianceDistance(HistogramDistanceMetric, metric_name="tvd"):
|
|
49
|
+
"""
|
|
50
|
+
Provides a symmetric drift distance between two periods t and u
|
|
51
|
+
Z - vector of random variables
|
|
52
|
+
Pt - Probability distribution over time span t
|
|
53
|
+
"""
|
|
54
|
+
|
|
55
|
+
def compute(self) -> float:
|
|
56
|
+
"""
|
|
57
|
+
Calculate Total Variance distance.
|
|
58
|
+
|
|
59
|
+
:returns: Total Variance Distance.
|
|
60
|
+
"""
|
|
61
|
+
return np.sum(np.abs(self.distrib_t - self.distrib_u)) / 2
|
|
62
|
+
|
|
63
|
+
|
|
64
|
+
class HellingerDistance(HistogramDistanceMetric, metric_name="hellinger"):
|
|
65
|
+
"""
|
|
66
|
+
Hellinger distance is an f divergence measure, similar to the Kullback-Leibler (KL) divergence.
|
|
67
|
+
It is used to quantify the difference between two probability distributions.
|
|
68
|
+
However, unlike KL Divergence the Hellinger divergence is symmetric and bounded over a probability space.
|
|
69
|
+
The output range of Hellinger distance is [0,1]. The closer to 0, the more similar the two distributions.
|
|
70
|
+
"""
|
|
71
|
+
|
|
72
|
+
def compute(self) -> float:
|
|
73
|
+
"""
|
|
74
|
+
Calculate Hellinger Distance
|
|
75
|
+
|
|
76
|
+
:returns: Hellinger Distance
|
|
77
|
+
"""
|
|
78
|
+
return np.sqrt(
|
|
79
|
+
max(
|
|
80
|
+
1 - np.sum(np.sqrt(self.distrib_u * self.distrib_t)),
|
|
81
|
+
0, # numerical errors may produce small negative numbers, e.g. -1e-16.
|
|
82
|
+
# However, Cauchy-Schwarz inequality assures this number is in the range [0, 1]
|
|
83
|
+
)
|
|
84
|
+
)
|
|
85
|
+
|
|
86
|
+
|
|
87
|
+
class KullbackLeiblerDivergence(HistogramDistanceMetric, metric_name="kld"):
|
|
88
|
+
"""
|
|
89
|
+
KL Divergence (or relative entropy) is a measure of how one probability distribution differs from another.
|
|
90
|
+
It is an asymmetric measure (thus it's not a metric) and it doesn't satisfy the triangle inequality.
|
|
91
|
+
KL Divergence of 0 indicates two identical distributions.
|
|
92
|
+
"""
|
|
93
|
+
|
|
94
|
+
@staticmethod
|
|
95
|
+
def _calc_kl_div(
|
|
96
|
+
actual_dist: np.ndarray, expected_dist: np.ndarray, zero_scaling: float
|
|
97
|
+
) -> float:
|
|
98
|
+
"""Return the asymmetric KL divergence"""
|
|
99
|
+
# We take 0*log(0) == 0 for this calculation
|
|
100
|
+
mask = actual_dist != 0
|
|
101
|
+
actual_dist = actual_dist[mask]
|
|
102
|
+
expected_dist = expected_dist[mask]
|
|
103
|
+
with np.errstate(over="ignore"):
|
|
104
|
+
# Ignore overflow warnings when dividing by small numbers,
|
|
105
|
+
# resulting in inf:
|
|
106
|
+
# RuntimeWarning: overflow encountered in true_divide
|
|
107
|
+
relative_prob = actual_dist / np.where(
|
|
108
|
+
expected_dist != 0, expected_dist, zero_scaling
|
|
109
|
+
)
|
|
110
|
+
return np.sum(actual_dist * np.log(relative_prob))
|
|
111
|
+
|
|
112
|
+
def compute(
|
|
113
|
+
self, capping: Optional[float] = None, zero_scaling: float = 1e-4
|
|
114
|
+
) -> float:
|
|
115
|
+
"""
|
|
116
|
+
:param capping: A bounded value for the KL Divergence. For infinite distance, the result is replaced with
|
|
117
|
+
the capping value which indicates a huge difference between the distributions.
|
|
118
|
+
:param zero_scaling: Will be used to replace 0 values for executing the logarithmic operation.
|
|
119
|
+
|
|
120
|
+
:returns: symmetric KL Divergence
|
|
121
|
+
"""
|
|
122
|
+
t_u = self._calc_kl_div(self.distrib_t, self.distrib_u, zero_scaling)
|
|
123
|
+
u_t = self._calc_kl_div(self.distrib_u, self.distrib_t, zero_scaling)
|
|
124
|
+
result = t_u + u_t
|
|
125
|
+
if capping and result == float("inf"):
|
|
126
|
+
return capping
|
|
127
|
+
return result
|
|
@@ -17,6 +17,7 @@ from dataclasses import dataclass, field
|
|
|
17
17
|
from typing import Any
|
|
18
18
|
|
|
19
19
|
import mlrun.model
|
|
20
|
+
from mlrun.common.model_monitoring.helpers import FeatureStats
|
|
20
21
|
from mlrun.common.schemas.model_monitoring.constants import (
|
|
21
22
|
EndpointType,
|
|
22
23
|
EventKeyMetrics,
|
|
@@ -42,8 +43,8 @@ class ModelEndpointSpec(mlrun.model.ModelObj):
|
|
|
42
43
|
|
|
43
44
|
@dataclass
|
|
44
45
|
class ModelEndpointStatus(mlrun.model.ModelObj):
|
|
45
|
-
feature_stats:
|
|
46
|
-
current_stats:
|
|
46
|
+
feature_stats: FeatureStats = field(default_factory=dict)
|
|
47
|
+
current_stats: FeatureStats = field(default_factory=dict)
|
|
47
48
|
first_request: str = ""
|
|
48
49
|
last_request: str = ""
|
|
49
50
|
error_count: int = 0
|
|
@@ -12,7 +12,6 @@
|
|
|
12
12
|
# See the License for the specific language governing permissions and
|
|
13
13
|
# limitations under the License.
|
|
14
14
|
#
|
|
15
|
-
import typing
|
|
16
15
|
|
|
17
16
|
import prometheus_client
|
|
18
17
|
|
|
@@ -134,9 +133,7 @@ def write_predictions_and_latency_metrics(
|
|
|
134
133
|
|
|
135
134
|
|
|
136
135
|
@_write_registry
|
|
137
|
-
def write_income_features(
|
|
138
|
-
project: str, endpoint_id: str, features: typing.Dict[str, float]
|
|
139
|
-
):
|
|
136
|
+
def write_income_features(project: str, endpoint_id: str, features: dict[str, float]):
|
|
140
137
|
"""Update a sample of features.
|
|
141
138
|
|
|
142
139
|
:param project: Project name.
|