mlrun 1.6.4rc7__py3-none-any.whl → 1.7.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of mlrun might be problematic. Click here for more details.
- mlrun/__init__.py +11 -1
- mlrun/__main__.py +40 -122
- mlrun/alerts/__init__.py +15 -0
- mlrun/alerts/alert.py +248 -0
- mlrun/api/schemas/__init__.py +5 -4
- mlrun/artifacts/__init__.py +8 -3
- mlrun/artifacts/base.py +47 -257
- mlrun/artifacts/dataset.py +11 -192
- mlrun/artifacts/manager.py +79 -47
- mlrun/artifacts/model.py +31 -159
- mlrun/artifacts/plots.py +23 -380
- mlrun/common/constants.py +74 -1
- mlrun/common/db/sql_session.py +5 -5
- mlrun/common/formatters/__init__.py +21 -0
- mlrun/common/formatters/artifact.py +45 -0
- mlrun/common/formatters/base.py +113 -0
- mlrun/common/formatters/feature_set.py +33 -0
- mlrun/common/formatters/function.py +46 -0
- mlrun/common/formatters/pipeline.py +53 -0
- mlrun/common/formatters/project.py +51 -0
- mlrun/common/formatters/run.py +29 -0
- mlrun/common/helpers.py +12 -3
- mlrun/common/model_monitoring/helpers.py +9 -5
- mlrun/{runtimes → common/runtimes}/constants.py +37 -9
- mlrun/common/schemas/__init__.py +31 -5
- mlrun/common/schemas/alert.py +202 -0
- mlrun/common/schemas/api_gateway.py +196 -0
- mlrun/common/schemas/artifact.py +25 -4
- mlrun/common/schemas/auth.py +16 -5
- mlrun/common/schemas/background_task.py +1 -1
- mlrun/common/schemas/client_spec.py +4 -2
- mlrun/common/schemas/common.py +7 -4
- mlrun/common/schemas/constants.py +3 -0
- mlrun/common/schemas/feature_store.py +74 -44
- mlrun/common/schemas/frontend_spec.py +15 -7
- mlrun/common/schemas/function.py +12 -1
- mlrun/common/schemas/hub.py +11 -18
- mlrun/common/schemas/memory_reports.py +2 -2
- mlrun/common/schemas/model_monitoring/__init__.py +20 -4
- mlrun/common/schemas/model_monitoring/constants.py +123 -42
- mlrun/common/schemas/model_monitoring/grafana.py +13 -9
- mlrun/common/schemas/model_monitoring/model_endpoints.py +101 -54
- mlrun/common/schemas/notification.py +71 -14
- mlrun/common/schemas/object.py +2 -2
- mlrun/{model_monitoring/controller_handler.py → common/schemas/pagination.py} +9 -12
- mlrun/common/schemas/pipeline.py +8 -1
- mlrun/common/schemas/project.py +69 -18
- mlrun/common/schemas/runs.py +7 -1
- mlrun/common/schemas/runtime_resource.py +8 -12
- mlrun/common/schemas/schedule.py +4 -4
- mlrun/common/schemas/tag.py +1 -2
- mlrun/common/schemas/workflow.py +12 -4
- mlrun/common/types.py +14 -1
- mlrun/config.py +154 -69
- mlrun/data_types/data_types.py +6 -1
- mlrun/data_types/spark.py +2 -2
- mlrun/data_types/to_pandas.py +67 -37
- mlrun/datastore/__init__.py +6 -8
- mlrun/datastore/alibaba_oss.py +131 -0
- mlrun/datastore/azure_blob.py +143 -42
- mlrun/datastore/base.py +102 -58
- mlrun/datastore/datastore.py +34 -13
- mlrun/datastore/datastore_profile.py +146 -20
- mlrun/datastore/dbfs_store.py +3 -7
- mlrun/datastore/filestore.py +1 -4
- mlrun/datastore/google_cloud_storage.py +97 -33
- mlrun/datastore/hdfs.py +56 -0
- mlrun/datastore/inmem.py +6 -3
- mlrun/datastore/redis.py +7 -2
- mlrun/datastore/s3.py +34 -12
- mlrun/datastore/snowflake_utils.py +45 -0
- mlrun/datastore/sources.py +303 -111
- mlrun/datastore/spark_utils.py +31 -2
- mlrun/datastore/store_resources.py +9 -7
- mlrun/datastore/storeytargets.py +151 -0
- mlrun/datastore/targets.py +453 -176
- mlrun/datastore/utils.py +72 -58
- mlrun/datastore/v3io.py +6 -1
- mlrun/db/base.py +274 -41
- mlrun/db/factory.py +1 -1
- mlrun/db/httpdb.py +893 -225
- mlrun/db/nopdb.py +291 -33
- mlrun/errors.py +36 -6
- mlrun/execution.py +115 -42
- mlrun/feature_store/__init__.py +0 -2
- mlrun/feature_store/api.py +65 -73
- mlrun/feature_store/common.py +7 -12
- mlrun/feature_store/feature_set.py +76 -55
- mlrun/feature_store/feature_vector.py +39 -31
- mlrun/feature_store/ingestion.py +7 -6
- mlrun/feature_store/retrieval/base.py +16 -11
- mlrun/feature_store/retrieval/dask_merger.py +2 -0
- mlrun/feature_store/retrieval/job.py +13 -4
- mlrun/feature_store/retrieval/local_merger.py +2 -0
- mlrun/feature_store/retrieval/spark_merger.py +24 -32
- mlrun/feature_store/steps.py +45 -34
- mlrun/features.py +11 -21
- mlrun/frameworks/_common/artifacts_library.py +9 -9
- mlrun/frameworks/_common/mlrun_interface.py +5 -5
- mlrun/frameworks/_common/model_handler.py +48 -48
- mlrun/frameworks/_common/plan.py +5 -6
- mlrun/frameworks/_common/producer.py +3 -4
- mlrun/frameworks/_common/utils.py +5 -5
- mlrun/frameworks/_dl_common/loggers/logger.py +6 -7
- mlrun/frameworks/_dl_common/loggers/mlrun_logger.py +9 -9
- mlrun/frameworks/_dl_common/loggers/tensorboard_logger.py +23 -47
- mlrun/frameworks/_ml_common/artifacts_library.py +1 -2
- mlrun/frameworks/_ml_common/loggers/logger.py +3 -4
- mlrun/frameworks/_ml_common/loggers/mlrun_logger.py +4 -5
- mlrun/frameworks/_ml_common/model_handler.py +24 -24
- mlrun/frameworks/_ml_common/pkl_model_server.py +2 -2
- mlrun/frameworks/_ml_common/plan.py +2 -2
- mlrun/frameworks/_ml_common/plans/calibration_curve_plan.py +2 -3
- mlrun/frameworks/_ml_common/plans/confusion_matrix_plan.py +2 -3
- mlrun/frameworks/_ml_common/plans/dataset_plan.py +3 -3
- mlrun/frameworks/_ml_common/plans/feature_importance_plan.py +3 -3
- mlrun/frameworks/_ml_common/plans/roc_curve_plan.py +4 -4
- mlrun/frameworks/_ml_common/utils.py +4 -4
- mlrun/frameworks/auto_mlrun/auto_mlrun.py +9 -9
- mlrun/frameworks/huggingface/model_server.py +4 -4
- mlrun/frameworks/lgbm/__init__.py +33 -33
- mlrun/frameworks/lgbm/callbacks/callback.py +2 -4
- mlrun/frameworks/lgbm/callbacks/logging_callback.py +4 -5
- mlrun/frameworks/lgbm/callbacks/mlrun_logging_callback.py +4 -5
- mlrun/frameworks/lgbm/mlrun_interfaces/booster_mlrun_interface.py +1 -3
- mlrun/frameworks/lgbm/mlrun_interfaces/mlrun_interface.py +6 -6
- mlrun/frameworks/lgbm/model_handler.py +10 -10
- mlrun/frameworks/lgbm/model_server.py +6 -6
- mlrun/frameworks/lgbm/utils.py +5 -5
- mlrun/frameworks/onnx/dataset.py +8 -8
- mlrun/frameworks/onnx/mlrun_interface.py +3 -3
- mlrun/frameworks/onnx/model_handler.py +6 -6
- mlrun/frameworks/onnx/model_server.py +7 -7
- mlrun/frameworks/parallel_coordinates.py +6 -6
- mlrun/frameworks/pytorch/__init__.py +18 -18
- mlrun/frameworks/pytorch/callbacks/callback.py +4 -5
- mlrun/frameworks/pytorch/callbacks/logging_callback.py +17 -17
- mlrun/frameworks/pytorch/callbacks/mlrun_logging_callback.py +11 -11
- mlrun/frameworks/pytorch/callbacks/tensorboard_logging_callback.py +23 -29
- mlrun/frameworks/pytorch/callbacks_handler.py +38 -38
- mlrun/frameworks/pytorch/mlrun_interface.py +20 -20
- mlrun/frameworks/pytorch/model_handler.py +17 -17
- mlrun/frameworks/pytorch/model_server.py +7 -7
- mlrun/frameworks/sklearn/__init__.py +13 -13
- mlrun/frameworks/sklearn/estimator.py +4 -4
- mlrun/frameworks/sklearn/metrics_library.py +14 -14
- mlrun/frameworks/sklearn/mlrun_interface.py +16 -9
- mlrun/frameworks/sklearn/model_handler.py +2 -2
- mlrun/frameworks/tf_keras/__init__.py +10 -7
- mlrun/frameworks/tf_keras/callbacks/logging_callback.py +15 -15
- mlrun/frameworks/tf_keras/callbacks/mlrun_logging_callback.py +11 -11
- mlrun/frameworks/tf_keras/callbacks/tensorboard_logging_callback.py +19 -23
- mlrun/frameworks/tf_keras/mlrun_interface.py +9 -11
- mlrun/frameworks/tf_keras/model_handler.py +14 -14
- mlrun/frameworks/tf_keras/model_server.py +6 -6
- mlrun/frameworks/xgboost/__init__.py +13 -13
- mlrun/frameworks/xgboost/model_handler.py +6 -6
- mlrun/k8s_utils.py +61 -17
- mlrun/launcher/__init__.py +1 -1
- mlrun/launcher/base.py +16 -15
- mlrun/launcher/client.py +13 -11
- mlrun/launcher/factory.py +1 -1
- mlrun/launcher/local.py +23 -13
- mlrun/launcher/remote.py +17 -10
- mlrun/lists.py +7 -6
- mlrun/model.py +478 -103
- mlrun/model_monitoring/__init__.py +1 -1
- mlrun/model_monitoring/api.py +163 -371
- mlrun/{runtimes/mpijob/v1alpha1.py → model_monitoring/applications/__init__.py} +9 -15
- mlrun/model_monitoring/applications/_application_steps.py +188 -0
- mlrun/model_monitoring/applications/base.py +108 -0
- mlrun/model_monitoring/applications/context.py +341 -0
- mlrun/model_monitoring/{evidently_application.py → applications/evidently_base.py} +27 -22
- mlrun/model_monitoring/applications/histogram_data_drift.py +354 -0
- mlrun/model_monitoring/applications/results.py +99 -0
- mlrun/model_monitoring/controller.py +131 -278
- mlrun/model_monitoring/db/__init__.py +18 -0
- mlrun/model_monitoring/db/stores/__init__.py +136 -0
- mlrun/model_monitoring/db/stores/base/__init__.py +15 -0
- mlrun/model_monitoring/db/stores/base/store.py +213 -0
- mlrun/model_monitoring/db/stores/sqldb/__init__.py +13 -0
- mlrun/model_monitoring/db/stores/sqldb/models/__init__.py +71 -0
- mlrun/model_monitoring/db/stores/sqldb/models/base.py +190 -0
- mlrun/model_monitoring/db/stores/sqldb/models/mysql.py +103 -0
- mlrun/model_monitoring/{stores/models/mysql.py → db/stores/sqldb/models/sqlite.py} +19 -13
- mlrun/model_monitoring/db/stores/sqldb/sql_store.py +659 -0
- mlrun/model_monitoring/db/stores/v3io_kv/__init__.py +13 -0
- mlrun/model_monitoring/db/stores/v3io_kv/kv_store.py +726 -0
- mlrun/model_monitoring/db/tsdb/__init__.py +105 -0
- mlrun/model_monitoring/db/tsdb/base.py +448 -0
- mlrun/model_monitoring/db/tsdb/helpers.py +30 -0
- mlrun/model_monitoring/db/tsdb/tdengine/__init__.py +15 -0
- mlrun/model_monitoring/db/tsdb/tdengine/schemas.py +279 -0
- mlrun/model_monitoring/db/tsdb/tdengine/stream_graph_steps.py +42 -0
- mlrun/model_monitoring/db/tsdb/tdengine/tdengine_connector.py +507 -0
- mlrun/model_monitoring/db/tsdb/v3io/__init__.py +15 -0
- mlrun/model_monitoring/db/tsdb/v3io/stream_graph_steps.py +158 -0
- mlrun/model_monitoring/db/tsdb/v3io/v3io_connector.py +849 -0
- mlrun/model_monitoring/features_drift_table.py +134 -106
- mlrun/model_monitoring/helpers.py +199 -55
- mlrun/model_monitoring/metrics/__init__.py +13 -0
- mlrun/model_monitoring/metrics/histogram_distance.py +127 -0
- mlrun/model_monitoring/model_endpoint.py +3 -2
- mlrun/model_monitoring/stream_processing.py +131 -398
- mlrun/model_monitoring/tracking_policy.py +9 -2
- mlrun/model_monitoring/writer.py +161 -125
- mlrun/package/__init__.py +6 -6
- mlrun/package/context_handler.py +5 -5
- mlrun/package/packager.py +7 -7
- mlrun/package/packagers/default_packager.py +8 -8
- mlrun/package/packagers/numpy_packagers.py +15 -15
- mlrun/package/packagers/pandas_packagers.py +5 -5
- mlrun/package/packagers/python_standard_library_packagers.py +10 -10
- mlrun/package/packagers_manager.py +19 -23
- mlrun/package/utils/_formatter.py +6 -6
- mlrun/package/utils/_pickler.py +2 -2
- mlrun/package/utils/_supported_format.py +4 -4
- mlrun/package/utils/log_hint_utils.py +2 -2
- mlrun/package/utils/type_hint_utils.py +4 -9
- mlrun/platforms/__init__.py +11 -10
- mlrun/platforms/iguazio.py +24 -203
- mlrun/projects/operations.py +52 -25
- mlrun/projects/pipelines.py +191 -197
- mlrun/projects/project.py +1227 -400
- mlrun/render.py +16 -19
- mlrun/run.py +209 -184
- mlrun/runtimes/__init__.py +83 -15
- mlrun/runtimes/base.py +51 -35
- mlrun/runtimes/daskjob.py +17 -10
- mlrun/runtimes/databricks_job/databricks_cancel_task.py +1 -1
- mlrun/runtimes/databricks_job/databricks_runtime.py +8 -7
- mlrun/runtimes/databricks_job/databricks_wrapper.py +1 -1
- mlrun/runtimes/funcdoc.py +1 -29
- mlrun/runtimes/function_reference.py +1 -1
- mlrun/runtimes/kubejob.py +34 -128
- mlrun/runtimes/local.py +40 -11
- mlrun/runtimes/mpijob/__init__.py +0 -20
- mlrun/runtimes/mpijob/abstract.py +9 -10
- mlrun/runtimes/mpijob/v1.py +1 -1
- mlrun/{model_monitoring/stores/models/sqlite.py → runtimes/nuclio/__init__.py} +7 -9
- mlrun/runtimes/nuclio/api_gateway.py +769 -0
- mlrun/runtimes/nuclio/application/__init__.py +15 -0
- mlrun/runtimes/nuclio/application/application.py +758 -0
- mlrun/runtimes/nuclio/application/reverse_proxy.go +95 -0
- mlrun/runtimes/{function.py → nuclio/function.py} +200 -83
- mlrun/runtimes/{nuclio.py → nuclio/nuclio.py} +6 -6
- mlrun/runtimes/{serving.py → nuclio/serving.py} +65 -68
- mlrun/runtimes/pod.py +281 -101
- mlrun/runtimes/remotesparkjob.py +12 -9
- mlrun/runtimes/sparkjob/spark3job.py +67 -51
- mlrun/runtimes/utils.py +41 -75
- mlrun/secrets.py +9 -5
- mlrun/serving/__init__.py +8 -1
- mlrun/serving/remote.py +2 -7
- mlrun/serving/routers.py +85 -69
- mlrun/serving/server.py +69 -44
- mlrun/serving/states.py +209 -36
- mlrun/serving/utils.py +22 -14
- mlrun/serving/v1_serving.py +6 -7
- mlrun/serving/v2_serving.py +129 -54
- mlrun/track/tracker.py +2 -1
- mlrun/track/tracker_manager.py +3 -3
- mlrun/track/trackers/mlflow_tracker.py +6 -2
- mlrun/utils/async_http.py +6 -8
- mlrun/utils/azure_vault.py +1 -1
- mlrun/utils/clones.py +1 -2
- mlrun/utils/condition_evaluator.py +3 -3
- mlrun/utils/db.py +21 -3
- mlrun/utils/helpers.py +405 -225
- mlrun/utils/http.py +3 -6
- mlrun/utils/logger.py +112 -16
- mlrun/utils/notifications/notification/__init__.py +17 -13
- mlrun/utils/notifications/notification/base.py +50 -2
- mlrun/utils/notifications/notification/console.py +2 -0
- mlrun/utils/notifications/notification/git.py +24 -1
- mlrun/utils/notifications/notification/ipython.py +3 -1
- mlrun/utils/notifications/notification/slack.py +96 -21
- mlrun/utils/notifications/notification/webhook.py +59 -2
- mlrun/utils/notifications/notification_pusher.py +149 -30
- mlrun/utils/regex.py +9 -0
- mlrun/utils/retryer.py +208 -0
- mlrun/utils/singleton.py +1 -1
- mlrun/utils/v3io_clients.py +4 -6
- mlrun/utils/version/version.json +2 -2
- mlrun/utils/version/version.py +2 -6
- mlrun-1.7.0.dist-info/METADATA +378 -0
- mlrun-1.7.0.dist-info/RECORD +351 -0
- {mlrun-1.6.4rc7.dist-info → mlrun-1.7.0.dist-info}/WHEEL +1 -1
- mlrun/feature_store/retrieval/conversion.py +0 -273
- mlrun/kfpops.py +0 -868
- mlrun/model_monitoring/application.py +0 -310
- mlrun/model_monitoring/batch.py +0 -1095
- mlrun/model_monitoring/prometheus.py +0 -219
- mlrun/model_monitoring/stores/__init__.py +0 -111
- mlrun/model_monitoring/stores/kv_model_endpoint_store.py +0 -576
- mlrun/model_monitoring/stores/model_endpoint_store.py +0 -147
- mlrun/model_monitoring/stores/models/__init__.py +0 -27
- mlrun/model_monitoring/stores/models/base.py +0 -84
- mlrun/model_monitoring/stores/sql_model_endpoint_store.py +0 -384
- mlrun/platforms/other.py +0 -306
- mlrun-1.6.4rc7.dist-info/METADATA +0 -272
- mlrun-1.6.4rc7.dist-info/RECORD +0 -314
- {mlrun-1.6.4rc7.dist-info → mlrun-1.7.0.dist-info}/LICENSE +0 -0
- {mlrun-1.6.4rc7.dist-info → mlrun-1.7.0.dist-info}/entry_points.txt +0 -0
- {mlrun-1.6.4rc7.dist-info → mlrun-1.7.0.dist-info}/top_level.txt +0 -0
|
@@ -15,21 +15,26 @@
|
|
|
15
15
|
import datetime
|
|
16
16
|
import typing
|
|
17
17
|
|
|
18
|
+
import numpy as np
|
|
19
|
+
import pandas as pd
|
|
20
|
+
|
|
21
|
+
if typing.TYPE_CHECKING:
|
|
22
|
+
from mlrun.db.base import RunDBInterface
|
|
23
|
+
from mlrun.projects import MlrunProject
|
|
24
|
+
|
|
18
25
|
import mlrun
|
|
26
|
+
import mlrun.artifacts
|
|
19
27
|
import mlrun.common.model_monitoring.helpers
|
|
20
|
-
import mlrun.common.schemas
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
28
|
+
import mlrun.common.schemas.model_monitoring.constants as mm_constants
|
|
29
|
+
import mlrun.data_types.infer
|
|
30
|
+
import mlrun.model_monitoring
|
|
31
|
+
from mlrun.common.schemas.model_monitoring.model_endpoints import (
|
|
32
|
+
ModelEndpointMonitoringMetric,
|
|
33
|
+
_compose_full_name,
|
|
24
34
|
)
|
|
25
|
-
from mlrun.errors import MLRunValueError
|
|
26
35
|
from mlrun.model_monitoring.model_endpoint import ModelEndpoint
|
|
27
36
|
from mlrun.utils import logger
|
|
28
37
|
|
|
29
|
-
if typing.TYPE_CHECKING:
|
|
30
|
-
from mlrun.db.base import RunDBInterface
|
|
31
|
-
from mlrun.projects import MlrunProject
|
|
32
|
-
|
|
33
38
|
|
|
34
39
|
class _BatchDict(typing.TypedDict):
|
|
35
40
|
minutes: int
|
|
@@ -37,42 +42,41 @@ class _BatchDict(typing.TypedDict):
|
|
|
37
42
|
days: int
|
|
38
43
|
|
|
39
44
|
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
+
def get_stream_path(
|
|
46
|
+
project: str,
|
|
47
|
+
function_name: str = mm_constants.MonitoringFunctionNames.STREAM,
|
|
48
|
+
stream_uri: typing.Optional[str] = None,
|
|
49
|
+
) -> str:
|
|
45
50
|
"""
|
|
46
51
|
Get stream path from the project secret. If it wasn't set, take it from the system configurations
|
|
47
52
|
|
|
48
53
|
:param project: Project name.
|
|
49
|
-
:param
|
|
54
|
+
:param function_name: Application name. Default is model_monitoring_stream.
|
|
55
|
+
:param stream_uri: Stream URI. If provided, it will be used instead of the one from the project secret.
|
|
50
56
|
|
|
51
57
|
:return: Monitoring stream path to the relevant application.
|
|
52
58
|
"""
|
|
53
59
|
|
|
54
|
-
stream_uri = mlrun.get_secret_or_env(
|
|
55
|
-
|
|
56
|
-
if application_name is None
|
|
57
|
-
else ""
|
|
58
|
-
) or mlrun.mlconf.get_model_monitoring_file_target_path(
|
|
59
|
-
project=project,
|
|
60
|
-
kind=mlrun.common.schemas.model_monitoring.FileTargetKind.STREAM,
|
|
61
|
-
target="online",
|
|
62
|
-
application_name=application_name,
|
|
60
|
+
stream_uri = stream_uri or mlrun.get_secret_or_env(
|
|
61
|
+
mm_constants.ProjectSecretKeys.STREAM_PATH
|
|
63
62
|
)
|
|
64
63
|
|
|
65
|
-
if
|
|
66
|
-
stream_uri =
|
|
64
|
+
if not stream_uri or stream_uri == "v3io":
|
|
65
|
+
stream_uri = mlrun.mlconf.get_model_monitoring_file_target_path(
|
|
66
|
+
project=project,
|
|
67
|
+
kind=mm_constants.FileTargetKind.STREAM,
|
|
68
|
+
target="online",
|
|
69
|
+
function_name=function_name,
|
|
70
|
+
)
|
|
67
71
|
|
|
68
72
|
return mlrun.common.model_monitoring.helpers.parse_monitoring_stream_path(
|
|
69
|
-
stream_uri=stream_uri, project=project,
|
|
73
|
+
stream_uri=stream_uri, project=project, function_name=function_name
|
|
70
74
|
)
|
|
71
75
|
|
|
72
76
|
|
|
73
77
|
def get_monitoring_parquet_path(
|
|
74
78
|
project: "MlrunProject",
|
|
75
|
-
kind: str =
|
|
79
|
+
kind: str = mm_constants.FileTargetKind.PARQUET,
|
|
76
80
|
) -> str:
|
|
77
81
|
"""Get model monitoring parquet target for the current project and kind. The parquet target path is based on the
|
|
78
82
|
project artifact path. If project artifact path is not defined, the parquet target path will be based on MLRun
|
|
@@ -94,7 +98,7 @@ def get_monitoring_parquet_path(
|
|
|
94
98
|
return parquet_path
|
|
95
99
|
|
|
96
100
|
|
|
97
|
-
def get_connection_string(secret_provider: typing.Callable = None) -> str:
|
|
101
|
+
def get_connection_string(secret_provider: typing.Callable[[str], str] = None) -> str:
|
|
98
102
|
"""Get endpoint store connection string from the project secret. If it wasn't set, take it from the system
|
|
99
103
|
configurations.
|
|
100
104
|
|
|
@@ -104,12 +108,24 @@ def get_connection_string(secret_provider: typing.Callable = None) -> str:
|
|
|
104
108
|
|
|
105
109
|
"""
|
|
106
110
|
|
|
107
|
-
return (
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
111
|
+
return mlrun.get_secret_or_env(
|
|
112
|
+
key=mm_constants.ProjectSecretKeys.ENDPOINT_STORE_CONNECTION,
|
|
113
|
+
secret_provider=secret_provider,
|
|
114
|
+
)
|
|
115
|
+
|
|
116
|
+
|
|
117
|
+
def get_tsdb_connection_string(
|
|
118
|
+
secret_provider: typing.Optional[typing.Callable[[str], str]] = None,
|
|
119
|
+
) -> str:
|
|
120
|
+
"""Get TSDB connection string from the project secret. If it wasn't set, take it from the system
|
|
121
|
+
configurations.
|
|
122
|
+
:param secret_provider: An optional secret provider to get the connection string secret.
|
|
123
|
+
:return: Valid TSDB connection string.
|
|
124
|
+
"""
|
|
125
|
+
|
|
126
|
+
return mlrun.get_secret_or_env(
|
|
127
|
+
key=mm_constants.ProjectSecretKeys.TSDB_CONNECTION,
|
|
128
|
+
secret_provider=secret_provider,
|
|
113
129
|
)
|
|
114
130
|
|
|
115
131
|
|
|
@@ -128,31 +144,38 @@ def _get_monitoring_time_window_from_controller_run(
|
|
|
128
144
|
project: str, db: "RunDBInterface"
|
|
129
145
|
) -> datetime.timedelta:
|
|
130
146
|
"""
|
|
131
|
-
Get
|
|
147
|
+
Get the base period from the controller.
|
|
132
148
|
|
|
133
149
|
:param project: Project name.
|
|
134
150
|
:param db: DB interface.
|
|
135
151
|
|
|
136
152
|
:return: Timedelta for the controller to run.
|
|
153
|
+
:raise: MLRunNotFoundError if the controller isn't deployed yet
|
|
137
154
|
"""
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
raise
|
|
147
|
-
|
|
148
|
-
|
|
155
|
+
|
|
156
|
+
controller = db.get_function(
|
|
157
|
+
name=mm_constants.MonitoringFunctionNames.APPLICATION_CONTROLLER,
|
|
158
|
+
project=project,
|
|
159
|
+
)
|
|
160
|
+
if isinstance(controller, dict):
|
|
161
|
+
controller = mlrun.runtimes.RemoteRuntime.from_dict(controller)
|
|
162
|
+
elif not hasattr(controller, "to_dict"):
|
|
163
|
+
raise mlrun.errors.MLRunNotFoundError()
|
|
164
|
+
base_period = controller.spec.config["spec.triggers.cron_interval"]["attributes"][
|
|
165
|
+
"interval"
|
|
166
|
+
]
|
|
167
|
+
batch_dict = {
|
|
168
|
+
mm_constants.EventFieldType.MINUTES: int(base_period[:-1]),
|
|
169
|
+
mm_constants.EventFieldType.HOURS: 0,
|
|
170
|
+
mm_constants.EventFieldType.DAYS: 0,
|
|
171
|
+
}
|
|
149
172
|
return batch_dict2timedelta(batch_dict)
|
|
150
173
|
|
|
151
174
|
|
|
152
175
|
def update_model_endpoint_last_request(
|
|
153
176
|
project: str,
|
|
154
177
|
model_endpoint: ModelEndpoint,
|
|
155
|
-
current_request: datetime,
|
|
178
|
+
current_request: datetime.datetime,
|
|
156
179
|
db: "RunDBInterface",
|
|
157
180
|
) -> None:
|
|
158
181
|
"""
|
|
@@ -163,7 +186,8 @@ def update_model_endpoint_last_request(
|
|
|
163
186
|
:param current_request: current request time
|
|
164
187
|
:param db: DB interface.
|
|
165
188
|
"""
|
|
166
|
-
|
|
189
|
+
is_model_server_endpoint = model_endpoint.spec.stream_path != ""
|
|
190
|
+
if is_model_server_endpoint:
|
|
167
191
|
current_request = current_request.isoformat()
|
|
168
192
|
logger.info(
|
|
169
193
|
"Update model endpoint last request time (EP with serving)",
|
|
@@ -175,14 +199,15 @@ def update_model_endpoint_last_request(
|
|
|
175
199
|
db.patch_model_endpoint(
|
|
176
200
|
project=project,
|
|
177
201
|
endpoint_id=model_endpoint.metadata.uid,
|
|
178
|
-
attributes={EventFieldType.LAST_REQUEST: current_request},
|
|
202
|
+
attributes={mm_constants.EventFieldType.LAST_REQUEST: current_request},
|
|
179
203
|
)
|
|
180
|
-
else:
|
|
204
|
+
else: # model endpoint without any serving function - close the window "manually"
|
|
181
205
|
try:
|
|
182
206
|
time_window = _get_monitoring_time_window_from_controller_run(project, db)
|
|
183
|
-
except
|
|
184
|
-
logger.
|
|
185
|
-
"Not bumping model endpoint last request time -
|
|
207
|
+
except mlrun.errors.MLRunNotFoundError:
|
|
208
|
+
logger.warn(
|
|
209
|
+
"Not bumping model endpoint last request time - the monitoring controller isn't deployed yet.\n"
|
|
210
|
+
"Call `project.enable_model_monitoring()` first."
|
|
186
211
|
)
|
|
187
212
|
return
|
|
188
213
|
|
|
@@ -204,5 +229,124 @@ def update_model_endpoint_last_request(
|
|
|
204
229
|
db.patch_model_endpoint(
|
|
205
230
|
project=project,
|
|
206
231
|
endpoint_id=model_endpoint.metadata.uid,
|
|
207
|
-
attributes={EventFieldType.LAST_REQUEST: bumped_last_request},
|
|
232
|
+
attributes={mm_constants.EventFieldType.LAST_REQUEST: bumped_last_request},
|
|
208
233
|
)
|
|
234
|
+
|
|
235
|
+
|
|
236
|
+
def calculate_inputs_statistics(
|
|
237
|
+
sample_set_statistics: dict, inputs: pd.DataFrame
|
|
238
|
+
) -> mlrun.common.model_monitoring.helpers.FeatureStats:
|
|
239
|
+
"""
|
|
240
|
+
Calculate the inputs data statistics for drift monitoring purpose.
|
|
241
|
+
|
|
242
|
+
:param sample_set_statistics: The sample set (stored end point's dataset to reference) statistics. The bins of the
|
|
243
|
+
histograms of each feature will be used to recalculate the histograms of the inputs.
|
|
244
|
+
:param inputs: The inputs to calculate their statistics and later on - the drift with respect to the
|
|
245
|
+
sample set.
|
|
246
|
+
|
|
247
|
+
:returns: The calculated statistics of the inputs data.
|
|
248
|
+
"""
|
|
249
|
+
|
|
250
|
+
# Use `DFDataInfer` to calculate the statistics over the inputs:
|
|
251
|
+
inputs_statistics = mlrun.data_types.infer.DFDataInfer.get_stats(
|
|
252
|
+
df=inputs, options=mlrun.data_types.infer.InferOptions.Histogram
|
|
253
|
+
)
|
|
254
|
+
|
|
255
|
+
# Recalculate the histograms over the bins that are set in the sample-set of the end point:
|
|
256
|
+
for feature in list(inputs_statistics):
|
|
257
|
+
if feature in sample_set_statistics:
|
|
258
|
+
counts, bins = np.histogram(
|
|
259
|
+
inputs[feature].to_numpy(),
|
|
260
|
+
bins=sample_set_statistics[feature]["hist"][1],
|
|
261
|
+
)
|
|
262
|
+
inputs_statistics[feature]["hist"] = [
|
|
263
|
+
counts.tolist(),
|
|
264
|
+
bins.tolist(),
|
|
265
|
+
]
|
|
266
|
+
else:
|
|
267
|
+
# If the feature is not in the sample set and doesn't have a histogram, remove it from the statistics:
|
|
268
|
+
inputs_statistics.pop(feature)
|
|
269
|
+
|
|
270
|
+
return inputs_statistics
|
|
271
|
+
|
|
272
|
+
|
|
273
|
+
def get_endpoint_record(
|
|
274
|
+
project: str,
|
|
275
|
+
endpoint_id: str,
|
|
276
|
+
secret_provider: typing.Optional[typing.Callable[[str], str]] = None,
|
|
277
|
+
) -> dict[str, typing.Any]:
|
|
278
|
+
model_endpoint_store = mlrun.model_monitoring.get_store_object(
|
|
279
|
+
project=project, secret_provider=secret_provider
|
|
280
|
+
)
|
|
281
|
+
return model_endpoint_store.get_model_endpoint(endpoint_id=endpoint_id)
|
|
282
|
+
|
|
283
|
+
|
|
284
|
+
def get_result_instance_fqn(
|
|
285
|
+
model_endpoint_id: str, app_name: str, result_name: str
|
|
286
|
+
) -> str:
|
|
287
|
+
return f"{model_endpoint_id}.{app_name}.result.{result_name}"
|
|
288
|
+
|
|
289
|
+
|
|
290
|
+
def get_default_result_instance_fqn(model_endpoint_id: str) -> str:
|
|
291
|
+
return get_result_instance_fqn(
|
|
292
|
+
model_endpoint_id,
|
|
293
|
+
mm_constants.HistogramDataDriftApplicationConstants.NAME,
|
|
294
|
+
mm_constants.HistogramDataDriftApplicationConstants.GENERAL_RESULT_NAME,
|
|
295
|
+
)
|
|
296
|
+
|
|
297
|
+
|
|
298
|
+
def get_invocations_fqn(project: str) -> str:
|
|
299
|
+
return _compose_full_name(
|
|
300
|
+
project=project,
|
|
301
|
+
app=mm_constants.SpecialApps.MLRUN_INFRA,
|
|
302
|
+
name=mm_constants.PredictionsQueryConstants.INVOCATIONS,
|
|
303
|
+
type=mm_constants.ModelEndpointMonitoringMetricType.METRIC,
|
|
304
|
+
)
|
|
305
|
+
|
|
306
|
+
|
|
307
|
+
def get_invocations_metric(project: str) -> ModelEndpointMonitoringMetric:
|
|
308
|
+
"""
|
|
309
|
+
Return the invocations metric of any model endpoint in the given project.
|
|
310
|
+
|
|
311
|
+
:param project: The project name.
|
|
312
|
+
:returns: The model monitoring metric object.
|
|
313
|
+
"""
|
|
314
|
+
return ModelEndpointMonitoringMetric(
|
|
315
|
+
project=project,
|
|
316
|
+
app=mm_constants.SpecialApps.MLRUN_INFRA,
|
|
317
|
+
type=mm_constants.ModelEndpointMonitoringMetricType.METRIC,
|
|
318
|
+
name=mm_constants.PredictionsQueryConstants.INVOCATIONS,
|
|
319
|
+
full_name=get_invocations_fqn(project),
|
|
320
|
+
)
|
|
321
|
+
|
|
322
|
+
|
|
323
|
+
def enrich_model_endpoint_with_model_uri(
|
|
324
|
+
model_endpoint: ModelEndpoint,
|
|
325
|
+
model_obj: mlrun.artifacts.ModelArtifact,
|
|
326
|
+
):
|
|
327
|
+
"""
|
|
328
|
+
Enrich the model endpoint object with the model uri from the model object. We will use a unique reference
|
|
329
|
+
to the model object that includes the project, db_key, iter, and tree.
|
|
330
|
+
In addition, we verify that the model object is of type `ModelArtifact`.
|
|
331
|
+
|
|
332
|
+
:param model_endpoint: An object representing the model endpoint that will be enriched with the model uri.
|
|
333
|
+
:param model_obj: An object representing the model artifact.
|
|
334
|
+
|
|
335
|
+
:raise: `MLRunInvalidArgumentError` if the model object is not of type `ModelArtifact`.
|
|
336
|
+
"""
|
|
337
|
+
mlrun.utils.helpers.verify_field_of_type(
|
|
338
|
+
field_name="model_endpoint.spec.model_uri",
|
|
339
|
+
field_value=model_obj,
|
|
340
|
+
expected_type=mlrun.artifacts.ModelArtifact,
|
|
341
|
+
)
|
|
342
|
+
|
|
343
|
+
# Update model_uri with a unique reference to handle future changes
|
|
344
|
+
model_artifact_uri = mlrun.utils.helpers.generate_artifact_uri(
|
|
345
|
+
project=model_endpoint.metadata.project,
|
|
346
|
+
key=model_obj.db_key,
|
|
347
|
+
iter=model_obj.iter,
|
|
348
|
+
tree=model_obj.tree,
|
|
349
|
+
)
|
|
350
|
+
model_endpoint.spec.model_uri = mlrun.datastore.get_store_uri(
|
|
351
|
+
kind=mlrun.utils.helpers.StorePrefix.Model, uri=model_artifact_uri
|
|
352
|
+
)
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
# Copyright 2024 Iguazio
|
|
2
|
+
#
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
#
|
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
#
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
@@ -0,0 +1,127 @@
|
|
|
1
|
+
# Copyright 2024 Iguazio
|
|
2
|
+
#
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
#
|
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
#
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
14
|
+
|
|
15
|
+
import abc
|
|
16
|
+
import dataclasses
|
|
17
|
+
from typing import ClassVar, Optional
|
|
18
|
+
|
|
19
|
+
import numpy as np
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
@dataclasses.dataclass
|
|
23
|
+
class HistogramDistanceMetric(abc.ABC):
|
|
24
|
+
"""
|
|
25
|
+
An abstract base class for distance metrics between histograms.
|
|
26
|
+
|
|
27
|
+
:args distrib_t: array of distribution t (usually the latest dataset distribution)
|
|
28
|
+
:args distrib_u: array of distribution u (usually the sample dataset distribution)
|
|
29
|
+
|
|
30
|
+
Each distribution must contain nonnegative floats that sum up to 1.0.
|
|
31
|
+
"""
|
|
32
|
+
|
|
33
|
+
distrib_t: np.ndarray
|
|
34
|
+
distrib_u: np.ndarray
|
|
35
|
+
|
|
36
|
+
NAME: ClassVar[str]
|
|
37
|
+
|
|
38
|
+
# noinspection PyMethodOverriding
|
|
39
|
+
def __init_subclass__(cls, *, metric_name: str, **kwargs) -> None:
|
|
40
|
+
super().__init_subclass__(**kwargs)
|
|
41
|
+
cls.NAME = metric_name
|
|
42
|
+
|
|
43
|
+
@abc.abstractmethod
|
|
44
|
+
def compute(self) -> float:
|
|
45
|
+
raise NotImplementedError
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
class TotalVarianceDistance(HistogramDistanceMetric, metric_name="tvd"):
    """
    Symmetric drift distance between the distributions of two periods, t and u.

    Z - vector of random variables
    Pt - Probability distribution over time span t
    """

    def compute(self) -> float:
        """
        Calculate the Total Variance distance: half of the sum of the
        element-wise absolute differences between the two distributions.

        :returns: Total Variance Distance.
        """
        absolute_gaps = np.abs(self.distrib_t - self.distrib_u)
        return np.sum(absolute_gaps) / 2
|
|
62
|
+
|
|
63
|
+
|
|
64
|
+
class HellingerDistance(HistogramDistanceMetric, metric_name="hellinger"):
    """
    Hellinger distance is an f divergence measure, similar to the Kullback-Leibler (KL) divergence.
    It is used to quantify the difference between two probability distributions.
    However, unlike KL Divergence the Hellinger divergence is symmetric and bounded over a probability space.
    The output range of Hellinger distance is [0,1]. The closer to 0, the more similar the two distributions.
    """

    def compute(self) -> float:
        """
        Calculate Hellinger Distance as ``sqrt(1 - sum(sqrt(u * t)))``.

        :returns: Hellinger Distance
        """
        # Sum of the element-wise geometric means of the two distributions.
        overlap = np.sum(np.sqrt(self.distrib_u * self.distrib_t))
        # Numerical errors may produce small negative numbers, e.g. -1e-16.
        # However, the Cauchy-Schwarz inequality assures the exact value is in
        # the range [0, 1], so it is clamped at 0 before taking the root.
        squared_distance = max(1 - overlap, 0)
        return np.sqrt(squared_distance)
|
|
85
|
+
|
|
86
|
+
|
|
87
|
+
class KullbackLeiblerDivergence(HistogramDistanceMetric, metric_name="kld"):
    """
    KL Divergence (or relative entropy) is a measure of how one probability distribution differs from another.
    It is an asymmetric measure (thus it's not a metric) and it doesn't satisfy the triangle inequality.
    KL Divergence of 0, indicates two identical distributions.
    """

    @staticmethod
    def _calc_kl_div(
        actual_dist: np.ndarray, expected_dist: np.ndarray, zero_scaling: float
    ) -> float:
        """
        Return the asymmetric KL divergence of ``actual_dist`` from ``expected_dist``.

        :param actual_dist:   the distribution whose entries weight the log-ratio
        :param expected_dist: the distribution used as the denominator of the ratio
        :param zero_scaling:  substitute for zero entries of ``expected_dist``, so the
                              ratio is not a division by zero

        :returns: ``sum(actual * log(actual / expected))`` over the nonzero entries
                  of ``actual_dist``
        """
        # We take 0*log(0) == 0 for this calculation
        mask = actual_dist != 0
        actual_dist = actual_dist[mask]
        expected_dist = expected_dist[mask]
        with np.errstate(over="ignore"):
            # Ignore overflow warnings when dividing by small numbers,
            # resulting in inf:
            # RuntimeWarning: overflow encountered in true_divide
            relative_prob = actual_dist / np.where(
                expected_dist != 0, expected_dist, zero_scaling
            )
        return np.sum(actual_dist * np.log(relative_prob))

    def compute(
        self, capping: Optional[float] = None, zero_scaling: float = 1e-4
    ) -> float:
        """
        Calculate the symmetric KL Divergence, i.e. the sum of the asymmetric
        divergences computed in both directions (t from u, and u from t).

        :param capping:      A bounded value for the KL Divergence. For infinite distance, the result is replaced with
                             the capping value which indicates a huge difference between the distributions.
                             ``None`` (the default) disables capping; any other value, including ``0``, is honored.
        :param zero_scaling: Will be used to replace 0 values for executing the logarithmic operation.

        :returns: symmetric KL Divergence
        """
        t_u = self._calc_kl_div(self.distrib_t, self.distrib_u, zero_scaling)
        u_t = self._calc_kl_div(self.distrib_u, self.distrib_t, zero_scaling)
        result = t_u + u_t
        # Compare against None explicitly: a truthiness check would silently
        # ignore a caller-supplied capping of 0 / 0.0 and return inf instead.
        if capping is not None and result == float("inf"):
            return capping
        return result
|
|
@@ -17,6 +17,7 @@ from dataclasses import dataclass, field
|
|
|
17
17
|
from typing import Any
|
|
18
18
|
|
|
19
19
|
import mlrun.model
|
|
20
|
+
from mlrun.common.model_monitoring.helpers import FeatureStats
|
|
20
21
|
from mlrun.common.schemas.model_monitoring.constants import (
|
|
21
22
|
EndpointType,
|
|
22
23
|
EventKeyMetrics,
|
|
@@ -42,8 +43,8 @@ class ModelEndpointSpec(mlrun.model.ModelObj):
|
|
|
42
43
|
|
|
43
44
|
@dataclass
|
|
44
45
|
class ModelEndpointStatus(mlrun.model.ModelObj):
|
|
45
|
-
feature_stats:
|
|
46
|
-
current_stats:
|
|
46
|
+
feature_stats: FeatureStats = field(default_factory=dict)
|
|
47
|
+
current_stats: FeatureStats = field(default_factory=dict)
|
|
47
48
|
first_request: str = ""
|
|
48
49
|
last_request: str = ""
|
|
49
50
|
error_count: int = 0
|