mlrun 1.7.0rc28__py3-none-any.whl → 1.7.0rc55__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of mlrun might be problematic. Click here for more details.
- mlrun/__main__.py +4 -2
- mlrun/alerts/alert.py +75 -8
- mlrun/artifacts/base.py +1 -0
- mlrun/artifacts/manager.py +9 -2
- mlrun/common/constants.py +4 -1
- mlrun/common/db/sql_session.py +3 -2
- mlrun/common/formatters/__init__.py +1 -0
- mlrun/common/formatters/artifact.py +1 -0
- mlrun/{model_monitoring/application.py → common/formatters/feature_set.py} +20 -6
- mlrun/common/formatters/run.py +3 -0
- mlrun/common/helpers.py +0 -1
- mlrun/common/schemas/__init__.py +3 -1
- mlrun/common/schemas/alert.py +15 -12
- mlrun/common/schemas/api_gateway.py +6 -6
- mlrun/common/schemas/auth.py +5 -0
- mlrun/common/schemas/client_spec.py +0 -1
- mlrun/common/schemas/common.py +7 -4
- mlrun/common/schemas/frontend_spec.py +7 -0
- mlrun/common/schemas/function.py +7 -0
- mlrun/common/schemas/model_monitoring/__init__.py +4 -3
- mlrun/common/schemas/model_monitoring/constants.py +41 -26
- mlrun/common/schemas/model_monitoring/model_endpoints.py +23 -47
- mlrun/common/schemas/notification.py +69 -12
- mlrun/common/schemas/project.py +45 -12
- mlrun/common/schemas/workflow.py +10 -2
- mlrun/common/types.py +1 -0
- mlrun/config.py +91 -35
- mlrun/data_types/data_types.py +6 -1
- mlrun/data_types/spark.py +2 -2
- mlrun/data_types/to_pandas.py +57 -25
- mlrun/datastore/__init__.py +1 -0
- mlrun/datastore/alibaba_oss.py +3 -2
- mlrun/datastore/azure_blob.py +125 -37
- mlrun/datastore/base.py +42 -21
- mlrun/datastore/datastore.py +4 -2
- mlrun/datastore/datastore_profile.py +1 -1
- mlrun/datastore/dbfs_store.py +3 -7
- mlrun/datastore/filestore.py +1 -3
- mlrun/datastore/google_cloud_storage.py +85 -29
- mlrun/datastore/inmem.py +4 -1
- mlrun/datastore/redis.py +1 -0
- mlrun/datastore/s3.py +25 -12
- mlrun/datastore/sources.py +76 -4
- mlrun/datastore/spark_utils.py +30 -0
- mlrun/datastore/storeytargets.py +151 -0
- mlrun/datastore/targets.py +102 -131
- mlrun/datastore/v3io.py +1 -0
- mlrun/db/base.py +15 -6
- mlrun/db/httpdb.py +57 -28
- mlrun/db/nopdb.py +29 -5
- mlrun/errors.py +20 -3
- mlrun/execution.py +46 -5
- mlrun/feature_store/api.py +25 -1
- mlrun/feature_store/common.py +6 -11
- mlrun/feature_store/feature_vector.py +3 -1
- mlrun/feature_store/retrieval/job.py +4 -1
- mlrun/feature_store/retrieval/spark_merger.py +10 -39
- mlrun/feature_store/steps.py +8 -0
- mlrun/frameworks/_common/plan.py +3 -3
- mlrun/frameworks/_ml_common/plan.py +1 -1
- mlrun/frameworks/parallel_coordinates.py +2 -3
- mlrun/frameworks/sklearn/mlrun_interface.py +13 -3
- mlrun/k8s_utils.py +48 -2
- mlrun/launcher/client.py +6 -6
- mlrun/launcher/local.py +2 -2
- mlrun/model.py +215 -34
- mlrun/model_monitoring/api.py +38 -24
- mlrun/model_monitoring/applications/__init__.py +1 -2
- mlrun/model_monitoring/applications/_application_steps.py +60 -29
- mlrun/model_monitoring/applications/base.py +2 -174
- mlrun/model_monitoring/applications/context.py +197 -70
- mlrun/model_monitoring/applications/evidently_base.py +11 -85
- mlrun/model_monitoring/applications/histogram_data_drift.py +21 -16
- mlrun/model_monitoring/applications/results.py +4 -4
- mlrun/model_monitoring/controller.py +110 -282
- mlrun/model_monitoring/db/stores/__init__.py +8 -3
- mlrun/model_monitoring/db/stores/base/store.py +3 -0
- mlrun/model_monitoring/db/stores/sqldb/models/base.py +9 -7
- mlrun/model_monitoring/db/stores/sqldb/models/mysql.py +18 -3
- mlrun/model_monitoring/db/stores/sqldb/sql_store.py +43 -23
- mlrun/model_monitoring/db/stores/v3io_kv/kv_store.py +48 -35
- mlrun/model_monitoring/db/tsdb/__init__.py +7 -2
- mlrun/model_monitoring/db/tsdb/base.py +147 -15
- mlrun/model_monitoring/db/tsdb/tdengine/schemas.py +94 -55
- mlrun/model_monitoring/db/tsdb/tdengine/stream_graph_steps.py +0 -3
- mlrun/model_monitoring/db/tsdb/tdengine/tdengine_connector.py +144 -38
- mlrun/model_monitoring/db/tsdb/v3io/stream_graph_steps.py +44 -3
- mlrun/model_monitoring/db/tsdb/v3io/v3io_connector.py +246 -57
- mlrun/model_monitoring/helpers.py +70 -50
- mlrun/model_monitoring/stream_processing.py +96 -195
- mlrun/model_monitoring/writer.py +13 -5
- mlrun/package/packagers/default_packager.py +2 -2
- mlrun/projects/operations.py +16 -8
- mlrun/projects/pipelines.py +126 -115
- mlrun/projects/project.py +286 -129
- mlrun/render.py +3 -3
- mlrun/run.py +38 -19
- mlrun/runtimes/__init__.py +19 -8
- mlrun/runtimes/base.py +4 -1
- mlrun/runtimes/daskjob.py +1 -1
- mlrun/runtimes/funcdoc.py +1 -1
- mlrun/runtimes/kubejob.py +6 -6
- mlrun/runtimes/local.py +12 -5
- mlrun/runtimes/nuclio/api_gateway.py +68 -8
- mlrun/runtimes/nuclio/application/application.py +307 -70
- mlrun/runtimes/nuclio/function.py +63 -14
- mlrun/runtimes/nuclio/serving.py +10 -10
- mlrun/runtimes/pod.py +25 -19
- mlrun/runtimes/remotesparkjob.py +2 -5
- mlrun/runtimes/sparkjob/spark3job.py +16 -17
- mlrun/runtimes/utils.py +34 -0
- mlrun/serving/routers.py +2 -5
- mlrun/serving/server.py +37 -19
- mlrun/serving/states.py +30 -3
- mlrun/serving/v2_serving.py +44 -35
- mlrun/track/trackers/mlflow_tracker.py +5 -0
- mlrun/utils/async_http.py +1 -1
- mlrun/utils/db.py +18 -0
- mlrun/utils/helpers.py +150 -36
- mlrun/utils/http.py +1 -1
- mlrun/utils/notifications/notification/__init__.py +0 -1
- mlrun/utils/notifications/notification/webhook.py +8 -1
- mlrun/utils/notifications/notification_pusher.py +1 -1
- mlrun/utils/v3io_clients.py +2 -2
- mlrun/utils/version/version.json +2 -2
- {mlrun-1.7.0rc28.dist-info → mlrun-1.7.0rc55.dist-info}/METADATA +153 -66
- {mlrun-1.7.0rc28.dist-info → mlrun-1.7.0rc55.dist-info}/RECORD +131 -134
- {mlrun-1.7.0rc28.dist-info → mlrun-1.7.0rc55.dist-info}/WHEEL +1 -1
- mlrun/feature_store/retrieval/conversion.py +0 -271
- mlrun/model_monitoring/controller_handler.py +0 -37
- mlrun/model_monitoring/evidently_application.py +0 -20
- mlrun/model_monitoring/prometheus.py +0 -216
- {mlrun-1.7.0rc28.dist-info → mlrun-1.7.0rc55.dist-info}/LICENSE +0 -0
- {mlrun-1.7.0rc28.dist-info → mlrun-1.7.0rc55.dist-info}/entry_points.txt +0 -0
- {mlrun-1.7.0rc28.dist-info → mlrun-1.7.0rc55.dist-info}/top_level.txt +0 -0
|
@@ -11,19 +11,22 @@
|
|
|
11
11
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
12
|
# See the License for the specific language governing permissions and
|
|
13
13
|
# limitations under the License.
|
|
14
|
+
|
|
14
15
|
import json
|
|
15
|
-
import
|
|
16
|
+
import socket
|
|
17
|
+
from typing import Any, Optional, cast
|
|
16
18
|
|
|
17
19
|
import numpy as np
|
|
18
20
|
import pandas as pd
|
|
19
21
|
|
|
20
|
-
import mlrun.common.
|
|
21
|
-
import mlrun.common.model_monitoring.helpers
|
|
22
|
+
import mlrun.common.constants as mlrun_constants
|
|
22
23
|
import mlrun.common.schemas.model_monitoring.constants as mm_constants
|
|
23
24
|
import mlrun.feature_store as fstore
|
|
24
|
-
|
|
25
|
+
import mlrun.features
|
|
26
|
+
import mlrun.serving
|
|
27
|
+
import mlrun.utils
|
|
28
|
+
from mlrun.artifacts import Artifact, DatasetArtifact, ModelArtifact, get_model
|
|
25
29
|
from mlrun.common.model_monitoring.helpers import FeatureStats, pad_features_hist
|
|
26
|
-
from mlrun.execution import MLClientCtx
|
|
27
30
|
from mlrun.model_monitoring.helpers import (
|
|
28
31
|
calculate_inputs_statistics,
|
|
29
32
|
get_endpoint_record,
|
|
@@ -31,13 +34,17 @@ from mlrun.model_monitoring.helpers import (
|
|
|
31
34
|
from mlrun.model_monitoring.model_endpoint import ModelEndpoint
|
|
32
35
|
|
|
33
36
|
|
|
34
|
-
class MonitoringApplicationContext
|
|
37
|
+
class MonitoringApplicationContext:
|
|
35
38
|
"""
|
|
36
39
|
The monitoring context holds all the relevant information for the monitoring application,
|
|
37
40
|
and it can also be used for logging artifacts and results.
|
|
38
41
|
The monitoring context has the following attributes:
|
|
39
42
|
|
|
40
|
-
:param application_name: (str)
|
|
43
|
+
:param application_name: (str) The model monitoring application name.
|
|
44
|
+
:param project_name: (str) The project name.
|
|
45
|
+
:param project: (MlrunProject) The project object.
|
|
46
|
+
:param logger: (mlrun.utils.Logger) MLRun logger.
|
|
47
|
+
:param nuclio_logger: (nuclio.request.Logger) Nuclio logger.
|
|
41
48
|
:param sample_df_stats: (FeatureStats) The new sample distribution dictionary.
|
|
42
49
|
:param feature_stats: (FeatureStats) The train sample distribution dictionary.
|
|
43
50
|
:param sample_df: (pd.DataFrame) The new sample DataFrame.
|
|
@@ -49,81 +56,82 @@ class MonitoringApplicationContext(MLClientCtx):
|
|
|
49
56
|
:param model_endpoint: (ModelEndpoint) The model endpoint object.
|
|
50
57
|
:param feature_names: (list[str]) List of the model's feature names.
|
|
51
58
|
:param label_names: (list[str]) List of the model's label names.
|
|
52
|
-
:param model: (tuple[str, ModelArtifact, dict]) The model file, model spec object,
|
|
53
|
-
|
|
59
|
+
:param model: (tuple[str, ModelArtifact, dict]) The model file, model spec object,
|
|
60
|
+
and a list of extra data items.
|
|
54
61
|
"""
|
|
55
62
|
|
|
56
|
-
def __init__(
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
self.endpoint_id: typing.Optional[str] = None
|
|
65
|
-
self.output_stream_uri: typing.Optional[str] = None
|
|
66
|
-
|
|
67
|
-
self._sample_df: typing.Optional[pd.DataFrame] = None
|
|
68
|
-
self._model_endpoint: typing.Optional[ModelEndpoint] = None
|
|
69
|
-
self._feature_stats: typing.Optional[FeatureStats] = None
|
|
70
|
-
self._sample_df_stats: typing.Optional[FeatureStats] = None
|
|
71
|
-
|
|
72
|
-
@classmethod
|
|
73
|
-
def from_dict(
|
|
74
|
-
cls,
|
|
75
|
-
attrs: dict,
|
|
76
|
-
context=None,
|
|
77
|
-
model_endpoint_dict=None,
|
|
78
|
-
**kwargs,
|
|
79
|
-
) -> "MonitoringApplicationContext":
|
|
63
|
+
def __init__(
|
|
64
|
+
self,
|
|
65
|
+
*,
|
|
66
|
+
graph_context: mlrun.serving.GraphContext,
|
|
67
|
+
application_name: str,
|
|
68
|
+
event: dict[str, Any],
|
|
69
|
+
model_endpoint_dict: dict[str, ModelEndpoint],
|
|
70
|
+
) -> None:
|
|
80
71
|
"""
|
|
81
|
-
|
|
72
|
+
Initialize a `MonitoringApplicationContext` object.
|
|
73
|
+
Note: this object should not be instantiated manually.
|
|
82
74
|
|
|
83
|
-
:param
|
|
84
|
-
:param
|
|
75
|
+
:param application_name: The application name.
|
|
76
|
+
:param event: The instance data dictionary.
|
|
85
77
|
:param model_endpoint_dict: Dictionary of model endpoints.
|
|
86
|
-
|
|
87
78
|
"""
|
|
79
|
+
self.application_name = application_name
|
|
88
80
|
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
81
|
+
self.project_name = graph_context.project
|
|
82
|
+
self.project = mlrun.load_project(url=self.project_name)
|
|
83
|
+
|
|
84
|
+
# MLRun Logger
|
|
85
|
+
self.logger = mlrun.utils.create_logger(
|
|
86
|
+
level=mlrun.mlconf.log_level,
|
|
87
|
+
formatter_kind=mlrun.mlconf.log_formatter,
|
|
88
|
+
name="monitoring-application",
|
|
89
|
+
)
|
|
90
|
+
# Nuclio logger - `nuclio.request.Logger`.
|
|
91
|
+
# Note: this logger does not accept keyword arguments.
|
|
92
|
+
self.nuclio_logger = graph_context.logger
|
|
99
93
|
|
|
94
|
+
# event data
|
|
100
95
|
self.start_infer_time = pd.Timestamp(
|
|
101
|
-
|
|
96
|
+
cast(str, event.get(mm_constants.ApplicationEvent.START_INFER_TIME))
|
|
102
97
|
)
|
|
103
98
|
self.end_infer_time = pd.Timestamp(
|
|
104
|
-
|
|
99
|
+
cast(str, event.get(mm_constants.ApplicationEvent.END_INFER_TIME))
|
|
105
100
|
)
|
|
106
|
-
self.
|
|
107
|
-
|
|
101
|
+
self.endpoint_id = cast(
|
|
102
|
+
str, event.get(mm_constants.ApplicationEvent.ENDPOINT_ID)
|
|
108
103
|
)
|
|
109
|
-
self.
|
|
110
|
-
mm_constants.ApplicationEvent.
|
|
104
|
+
self.output_stream_uri = cast(
|
|
105
|
+
str, event.get(mm_constants.ApplicationEvent.OUTPUT_STREAM_URI)
|
|
111
106
|
)
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
107
|
+
|
|
108
|
+
self._feature_stats: Optional[FeatureStats] = None
|
|
109
|
+
self._sample_df_stats: Optional[FeatureStats] = None
|
|
110
|
+
|
|
111
|
+
# Default labels for the artifacts
|
|
112
|
+
self._default_labels = self._get_default_labels()
|
|
113
|
+
|
|
114
|
+
# Persistent data - fetched when needed
|
|
115
|
+
self._sample_df: Optional[pd.DataFrame] = None
|
|
116
|
+
self._model_endpoint: Optional[ModelEndpoint] = model_endpoint_dict.get(
|
|
117
|
+
self.endpoint_id
|
|
117
118
|
)
|
|
118
119
|
|
|
119
|
-
|
|
120
|
-
|
|
120
|
+
def _get_default_labels(self) -> dict[str, str]:
|
|
121
|
+
return {
|
|
122
|
+
mlrun_constants.MLRunInternalLabels.runner_pod: socket.gethostname(),
|
|
123
|
+
mlrun_constants.MLRunInternalLabels.producer_type: "model-monitoring-app",
|
|
124
|
+
mlrun_constants.MLRunInternalLabels.app_name: self.application_name,
|
|
125
|
+
mlrun_constants.MLRunInternalLabels.endpoint_id: self.endpoint_id,
|
|
126
|
+
}
|
|
121
127
|
|
|
122
|
-
|
|
128
|
+
def _add_default_labels(self, labels: Optional[dict[str, str]]) -> dict[str, str]:
|
|
129
|
+
"""Add the default labels to logged artifacts labels"""
|
|
130
|
+
return (labels or {}) | self._default_labels
|
|
123
131
|
|
|
124
132
|
@property
|
|
125
133
|
def sample_df(self) -> pd.DataFrame:
|
|
126
|
-
if
|
|
134
|
+
if self._sample_df is None:
|
|
127
135
|
feature_set = fstore.get_feature_set(
|
|
128
136
|
self.model_endpoint.status.monitoring_feature_set_uri
|
|
129
137
|
)
|
|
@@ -146,15 +154,15 @@ class MonitoringApplicationContext(MLClientCtx):
|
|
|
146
154
|
|
|
147
155
|
@property
|
|
148
156
|
def model_endpoint(self) -> ModelEndpoint:
|
|
149
|
-
if not
|
|
157
|
+
if not self._model_endpoint:
|
|
150
158
|
self._model_endpoint = ModelEndpoint.from_flat_dict(
|
|
151
|
-
get_endpoint_record(self.
|
|
159
|
+
get_endpoint_record(self.project_name, self.endpoint_id)
|
|
152
160
|
)
|
|
153
161
|
return self._model_endpoint
|
|
154
162
|
|
|
155
163
|
@property
|
|
156
164
|
def feature_stats(self) -> FeatureStats:
|
|
157
|
-
if not
|
|
165
|
+
if not self._feature_stats:
|
|
158
166
|
self._feature_stats = json.loads(self.model_endpoint.status.feature_stats)
|
|
159
167
|
pad_features_hist(self._feature_stats)
|
|
160
168
|
return self._feature_stats
|
|
@@ -162,7 +170,7 @@ class MonitoringApplicationContext(MLClientCtx):
|
|
|
162
170
|
@property
|
|
163
171
|
def sample_df_stats(self) -> FeatureStats:
|
|
164
172
|
"""statistics of the sample dataframe"""
|
|
165
|
-
if not
|
|
173
|
+
if not self._sample_df_stats:
|
|
166
174
|
self._sample_df_stats = calculate_inputs_statistics(
|
|
167
175
|
self.feature_stats, self.sample_df
|
|
168
176
|
)
|
|
@@ -186,13 +194,11 @@ class MonitoringApplicationContext(MLClientCtx):
|
|
|
186
194
|
|
|
187
195
|
@property
|
|
188
196
|
def model(self) -> tuple[str, ModelArtifact, dict]:
|
|
189
|
-
"""
|
|
197
|
+
"""The model file, model spec object, and a list of extra data items"""
|
|
190
198
|
return get_model(self.model_endpoint.spec.model_uri)
|
|
191
199
|
|
|
192
200
|
@staticmethod
|
|
193
|
-
def dict_to_histogram(
|
|
194
|
-
histogram_dict: mlrun.common.model_monitoring.helpers.FeatureStats,
|
|
195
|
-
) -> pd.DataFrame:
|
|
201
|
+
def dict_to_histogram(histogram_dict: FeatureStats) -> pd.DataFrame:
|
|
196
202
|
"""
|
|
197
203
|
Convert histogram dictionary to pandas DataFrame with feature histograms as columns
|
|
198
204
|
|
|
@@ -212,3 +218,124 @@ class MonitoringApplicationContext(MLClientCtx):
|
|
|
212
218
|
histograms = pd.DataFrame(histograms)
|
|
213
219
|
|
|
214
220
|
return histograms
|
|
221
|
+
|
|
222
|
+
def log_artifact(
|
|
223
|
+
self,
|
|
224
|
+
item,
|
|
225
|
+
body=None,
|
|
226
|
+
tag: str = "",
|
|
227
|
+
local_path: str = "",
|
|
228
|
+
artifact_path: Optional[str] = None,
|
|
229
|
+
format: Optional[str] = None,
|
|
230
|
+
upload: Optional[bool] = None,
|
|
231
|
+
labels: Optional[dict[str, str]] = None,
|
|
232
|
+
target_path: Optional[str] = None,
|
|
233
|
+
**kwargs,
|
|
234
|
+
) -> Artifact:
|
|
235
|
+
"""
|
|
236
|
+
Log an artifact.
|
|
237
|
+
See :func:`~mlrun.projects.MlrunProject.log_artifact` for the documentation.
|
|
238
|
+
"""
|
|
239
|
+
labels = self._add_default_labels(labels)
|
|
240
|
+
return self.project.log_artifact(
|
|
241
|
+
item,
|
|
242
|
+
body=body,
|
|
243
|
+
tag=tag,
|
|
244
|
+
local_path=local_path,
|
|
245
|
+
artifact_path=artifact_path,
|
|
246
|
+
format=format,
|
|
247
|
+
upload=upload,
|
|
248
|
+
labels=labels,
|
|
249
|
+
target_path=target_path,
|
|
250
|
+
**kwargs,
|
|
251
|
+
)
|
|
252
|
+
|
|
253
|
+
def log_dataset(
|
|
254
|
+
self,
|
|
255
|
+
key,
|
|
256
|
+
df,
|
|
257
|
+
tag="",
|
|
258
|
+
local_path=None,
|
|
259
|
+
artifact_path=None,
|
|
260
|
+
upload=None,
|
|
261
|
+
labels=None,
|
|
262
|
+
format="",
|
|
263
|
+
preview=None,
|
|
264
|
+
stats=None,
|
|
265
|
+
target_path="",
|
|
266
|
+
extra_data=None,
|
|
267
|
+
label_column: Optional[str] = None,
|
|
268
|
+
**kwargs,
|
|
269
|
+
) -> DatasetArtifact:
|
|
270
|
+
"""
|
|
271
|
+
Log a dataset artifact.
|
|
272
|
+
See :func:`~mlrun.projects.MlrunProject.log_dataset` for the documentation.
|
|
273
|
+
"""
|
|
274
|
+
labels = self._add_default_labels(labels)
|
|
275
|
+
return self.project.log_dataset(
|
|
276
|
+
key,
|
|
277
|
+
df,
|
|
278
|
+
tag=tag,
|
|
279
|
+
local_path=local_path,
|
|
280
|
+
artifact_path=artifact_path,
|
|
281
|
+
upload=upload,
|
|
282
|
+
labels=labels,
|
|
283
|
+
format=format,
|
|
284
|
+
preview=preview,
|
|
285
|
+
stats=stats,
|
|
286
|
+
target_path=target_path,
|
|
287
|
+
extra_data=extra_data,
|
|
288
|
+
label_column=label_column,
|
|
289
|
+
**kwargs,
|
|
290
|
+
)
|
|
291
|
+
|
|
292
|
+
def log_model(
|
|
293
|
+
self,
|
|
294
|
+
key,
|
|
295
|
+
body=None,
|
|
296
|
+
framework="",
|
|
297
|
+
tag="",
|
|
298
|
+
model_dir=None,
|
|
299
|
+
model_file=None,
|
|
300
|
+
algorithm=None,
|
|
301
|
+
metrics=None,
|
|
302
|
+
parameters=None,
|
|
303
|
+
artifact_path=None,
|
|
304
|
+
upload=None,
|
|
305
|
+
labels=None,
|
|
306
|
+
inputs: Optional[list[mlrun.features.Feature]] = None,
|
|
307
|
+
outputs: Optional[list[mlrun.features.Feature]] = None,
|
|
308
|
+
feature_vector: Optional[str] = None,
|
|
309
|
+
feature_weights: Optional[list] = None,
|
|
310
|
+
training_set=None,
|
|
311
|
+
label_column=None,
|
|
312
|
+
extra_data=None,
|
|
313
|
+
**kwargs,
|
|
314
|
+
) -> ModelArtifact:
|
|
315
|
+
"""
|
|
316
|
+
Log a model artifact.
|
|
317
|
+
See :func:`~mlrun.projects.MlrunProject.log_model` for the documentation.
|
|
318
|
+
"""
|
|
319
|
+
labels = self._add_default_labels(labels)
|
|
320
|
+
return self.project.log_model(
|
|
321
|
+
key,
|
|
322
|
+
body=body,
|
|
323
|
+
framework=framework,
|
|
324
|
+
tag=tag,
|
|
325
|
+
model_dir=model_dir,
|
|
326
|
+
model_file=model_file,
|
|
327
|
+
algorithm=algorithm,
|
|
328
|
+
metrics=metrics,
|
|
329
|
+
parameters=parameters,
|
|
330
|
+
artifact_path=artifact_path,
|
|
331
|
+
upload=upload,
|
|
332
|
+
labels=labels,
|
|
333
|
+
inputs=inputs,
|
|
334
|
+
outputs=outputs,
|
|
335
|
+
feature_vector=feature_vector,
|
|
336
|
+
feature_weights=feature_weights,
|
|
337
|
+
training_set=training_set,
|
|
338
|
+
label_column=label_column,
|
|
339
|
+
extra_data=extra_data,
|
|
340
|
+
**kwargs,
|
|
341
|
+
)
|
|
@@ -14,7 +14,7 @@
|
|
|
14
14
|
|
|
15
15
|
import uuid
|
|
16
16
|
import warnings
|
|
17
|
-
from
|
|
17
|
+
from abc import ABC
|
|
18
18
|
|
|
19
19
|
import pandas as pd
|
|
20
20
|
import semver
|
|
@@ -23,7 +23,7 @@ import mlrun.model_monitoring.applications.base as mm_base
|
|
|
23
23
|
import mlrun.model_monitoring.applications.context as mm_context
|
|
24
24
|
from mlrun.errors import MLRunIncompatibleVersionError
|
|
25
25
|
|
|
26
|
-
SUPPORTED_EVIDENTLY_VERSION = semver.Version.parse("0.4.
|
|
26
|
+
SUPPORTED_EVIDENTLY_VERSION = semver.Version.parse("0.4.32")
|
|
27
27
|
|
|
28
28
|
|
|
29
29
|
def _check_evidently_version(*, cur: semver.Version, ref: semver.Version) -> None:
|
|
@@ -57,84 +57,14 @@ except ModuleNotFoundError:
|
|
|
57
57
|
|
|
58
58
|
|
|
59
59
|
if _HAS_EVIDENTLY:
|
|
60
|
-
from evidently.
|
|
61
|
-
from evidently.report.report import Report
|
|
62
|
-
from evidently.suite.base_suite import Suite
|
|
60
|
+
from evidently.suite.base_suite import Display
|
|
63
61
|
from evidently.ui.type_aliases import STR_UUID
|
|
64
62
|
from evidently.ui.workspace import Workspace
|
|
65
|
-
from evidently.utils.dashboard import TemplateParams
|
|
63
|
+
from evidently.utils.dashboard import TemplateParams, file_html_template
|
|
66
64
|
|
|
67
65
|
|
|
68
|
-
class EvidentlyModelMonitoringApplicationBase(
|
|
69
|
-
|
|
70
|
-
self, evidently_workspace_path: str, evidently_project_id: "STR_UUID"
|
|
71
|
-
) -> None:
|
|
72
|
-
"""
|
|
73
|
-
A class for integrating Evidently for mlrun model monitoring within a monitoring application.
|
|
74
|
-
Note: evidently is not installed by default in the mlrun/mlrun image.
|
|
75
|
-
It must be installed separately to use this class.
|
|
76
|
-
|
|
77
|
-
:param evidently_workspace_path: (str) The path to the Evidently workspace.
|
|
78
|
-
:param evidently_project_id: (str) The ID of the Evidently project.
|
|
79
|
-
|
|
80
|
-
"""
|
|
81
|
-
if not _HAS_EVIDENTLY:
|
|
82
|
-
raise ModuleNotFoundError("Evidently is not installed - the app cannot run")
|
|
83
|
-
self.evidently_workspace = Workspace.create(evidently_workspace_path)
|
|
84
|
-
self.evidently_project_id = evidently_project_id
|
|
85
|
-
self.evidently_project = self.evidently_workspace.get_project(
|
|
86
|
-
evidently_project_id
|
|
87
|
-
)
|
|
88
|
-
|
|
89
|
-
def log_evidently_object(
|
|
90
|
-
self, evidently_object: Union["Report", "Suite"], artifact_name: str
|
|
91
|
-
):
|
|
92
|
-
"""
|
|
93
|
-
Logs an Evidently report or suite as an artifact.
|
|
94
|
-
|
|
95
|
-
:param evidently_object: (Union[Report, Suite]) The Evidently report or suite object.
|
|
96
|
-
:param artifact_name: (str) The name for the logged artifact.
|
|
97
|
-
"""
|
|
98
|
-
evidently_object_html = evidently_object.get_html()
|
|
99
|
-
self.context.log_artifact(
|
|
100
|
-
artifact_name, body=evidently_object_html.encode("utf-8"), format="html"
|
|
101
|
-
)
|
|
102
|
-
|
|
103
|
-
def log_project_dashboard(
|
|
104
|
-
self,
|
|
105
|
-
timestamp_start: pd.Timestamp,
|
|
106
|
-
timestamp_end: pd.Timestamp,
|
|
107
|
-
artifact_name: str = "dashboard",
|
|
108
|
-
):
|
|
109
|
-
"""
|
|
110
|
-
Logs an Evidently project dashboard.
|
|
111
|
-
|
|
112
|
-
:param timestamp_start: (pd.Timestamp) The start timestamp for the dashboard data.
|
|
113
|
-
:param timestamp_end: (pd.Timestamp) The end timestamp for the dashboard data.
|
|
114
|
-
:param artifact_name: (str) The name for the logged artifact.
|
|
115
|
-
"""
|
|
116
|
-
|
|
117
|
-
dashboard_info = self.evidently_project.build_dashboard_info(
|
|
118
|
-
timestamp_start, timestamp_end
|
|
119
|
-
)
|
|
120
|
-
template_params = TemplateParams(
|
|
121
|
-
dashboard_id="pd_" + str(uuid.uuid4()).replace("-", ""),
|
|
122
|
-
dashboard_info=dashboard_info,
|
|
123
|
-
additional_graphs={},
|
|
124
|
-
)
|
|
125
|
-
|
|
126
|
-
dashboard_html = self._render(determine_template("inline"), template_params)
|
|
127
|
-
self.context.log_artifact(
|
|
128
|
-
artifact_name, body=dashboard_html.encode("utf-8"), format="html"
|
|
129
|
-
)
|
|
130
|
-
|
|
131
|
-
@staticmethod
|
|
132
|
-
def _render(temple_func, template_params: "TemplateParams"):
|
|
133
|
-
return temple_func(params=template_params)
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
class EvidentlyModelMonitoringApplicationBaseV2(
|
|
137
|
-
mm_base.ModelMonitoringApplicationBaseV2
|
|
66
|
+
class EvidentlyModelMonitoringApplicationBase(
|
|
67
|
+
mm_base.ModelMonitoringApplicationBase, ABC
|
|
138
68
|
):
|
|
139
69
|
def __init__(
|
|
140
70
|
self, evidently_workspace_path: str, evidently_project_id: "STR_UUID"
|
|
@@ -161,14 +91,14 @@ class EvidentlyModelMonitoringApplicationBaseV2(
|
|
|
161
91
|
@staticmethod
|
|
162
92
|
def log_evidently_object(
|
|
163
93
|
monitoring_context: mm_context.MonitoringApplicationContext,
|
|
164
|
-
evidently_object:
|
|
94
|
+
evidently_object: "Display",
|
|
165
95
|
artifact_name: str,
|
|
166
|
-
):
|
|
96
|
+
) -> None:
|
|
167
97
|
"""
|
|
168
98
|
Logs an Evidently report or suite as an artifact.
|
|
169
99
|
|
|
170
100
|
:param monitoring_context: (MonitoringApplicationContext) The monitoring context to process.
|
|
171
|
-
:param evidently_object: (
|
|
101
|
+
:param evidently_object: (Display) The Evidently display to log, e.g. a report or a test suite object.
|
|
172
102
|
:param artifact_name: (str) The name for the logged artifact.
|
|
173
103
|
"""
|
|
174
104
|
evidently_object_html = evidently_object.get_html()
|
|
@@ -182,7 +112,7 @@ class EvidentlyModelMonitoringApplicationBaseV2(
|
|
|
182
112
|
timestamp_start: pd.Timestamp,
|
|
183
113
|
timestamp_end: pd.Timestamp,
|
|
184
114
|
artifact_name: str = "dashboard",
|
|
185
|
-
):
|
|
115
|
+
) -> None:
|
|
186
116
|
"""
|
|
187
117
|
Logs an Evidently project dashboard.
|
|
188
118
|
|
|
@@ -201,11 +131,7 @@ class EvidentlyModelMonitoringApplicationBaseV2(
|
|
|
201
131
|
additional_graphs={},
|
|
202
132
|
)
|
|
203
133
|
|
|
204
|
-
dashboard_html =
|
|
134
|
+
dashboard_html = file_html_template(params=template_params)
|
|
205
135
|
monitoring_context.log_artifact(
|
|
206
136
|
artifact_name, body=dashboard_html.encode("utf-8"), format="html"
|
|
207
137
|
)
|
|
208
|
-
|
|
209
|
-
@staticmethod
|
|
210
|
-
def _render(temple_func, template_params: "TemplateParams"):
|
|
211
|
-
return temple_func(params=template_params)
|
|
@@ -31,7 +31,7 @@ from mlrun.common.schemas.model_monitoring.constants import (
|
|
|
31
31
|
ResultStatusApp,
|
|
32
32
|
)
|
|
33
33
|
from mlrun.model_monitoring.applications import (
|
|
34
|
-
|
|
34
|
+
ModelMonitoringApplicationBase,
|
|
35
35
|
)
|
|
36
36
|
from mlrun.model_monitoring.metrics.histogram_distance import (
|
|
37
37
|
HellingerDistance,
|
|
@@ -87,11 +87,13 @@ class DataDriftClassifier:
|
|
|
87
87
|
return ResultStatusApp.no_detection
|
|
88
88
|
|
|
89
89
|
|
|
90
|
-
class HistogramDataDriftApplication(
|
|
90
|
+
class HistogramDataDriftApplication(ModelMonitoringApplicationBase):
|
|
91
91
|
"""
|
|
92
92
|
MLRun's default data drift application for model monitoring.
|
|
93
93
|
|
|
94
|
-
The application expects tabular numerical data, and calculates three metrics over the features' histograms.
|
|
94
|
+
The application expects tabular numerical data, and calculates three metrics over the shared features' histograms.
|
|
95
|
+
The metrics are calculated on features that have reference data from the training dataset. When there is no
|
|
96
|
+
reference data (`feature_stats`), this application sends a warning log and does nothing.
|
|
95
97
|
The three metrics are:
|
|
96
98
|
|
|
97
99
|
* Hellinger distance.
|
|
@@ -112,6 +114,7 @@ class HistogramDataDriftApplication(ModelMonitoringApplicationBaseV2):
|
|
|
112
114
|
|
|
113
115
|
project.enable_model_monitoring()
|
|
114
116
|
|
|
117
|
+
To avoid it, pass `deploy_histogram_data_drift_app=False`.
|
|
115
118
|
"""
|
|
116
119
|
|
|
117
120
|
NAME: Final[str] = HistogramDataDriftApplicationConstants.NAME
|
|
@@ -195,7 +198,10 @@ class HistogramDataDriftApplication(ModelMonitoringApplicationBaseV2):
|
|
|
195
198
|
EventFieldType.CURRENT_STATS: json.dumps(
|
|
196
199
|
monitoring_context.sample_df_stats
|
|
197
200
|
),
|
|
198
|
-
EventFieldType.DRIFT_MEASURES:
|
|
201
|
+
EventFieldType.DRIFT_MEASURES: json.dumps(
|
|
202
|
+
metrics_per_feature.T.to_dict()
|
|
203
|
+
| {metric.name: metric.value for metric in metrics}
|
|
204
|
+
),
|
|
199
205
|
EventFieldType.DRIFT_STATUS: status.value,
|
|
200
206
|
},
|
|
201
207
|
)
|
|
@@ -220,19 +226,18 @@ class HistogramDataDriftApplication(ModelMonitoringApplicationBaseV2):
|
|
|
220
226
|
return metrics
|
|
221
227
|
|
|
222
228
|
@staticmethod
|
|
223
|
-
def
|
|
224
|
-
|
|
229
|
+
def _get_shared_features_sample_stats(
|
|
230
|
+
monitoring_context: mm_context.MonitoringApplicationContext,
|
|
225
231
|
) -> mlrun.common.model_monitoring.helpers.FeatureStats:
|
|
226
232
|
"""
|
|
227
|
-
|
|
228
|
-
in the plotly artifact
|
|
233
|
+
Filter out features without reference data in `feature_stats`, e.g. `timestamp`.
|
|
229
234
|
"""
|
|
230
|
-
|
|
231
|
-
|
|
235
|
+
return mlrun.common.model_monitoring.helpers.FeatureStats(
|
|
236
|
+
{
|
|
237
|
+
key: monitoring_context.sample_df_stats[key]
|
|
238
|
+
for key in monitoring_context.feature_stats
|
|
239
|
+
}
|
|
232
240
|
)
|
|
233
|
-
if EventFieldType.TIMESTAMP in sample_set_statistics:
|
|
234
|
-
del sample_set_statistics[EventFieldType.TIMESTAMP]
|
|
235
|
-
return sample_set_statistics
|
|
236
241
|
|
|
237
242
|
@staticmethod
|
|
238
243
|
def _log_json_artifact(
|
|
@@ -296,8 +301,8 @@ class HistogramDataDriftApplication(ModelMonitoringApplicationBaseV2):
|
|
|
296
301
|
self._log_json_artifact(drift_per_feature_values, monitoring_context)
|
|
297
302
|
|
|
298
303
|
self._log_plotly_table_artifact(
|
|
299
|
-
sample_set_statistics=self.
|
|
300
|
-
monitoring_context
|
|
304
|
+
sample_set_statistics=self._get_shared_features_sample_stats(
|
|
305
|
+
monitoring_context
|
|
301
306
|
),
|
|
302
307
|
inputs_statistics=monitoring_context.feature_stats,
|
|
303
308
|
metrics_per_feature=metrics_per_feature,
|
|
@@ -322,7 +327,7 @@ class HistogramDataDriftApplication(ModelMonitoringApplicationBaseV2):
|
|
|
322
327
|
"""
|
|
323
328
|
monitoring_context.logger.debug("Starting to run the application")
|
|
324
329
|
if not monitoring_context.feature_stats:
|
|
325
|
-
monitoring_context.logger.
|
|
330
|
+
monitoring_context.logger.warning(
|
|
326
331
|
"No feature statistics found, skipping the application. \n"
|
|
327
332
|
"In order to run the application, training set must be provided when logging the model."
|
|
328
333
|
)
|
|
@@ -29,8 +29,8 @@ class _ModelMonitoringApplicationDataRes(ABC):
|
|
|
29
29
|
def __post_init__(self):
|
|
30
30
|
pat = re.compile(r"[a-zA-Z_][a-zA-Z0-9_]*")
|
|
31
31
|
if not re.fullmatch(pat, self.name):
|
|
32
|
-
raise mlrun.errors.
|
|
33
|
-
"Attribute name must
|
|
32
|
+
raise mlrun.errors.MLRunValueError(
|
|
33
|
+
"Attribute name must comply with the regex `[a-zA-Z_][a-zA-Z0-9_]*`"
|
|
34
34
|
)
|
|
35
35
|
|
|
36
36
|
@abstractmethod
|
|
@@ -45,7 +45,7 @@ class ModelMonitoringApplicationResult(_ModelMonitoringApplicationDataRes):
|
|
|
45
45
|
|
|
46
46
|
:param name: (str) Name of the application result. This name must be
|
|
47
47
|
unique for each metric in a single application
|
|
48
|
-
(name must be of the format [a-zA-Z_][a-zA-Z0-9_]
|
|
48
|
+
(name must be of the format :code:`[a-zA-Z_][a-zA-Z0-9_]*`).
|
|
49
49
|
:param value: (float) Value of the application result.
|
|
50
50
|
:param kind: (ResultKindApp) Kind of application result.
|
|
51
51
|
:param status: (ResultStatusApp) Status of the application result.
|
|
@@ -80,7 +80,7 @@ class ModelMonitoringApplicationMetric(_ModelMonitoringApplicationDataRes):
|
|
|
80
80
|
|
|
81
81
|
:param name: (str) Name of the application metric. This name must be
|
|
82
82
|
unique for each metric in a single application
|
|
83
|
-
(name must be of the format [a-zA-Z_][a-zA-Z0-9_]
|
|
83
|
+
(name must be of the format :code:`[a-zA-Z_][a-zA-Z0-9_]*`).
|
|
84
84
|
:param value: (float) Value of the application metric.
|
|
85
85
|
"""
|
|
86
86
|
|