mlrun 1.7.0rc28__py3-none-any.whl → 1.7.0rc55__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of mlrun might be problematic. Click here for more details.

Files changed (135) hide show
  1. mlrun/__main__.py +4 -2
  2. mlrun/alerts/alert.py +75 -8
  3. mlrun/artifacts/base.py +1 -0
  4. mlrun/artifacts/manager.py +9 -2
  5. mlrun/common/constants.py +4 -1
  6. mlrun/common/db/sql_session.py +3 -2
  7. mlrun/common/formatters/__init__.py +1 -0
  8. mlrun/common/formatters/artifact.py +1 -0
  9. mlrun/{model_monitoring/application.py → common/formatters/feature_set.py} +20 -6
  10. mlrun/common/formatters/run.py +3 -0
  11. mlrun/common/helpers.py +0 -1
  12. mlrun/common/schemas/__init__.py +3 -1
  13. mlrun/common/schemas/alert.py +15 -12
  14. mlrun/common/schemas/api_gateway.py +6 -6
  15. mlrun/common/schemas/auth.py +5 -0
  16. mlrun/common/schemas/client_spec.py +0 -1
  17. mlrun/common/schemas/common.py +7 -4
  18. mlrun/common/schemas/frontend_spec.py +7 -0
  19. mlrun/common/schemas/function.py +7 -0
  20. mlrun/common/schemas/model_monitoring/__init__.py +4 -3
  21. mlrun/common/schemas/model_monitoring/constants.py +41 -26
  22. mlrun/common/schemas/model_monitoring/model_endpoints.py +23 -47
  23. mlrun/common/schemas/notification.py +69 -12
  24. mlrun/common/schemas/project.py +45 -12
  25. mlrun/common/schemas/workflow.py +10 -2
  26. mlrun/common/types.py +1 -0
  27. mlrun/config.py +91 -35
  28. mlrun/data_types/data_types.py +6 -1
  29. mlrun/data_types/spark.py +2 -2
  30. mlrun/data_types/to_pandas.py +57 -25
  31. mlrun/datastore/__init__.py +1 -0
  32. mlrun/datastore/alibaba_oss.py +3 -2
  33. mlrun/datastore/azure_blob.py +125 -37
  34. mlrun/datastore/base.py +42 -21
  35. mlrun/datastore/datastore.py +4 -2
  36. mlrun/datastore/datastore_profile.py +1 -1
  37. mlrun/datastore/dbfs_store.py +3 -7
  38. mlrun/datastore/filestore.py +1 -3
  39. mlrun/datastore/google_cloud_storage.py +85 -29
  40. mlrun/datastore/inmem.py +4 -1
  41. mlrun/datastore/redis.py +1 -0
  42. mlrun/datastore/s3.py +25 -12
  43. mlrun/datastore/sources.py +76 -4
  44. mlrun/datastore/spark_utils.py +30 -0
  45. mlrun/datastore/storeytargets.py +151 -0
  46. mlrun/datastore/targets.py +102 -131
  47. mlrun/datastore/v3io.py +1 -0
  48. mlrun/db/base.py +15 -6
  49. mlrun/db/httpdb.py +57 -28
  50. mlrun/db/nopdb.py +29 -5
  51. mlrun/errors.py +20 -3
  52. mlrun/execution.py +46 -5
  53. mlrun/feature_store/api.py +25 -1
  54. mlrun/feature_store/common.py +6 -11
  55. mlrun/feature_store/feature_vector.py +3 -1
  56. mlrun/feature_store/retrieval/job.py +4 -1
  57. mlrun/feature_store/retrieval/spark_merger.py +10 -39
  58. mlrun/feature_store/steps.py +8 -0
  59. mlrun/frameworks/_common/plan.py +3 -3
  60. mlrun/frameworks/_ml_common/plan.py +1 -1
  61. mlrun/frameworks/parallel_coordinates.py +2 -3
  62. mlrun/frameworks/sklearn/mlrun_interface.py +13 -3
  63. mlrun/k8s_utils.py +48 -2
  64. mlrun/launcher/client.py +6 -6
  65. mlrun/launcher/local.py +2 -2
  66. mlrun/model.py +215 -34
  67. mlrun/model_monitoring/api.py +38 -24
  68. mlrun/model_monitoring/applications/__init__.py +1 -2
  69. mlrun/model_monitoring/applications/_application_steps.py +60 -29
  70. mlrun/model_monitoring/applications/base.py +2 -174
  71. mlrun/model_monitoring/applications/context.py +197 -70
  72. mlrun/model_monitoring/applications/evidently_base.py +11 -85
  73. mlrun/model_monitoring/applications/histogram_data_drift.py +21 -16
  74. mlrun/model_monitoring/applications/results.py +4 -4
  75. mlrun/model_monitoring/controller.py +110 -282
  76. mlrun/model_monitoring/db/stores/__init__.py +8 -3
  77. mlrun/model_monitoring/db/stores/base/store.py +3 -0
  78. mlrun/model_monitoring/db/stores/sqldb/models/base.py +9 -7
  79. mlrun/model_monitoring/db/stores/sqldb/models/mysql.py +18 -3
  80. mlrun/model_monitoring/db/stores/sqldb/sql_store.py +43 -23
  81. mlrun/model_monitoring/db/stores/v3io_kv/kv_store.py +48 -35
  82. mlrun/model_monitoring/db/tsdb/__init__.py +7 -2
  83. mlrun/model_monitoring/db/tsdb/base.py +147 -15
  84. mlrun/model_monitoring/db/tsdb/tdengine/schemas.py +94 -55
  85. mlrun/model_monitoring/db/tsdb/tdengine/stream_graph_steps.py +0 -3
  86. mlrun/model_monitoring/db/tsdb/tdengine/tdengine_connector.py +144 -38
  87. mlrun/model_monitoring/db/tsdb/v3io/stream_graph_steps.py +44 -3
  88. mlrun/model_monitoring/db/tsdb/v3io/v3io_connector.py +246 -57
  89. mlrun/model_monitoring/helpers.py +70 -50
  90. mlrun/model_monitoring/stream_processing.py +96 -195
  91. mlrun/model_monitoring/writer.py +13 -5
  92. mlrun/package/packagers/default_packager.py +2 -2
  93. mlrun/projects/operations.py +16 -8
  94. mlrun/projects/pipelines.py +126 -115
  95. mlrun/projects/project.py +286 -129
  96. mlrun/render.py +3 -3
  97. mlrun/run.py +38 -19
  98. mlrun/runtimes/__init__.py +19 -8
  99. mlrun/runtimes/base.py +4 -1
  100. mlrun/runtimes/daskjob.py +1 -1
  101. mlrun/runtimes/funcdoc.py +1 -1
  102. mlrun/runtimes/kubejob.py +6 -6
  103. mlrun/runtimes/local.py +12 -5
  104. mlrun/runtimes/nuclio/api_gateway.py +68 -8
  105. mlrun/runtimes/nuclio/application/application.py +307 -70
  106. mlrun/runtimes/nuclio/function.py +63 -14
  107. mlrun/runtimes/nuclio/serving.py +10 -10
  108. mlrun/runtimes/pod.py +25 -19
  109. mlrun/runtimes/remotesparkjob.py +2 -5
  110. mlrun/runtimes/sparkjob/spark3job.py +16 -17
  111. mlrun/runtimes/utils.py +34 -0
  112. mlrun/serving/routers.py +2 -5
  113. mlrun/serving/server.py +37 -19
  114. mlrun/serving/states.py +30 -3
  115. mlrun/serving/v2_serving.py +44 -35
  116. mlrun/track/trackers/mlflow_tracker.py +5 -0
  117. mlrun/utils/async_http.py +1 -1
  118. mlrun/utils/db.py +18 -0
  119. mlrun/utils/helpers.py +150 -36
  120. mlrun/utils/http.py +1 -1
  121. mlrun/utils/notifications/notification/__init__.py +0 -1
  122. mlrun/utils/notifications/notification/webhook.py +8 -1
  123. mlrun/utils/notifications/notification_pusher.py +1 -1
  124. mlrun/utils/v3io_clients.py +2 -2
  125. mlrun/utils/version/version.json +2 -2
  126. {mlrun-1.7.0rc28.dist-info → mlrun-1.7.0rc55.dist-info}/METADATA +153 -66
  127. {mlrun-1.7.0rc28.dist-info → mlrun-1.7.0rc55.dist-info}/RECORD +131 -134
  128. {mlrun-1.7.0rc28.dist-info → mlrun-1.7.0rc55.dist-info}/WHEEL +1 -1
  129. mlrun/feature_store/retrieval/conversion.py +0 -271
  130. mlrun/model_monitoring/controller_handler.py +0 -37
  131. mlrun/model_monitoring/evidently_application.py +0 -20
  132. mlrun/model_monitoring/prometheus.py +0 -216
  133. {mlrun-1.7.0rc28.dist-info → mlrun-1.7.0rc55.dist-info}/LICENSE +0 -0
  134. {mlrun-1.7.0rc28.dist-info → mlrun-1.7.0rc55.dist-info}/entry_points.txt +0 -0
  135. {mlrun-1.7.0rc28.dist-info → mlrun-1.7.0rc55.dist-info}/top_level.txt +0 -0
@@ -11,19 +11,22 @@
11
11
  # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
12
  # See the License for the specific language governing permissions and
13
13
  # limitations under the License.
14
+
14
15
  import json
15
- import typing
16
+ import socket
17
+ from typing import Any, Optional, cast
16
18
 
17
19
  import numpy as np
18
20
  import pandas as pd
19
21
 
20
- import mlrun.common.helpers
21
- import mlrun.common.model_monitoring.helpers
22
+ import mlrun.common.constants as mlrun_constants
22
23
  import mlrun.common.schemas.model_monitoring.constants as mm_constants
23
24
  import mlrun.feature_store as fstore
24
- from mlrun.artifacts.model import ModelArtifact, get_model
25
+ import mlrun.features
26
+ import mlrun.serving
27
+ import mlrun.utils
28
+ from mlrun.artifacts import Artifact, DatasetArtifact, ModelArtifact, get_model
25
29
  from mlrun.common.model_monitoring.helpers import FeatureStats, pad_features_hist
26
- from mlrun.execution import MLClientCtx
27
30
  from mlrun.model_monitoring.helpers import (
28
31
  calculate_inputs_statistics,
29
32
  get_endpoint_record,
@@ -31,13 +34,17 @@ from mlrun.model_monitoring.helpers import (
31
34
  from mlrun.model_monitoring.model_endpoint import ModelEndpoint
32
35
 
33
36
 
34
- class MonitoringApplicationContext(MLClientCtx):
37
+ class MonitoringApplicationContext:
35
38
  """
36
39
  The monitoring context holds all the relevant information for the monitoring application,
37
40
  and also it can be used for logging artifacts and results.
38
41
  The monitoring context has the following attributes:
39
42
 
40
- :param application_name: (str) the app name
43
+ :param application_name: (str) The model monitoring application name.
44
+ :param project_name: (str) The project name.
45
+ :param project: (MlrunProject) The project object.
46
+ :param logger: (mlrun.utils.Logger) MLRun logger.
47
+ :param nuclio_logger: (nuclio.request.Logger) Nuclio logger.
41
48
  :param sample_df_stats: (FeatureStats) The new sample distribution dictionary.
42
49
  :param feature_stats: (FeatureStats) The train sample distribution dictionary.
43
50
  :param sample_df: (pd.DataFrame) The new sample DataFrame.
@@ -49,81 +56,82 @@ class MonitoringApplicationContext(MLClientCtx):
49
56
  :param model_endpoint: (ModelEndpoint) The model endpoint object.
50
57
  :param feature_names: (list[str]) List of the model's feature names.
51
58
  :param label_names: (list[str]) List of the model's label names.
52
- :param model: (tuple[str, ModelArtifact, dict]) The model file, model spec object, and list of
53
-
59
+ :param model: (tuple[str, ModelArtifact, dict]) The model file, model spec object,
60
+ and a list of extra data items.
54
61
  """
55
62
 
56
- def __init__(self, **kwargs):
57
- super().__init__(**kwargs)
58
-
59
- def __post_init__(self):
60
- self.application_name: typing.Optional[str] = None
61
- self.start_infer_time: typing.Optional[pd.Timestamp] = None
62
- self.end_infer_time: typing.Optional[pd.Timestamp] = None
63
- self.latest_request: typing.Optional[pd.Timestamp] = None
64
- self.endpoint_id: typing.Optional[str] = None
65
- self.output_stream_uri: typing.Optional[str] = None
66
-
67
- self._sample_df: typing.Optional[pd.DataFrame] = None
68
- self._model_endpoint: typing.Optional[ModelEndpoint] = None
69
- self._feature_stats: typing.Optional[FeatureStats] = None
70
- self._sample_df_stats: typing.Optional[FeatureStats] = None
71
-
72
- @classmethod
73
- def from_dict(
74
- cls,
75
- attrs: dict,
76
- context=None,
77
- model_endpoint_dict=None,
78
- **kwargs,
79
- ) -> "MonitoringApplicationContext":
63
+ def __init__(
64
+ self,
65
+ *,
66
+ graph_context: mlrun.serving.GraphContext,
67
+ application_name: str,
68
+ event: dict[str, Any],
69
+ model_endpoint_dict: dict[str, ModelEndpoint],
70
+ ) -> None:
80
71
  """
81
- Create an instance of the MonitoringApplicationContext from a dictionary.
72
+ Initialize a `MonitoringApplicationContext` object.
73
+ Note: this object should not be instantiated manually.
82
74
 
83
- :param attrs: The instance data dictionary.
84
- :param context: The current application context.
75
+ :param application_name: The application name.
76
+ :param event: The instance data dictionary.
85
77
  :param model_endpoint_dict: Dictionary of model endpoints.
86
-
87
78
  """
79
+ self.application_name = application_name
88
80
 
89
- if not context:
90
- self = (
91
- super().from_dict(
92
- attrs=attrs.get(mm_constants.ApplicationEvent.MLRUN_CONTEXT, {}),
93
- **kwargs,
94
- ),
95
- )
96
- else:
97
- self = context
98
- self.__post_init__()
81
+ self.project_name = graph_context.project
82
+ self.project = mlrun.load_project(url=self.project_name)
83
+
84
+ # MLRun Logger
85
+ self.logger = mlrun.utils.create_logger(
86
+ level=mlrun.mlconf.log_level,
87
+ formatter_kind=mlrun.mlconf.log_formatter,
88
+ name="monitoring-application",
89
+ )
90
+ # Nuclio logger - `nuclio.request.Logger`.
91
+ # Note: this logger does not accept keyword arguments.
92
+ self.nuclio_logger = graph_context.logger
99
93
 
94
+ # event data
100
95
  self.start_infer_time = pd.Timestamp(
101
- attrs.get(mm_constants.ApplicationEvent.START_INFER_TIME)
96
+ cast(str, event.get(mm_constants.ApplicationEvent.START_INFER_TIME))
102
97
  )
103
98
  self.end_infer_time = pd.Timestamp(
104
- attrs.get(mm_constants.ApplicationEvent.END_INFER_TIME)
99
+ cast(str, event.get(mm_constants.ApplicationEvent.END_INFER_TIME))
105
100
  )
106
- self.latest_request = pd.Timestamp(
107
- attrs.get(mm_constants.ApplicationEvent.LAST_REQUEST)
101
+ self.endpoint_id = cast(
102
+ str, event.get(mm_constants.ApplicationEvent.ENDPOINT_ID)
108
103
  )
109
- self.application_name = attrs.get(
110
- mm_constants.ApplicationEvent.APPLICATION_NAME
104
+ self.output_stream_uri = cast(
105
+ str, event.get(mm_constants.ApplicationEvent.OUTPUT_STREAM_URI)
111
106
  )
112
- self._feature_stats = json.loads(
113
- attrs.get(mm_constants.ApplicationEvent.FEATURE_STATS, "{}")
114
- )
115
- self._sample_df_stats = json.loads(
116
- attrs.get(mm_constants.ApplicationEvent.CURRENT_STATS, "{}")
107
+
108
+ self._feature_stats: Optional[FeatureStats] = None
109
+ self._sample_df_stats: Optional[FeatureStats] = None
110
+
111
+ # Default labels for the artifacts
112
+ self._default_labels = self._get_default_labels()
113
+
114
+ # Persistent data - fetched when needed
115
+ self._sample_df: Optional[pd.DataFrame] = None
116
+ self._model_endpoint: Optional[ModelEndpoint] = model_endpoint_dict.get(
117
+ self.endpoint_id
117
118
  )
118
119
 
119
- self.endpoint_id = attrs.get(mm_constants.ApplicationEvent.ENDPOINT_ID)
120
- self._model_endpoint = model_endpoint_dict.get(self.endpoint_id)
120
+ def _get_default_labels(self) -> dict[str, str]:
121
+ return {
122
+ mlrun_constants.MLRunInternalLabels.runner_pod: socket.gethostname(),
123
+ mlrun_constants.MLRunInternalLabels.producer_type: "model-monitoring-app",
124
+ mlrun_constants.MLRunInternalLabels.app_name: self.application_name,
125
+ mlrun_constants.MLRunInternalLabels.endpoint_id: self.endpoint_id,
126
+ }
121
127
 
122
- return self
128
+ def _add_default_labels(self, labels: Optional[dict[str, str]]) -> dict[str, str]:
129
+ """Add the default labels to logged artifacts labels"""
130
+ return (labels or {}) | self._default_labels
123
131
 
124
132
  @property
125
133
  def sample_df(self) -> pd.DataFrame:
126
- if not hasattr(self, "_sample_df") or self._sample_df is None:
134
+ if self._sample_df is None:
127
135
  feature_set = fstore.get_feature_set(
128
136
  self.model_endpoint.status.monitoring_feature_set_uri
129
137
  )
@@ -146,15 +154,15 @@ class MonitoringApplicationContext(MLClientCtx):
146
154
 
147
155
  @property
148
156
  def model_endpoint(self) -> ModelEndpoint:
149
- if not hasattr(self, "_model_endpoint") or not self._model_endpoint:
157
+ if not self._model_endpoint:
150
158
  self._model_endpoint = ModelEndpoint.from_flat_dict(
151
- get_endpoint_record(self.project, self.endpoint_id)
159
+ get_endpoint_record(self.project_name, self.endpoint_id)
152
160
  )
153
161
  return self._model_endpoint
154
162
 
155
163
  @property
156
164
  def feature_stats(self) -> FeatureStats:
157
- if not hasattr(self, "_feature_stats") or not self._feature_stats:
165
+ if not self._feature_stats:
158
166
  self._feature_stats = json.loads(self.model_endpoint.status.feature_stats)
159
167
  pad_features_hist(self._feature_stats)
160
168
  return self._feature_stats
@@ -162,7 +170,7 @@ class MonitoringApplicationContext(MLClientCtx):
162
170
  @property
163
171
  def sample_df_stats(self) -> FeatureStats:
164
172
  """statistics of the sample dataframe"""
165
- if not hasattr(self, "_sample_df_stats") or not self._sample_df_stats:
173
+ if not self._sample_df_stats:
166
174
  self._sample_df_stats = calculate_inputs_statistics(
167
175
  self.feature_stats, self.sample_df
168
176
  )
@@ -186,13 +194,11 @@ class MonitoringApplicationContext(MLClientCtx):
186
194
 
187
195
  @property
188
196
  def model(self) -> tuple[str, ModelArtifact, dict]:
189
- """return model file, model spec object, and list of extra data items"""
197
+ """The model file, model spec object, and a list of extra data items"""
190
198
  return get_model(self.model_endpoint.spec.model_uri)
191
199
 
192
200
  @staticmethod
193
- def dict_to_histogram(
194
- histogram_dict: mlrun.common.model_monitoring.helpers.FeatureStats,
195
- ) -> pd.DataFrame:
201
+ def dict_to_histogram(histogram_dict: FeatureStats) -> pd.DataFrame:
196
202
  """
197
203
  Convert histogram dictionary to pandas DataFrame with feature histograms as columns
198
204
 
@@ -212,3 +218,124 @@ class MonitoringApplicationContext(MLClientCtx):
212
218
  histograms = pd.DataFrame(histograms)
213
219
 
214
220
  return histograms
221
+
222
+ def log_artifact(
223
+ self,
224
+ item,
225
+ body=None,
226
+ tag: str = "",
227
+ local_path: str = "",
228
+ artifact_path: Optional[str] = None,
229
+ format: Optional[str] = None,
230
+ upload: Optional[bool] = None,
231
+ labels: Optional[dict[str, str]] = None,
232
+ target_path: Optional[str] = None,
233
+ **kwargs,
234
+ ) -> Artifact:
235
+ """
236
+ Log an artifact.
237
+ See :func:`~mlrun.projects.MlrunProject.log_artifact` for the documentation.
238
+ """
239
+ labels = self._add_default_labels(labels)
240
+ return self.project.log_artifact(
241
+ item,
242
+ body=body,
243
+ tag=tag,
244
+ local_path=local_path,
245
+ artifact_path=artifact_path,
246
+ format=format,
247
+ upload=upload,
248
+ labels=labels,
249
+ target_path=target_path,
250
+ **kwargs,
251
+ )
252
+
253
+ def log_dataset(
254
+ self,
255
+ key,
256
+ df,
257
+ tag="",
258
+ local_path=None,
259
+ artifact_path=None,
260
+ upload=None,
261
+ labels=None,
262
+ format="",
263
+ preview=None,
264
+ stats=None,
265
+ target_path="",
266
+ extra_data=None,
267
+ label_column: Optional[str] = None,
268
+ **kwargs,
269
+ ) -> DatasetArtifact:
270
+ """
271
+ Log a dataset artifact.
272
+ See :func:`~mlrun.projects.MlrunProject.log_dataset` for the documentation.
273
+ """
274
+ labels = self._add_default_labels(labels)
275
+ return self.project.log_dataset(
276
+ key,
277
+ df,
278
+ tag=tag,
279
+ local_path=local_path,
280
+ artifact_path=artifact_path,
281
+ upload=upload,
282
+ labels=labels,
283
+ format=format,
284
+ preview=preview,
285
+ stats=stats,
286
+ target_path=target_path,
287
+ extra_data=extra_data,
288
+ label_column=label_column,
289
+ **kwargs,
290
+ )
291
+
292
+ def log_model(
293
+ self,
294
+ key,
295
+ body=None,
296
+ framework="",
297
+ tag="",
298
+ model_dir=None,
299
+ model_file=None,
300
+ algorithm=None,
301
+ metrics=None,
302
+ parameters=None,
303
+ artifact_path=None,
304
+ upload=None,
305
+ labels=None,
306
+ inputs: Optional[list[mlrun.features.Feature]] = None,
307
+ outputs: Optional[list[mlrun.features.Feature]] = None,
308
+ feature_vector: Optional[str] = None,
309
+ feature_weights: Optional[list] = None,
310
+ training_set=None,
311
+ label_column=None,
312
+ extra_data=None,
313
+ **kwargs,
314
+ ) -> ModelArtifact:
315
+ """
316
+ Log a model artifact.
317
+ See :func:`~mlrun.projects.MlrunProject.log_model` for the documentation.
318
+ """
319
+ labels = self._add_default_labels(labels)
320
+ return self.project.log_model(
321
+ key,
322
+ body=body,
323
+ framework=framework,
324
+ tag=tag,
325
+ model_dir=model_dir,
326
+ model_file=model_file,
327
+ algorithm=algorithm,
328
+ metrics=metrics,
329
+ parameters=parameters,
330
+ artifact_path=artifact_path,
331
+ upload=upload,
332
+ labels=labels,
333
+ inputs=inputs,
334
+ outputs=outputs,
335
+ feature_vector=feature_vector,
336
+ feature_weights=feature_weights,
337
+ training_set=training_set,
338
+ label_column=label_column,
339
+ extra_data=extra_data,
340
+ **kwargs,
341
+ )
@@ -14,7 +14,7 @@
14
14
 
15
15
  import uuid
16
16
  import warnings
17
- from typing import Union
17
+ from abc import ABC
18
18
 
19
19
  import pandas as pd
20
20
  import semver
@@ -23,7 +23,7 @@ import mlrun.model_monitoring.applications.base as mm_base
23
23
  import mlrun.model_monitoring.applications.context as mm_context
24
24
  from mlrun.errors import MLRunIncompatibleVersionError
25
25
 
26
- SUPPORTED_EVIDENTLY_VERSION = semver.Version.parse("0.4.11")
26
+ SUPPORTED_EVIDENTLY_VERSION = semver.Version.parse("0.4.32")
27
27
 
28
28
 
29
29
  def _check_evidently_version(*, cur: semver.Version, ref: semver.Version) -> None:
@@ -57,84 +57,14 @@ except ModuleNotFoundError:
57
57
 
58
58
 
59
59
  if _HAS_EVIDENTLY:
60
- from evidently.renderers.notebook_utils import determine_template
61
- from evidently.report.report import Report
62
- from evidently.suite.base_suite import Suite
60
+ from evidently.suite.base_suite import Display
63
61
  from evidently.ui.type_aliases import STR_UUID
64
62
  from evidently.ui.workspace import Workspace
65
- from evidently.utils.dashboard import TemplateParams
63
+ from evidently.utils.dashboard import TemplateParams, file_html_template
66
64
 
67
65
 
68
- class EvidentlyModelMonitoringApplicationBase(mm_base.ModelMonitoringApplicationBase):
69
- def __init__(
70
- self, evidently_workspace_path: str, evidently_project_id: "STR_UUID"
71
- ) -> None:
72
- """
73
- A class for integrating Evidently for mlrun model monitoring within a monitoring application.
74
- Note: evidently is not installed by default in the mlrun/mlrun image.
75
- It must be installed separately to use this class.
76
-
77
- :param evidently_workspace_path: (str) The path to the Evidently workspace.
78
- :param evidently_project_id: (str) The ID of the Evidently project.
79
-
80
- """
81
- if not _HAS_EVIDENTLY:
82
- raise ModuleNotFoundError("Evidently is not installed - the app cannot run")
83
- self.evidently_workspace = Workspace.create(evidently_workspace_path)
84
- self.evidently_project_id = evidently_project_id
85
- self.evidently_project = self.evidently_workspace.get_project(
86
- evidently_project_id
87
- )
88
-
89
- def log_evidently_object(
90
- self, evidently_object: Union["Report", "Suite"], artifact_name: str
91
- ):
92
- """
93
- Logs an Evidently report or suite as an artifact.
94
-
95
- :param evidently_object: (Union[Report, Suite]) The Evidently report or suite object.
96
- :param artifact_name: (str) The name for the logged artifact.
97
- """
98
- evidently_object_html = evidently_object.get_html()
99
- self.context.log_artifact(
100
- artifact_name, body=evidently_object_html.encode("utf-8"), format="html"
101
- )
102
-
103
- def log_project_dashboard(
104
- self,
105
- timestamp_start: pd.Timestamp,
106
- timestamp_end: pd.Timestamp,
107
- artifact_name: str = "dashboard",
108
- ):
109
- """
110
- Logs an Evidently project dashboard.
111
-
112
- :param timestamp_start: (pd.Timestamp) The start timestamp for the dashboard data.
113
- :param timestamp_end: (pd.Timestamp) The end timestamp for the dashboard data.
114
- :param artifact_name: (str) The name for the logged artifact.
115
- """
116
-
117
- dashboard_info = self.evidently_project.build_dashboard_info(
118
- timestamp_start, timestamp_end
119
- )
120
- template_params = TemplateParams(
121
- dashboard_id="pd_" + str(uuid.uuid4()).replace("-", ""),
122
- dashboard_info=dashboard_info,
123
- additional_graphs={},
124
- )
125
-
126
- dashboard_html = self._render(determine_template("inline"), template_params)
127
- self.context.log_artifact(
128
- artifact_name, body=dashboard_html.encode("utf-8"), format="html"
129
- )
130
-
131
- @staticmethod
132
- def _render(temple_func, template_params: "TemplateParams"):
133
- return temple_func(params=template_params)
134
-
135
-
136
- class EvidentlyModelMonitoringApplicationBaseV2(
137
- mm_base.ModelMonitoringApplicationBaseV2
66
+ class EvidentlyModelMonitoringApplicationBase(
67
+ mm_base.ModelMonitoringApplicationBase, ABC
138
68
  ):
139
69
  def __init__(
140
70
  self, evidently_workspace_path: str, evidently_project_id: "STR_UUID"
@@ -161,14 +91,14 @@ class EvidentlyModelMonitoringApplicationBaseV2(
161
91
  @staticmethod
162
92
  def log_evidently_object(
163
93
  monitoring_context: mm_context.MonitoringApplicationContext,
164
- evidently_object: Union["Report", "Suite"],
94
+ evidently_object: "Display",
165
95
  artifact_name: str,
166
- ):
96
+ ) -> None:
167
97
  """
168
98
  Logs an Evidently report or suite as an artifact.
169
99
 
170
100
  :param monitoring_context: (MonitoringApplicationContext) The monitoring context to process.
171
- :param evidently_object: (Union[Report, Suite]) The Evidently report or suite object.
101
+ :param evidently_object: (Display) The Evidently display to log, e.g. a report or a test suite object.
172
102
  :param artifact_name: (str) The name for the logged artifact.
173
103
  """
174
104
  evidently_object_html = evidently_object.get_html()
@@ -182,7 +112,7 @@ class EvidentlyModelMonitoringApplicationBaseV2(
182
112
  timestamp_start: pd.Timestamp,
183
113
  timestamp_end: pd.Timestamp,
184
114
  artifact_name: str = "dashboard",
185
- ):
115
+ ) -> None:
186
116
  """
187
117
  Logs an Evidently project dashboard.
188
118
 
@@ -201,11 +131,7 @@ class EvidentlyModelMonitoringApplicationBaseV2(
201
131
  additional_graphs={},
202
132
  )
203
133
 
204
- dashboard_html = self._render(determine_template("inline"), template_params)
134
+ dashboard_html = file_html_template(params=template_params)
205
135
  monitoring_context.log_artifact(
206
136
  artifact_name, body=dashboard_html.encode("utf-8"), format="html"
207
137
  )
208
-
209
- @staticmethod
210
- def _render(temple_func, template_params: "TemplateParams"):
211
- return temple_func(params=template_params)
@@ -31,7 +31,7 @@ from mlrun.common.schemas.model_monitoring.constants import (
31
31
  ResultStatusApp,
32
32
  )
33
33
  from mlrun.model_monitoring.applications import (
34
- ModelMonitoringApplicationBaseV2,
34
+ ModelMonitoringApplicationBase,
35
35
  )
36
36
  from mlrun.model_monitoring.metrics.histogram_distance import (
37
37
  HellingerDistance,
@@ -87,11 +87,13 @@ class DataDriftClassifier:
87
87
  return ResultStatusApp.no_detection
88
88
 
89
89
 
90
- class HistogramDataDriftApplication(ModelMonitoringApplicationBaseV2):
90
+ class HistogramDataDriftApplication(ModelMonitoringApplicationBase):
91
91
  """
92
92
  MLRun's default data drift application for model monitoring.
93
93
 
94
- The application expects tabular numerical data, and calculates three metrics over the features' histograms.
94
+ The application expects tabular numerical data, and calculates three metrics over the shared features' histograms.
95
+ The metrics are calculated on features that have reference data from the training dataset. When there is no
96
+ reference data (`feature_stats`), this application sends a warning log and does nothing.
95
97
  The three metrics are:
96
98
 
97
99
  * Hellinger distance.
@@ -112,6 +114,7 @@ class HistogramDataDriftApplication(ModelMonitoringApplicationBaseV2):
112
114
 
113
115
  project.enable_model_monitoring()
114
116
 
117
+ To avoid it, pass `deploy_histogram_data_drift_app=False`.
115
118
  """
116
119
 
117
120
  NAME: Final[str] = HistogramDataDriftApplicationConstants.NAME
@@ -195,7 +198,10 @@ class HistogramDataDriftApplication(ModelMonitoringApplicationBaseV2):
195
198
  EventFieldType.CURRENT_STATS: json.dumps(
196
199
  monitoring_context.sample_df_stats
197
200
  ),
198
- EventFieldType.DRIFT_MEASURES: metrics_per_feature.T.to_json(),
201
+ EventFieldType.DRIFT_MEASURES: json.dumps(
202
+ metrics_per_feature.T.to_dict()
203
+ | {metric.name: metric.value for metric in metrics}
204
+ ),
199
205
  EventFieldType.DRIFT_STATUS: status.value,
200
206
  },
201
207
  )
@@ -220,19 +226,18 @@ class HistogramDataDriftApplication(ModelMonitoringApplicationBaseV2):
220
226
  return metrics
221
227
 
222
228
  @staticmethod
223
- def _remove_timestamp_feature(
224
- sample_set_statistics: mlrun.common.model_monitoring.helpers.FeatureStats,
229
+ def _get_shared_features_sample_stats(
230
+ monitoring_context: mm_context.MonitoringApplicationContext,
225
231
  ) -> mlrun.common.model_monitoring.helpers.FeatureStats:
226
232
  """
227
- Drop the 'timestamp' feature if it exists, as it is irrelevant
228
- in the plotly artifact
233
+ Filter out features without reference data in `feature_stats`, e.g. `timestamp`.
229
234
  """
230
- sample_set_statistics = mlrun.common.model_monitoring.helpers.FeatureStats(
231
- sample_set_statistics.copy()
235
+ return mlrun.common.model_monitoring.helpers.FeatureStats(
236
+ {
237
+ key: monitoring_context.sample_df_stats[key]
238
+ for key in monitoring_context.feature_stats
239
+ }
232
240
  )
233
- if EventFieldType.TIMESTAMP in sample_set_statistics:
234
- del sample_set_statistics[EventFieldType.TIMESTAMP]
235
- return sample_set_statistics
236
241
 
237
242
  @staticmethod
238
243
  def _log_json_artifact(
@@ -296,8 +301,8 @@ class HistogramDataDriftApplication(ModelMonitoringApplicationBaseV2):
296
301
  self._log_json_artifact(drift_per_feature_values, monitoring_context)
297
302
 
298
303
  self._log_plotly_table_artifact(
299
- sample_set_statistics=self._remove_timestamp_feature(
300
- monitoring_context.sample_df_stats
304
+ sample_set_statistics=self._get_shared_features_sample_stats(
305
+ monitoring_context
301
306
  ),
302
307
  inputs_statistics=monitoring_context.feature_stats,
303
308
  metrics_per_feature=metrics_per_feature,
@@ -322,7 +327,7 @@ class HistogramDataDriftApplication(ModelMonitoringApplicationBaseV2):
322
327
  """
323
328
  monitoring_context.logger.debug("Starting to run the application")
324
329
  if not monitoring_context.feature_stats:
325
- monitoring_context.logger.info(
330
+ monitoring_context.logger.warning(
326
331
  "No feature statistics found, skipping the application. \n"
327
332
  "In order to run the application, training set must be provided when logging the model."
328
333
  )
@@ -29,8 +29,8 @@ class _ModelMonitoringApplicationDataRes(ABC):
29
29
  def __post_init__(self):
30
30
  pat = re.compile(r"[a-zA-Z_][a-zA-Z0-9_]*")
31
31
  if not re.fullmatch(pat, self.name):
32
- raise mlrun.errors.MLRunInvalidArgumentError(
33
- "Attribute name must be of the format [a-zA-Z_][a-zA-Z0-9_]*"
32
+ raise mlrun.errors.MLRunValueError(
33
+ "Attribute name must comply with the regex `[a-zA-Z_][a-zA-Z0-9_]*`"
34
34
  )
35
35
 
36
36
  @abstractmethod
@@ -45,7 +45,7 @@ class ModelMonitoringApplicationResult(_ModelMonitoringApplicationDataRes):
45
45
 
46
46
  :param name: (str) Name of the application result. This name must be
47
47
  unique for each metric in a single application
48
- (name must be of the format [a-zA-Z_][a-zA-Z0-9_]*).
48
+ (name must be of the format :code:`[a-zA-Z_][a-zA-Z0-9_]*`).
49
49
  :param value: (float) Value of the application result.
50
50
  :param kind: (ResultKindApp) Kind of application result.
51
51
  :param status: (ResultStatusApp) Status of the application result.
@@ -80,7 +80,7 @@ class ModelMonitoringApplicationMetric(_ModelMonitoringApplicationDataRes):
80
80
 
81
81
  :param name: (str) Name of the application metric. This name must be
82
82
  unique for each metric in a single application
83
- (name must be of the format [a-zA-Z_][a-zA-Z0-9_]*).
83
+ (name must be of the format :code:`[a-zA-Z_][a-zA-Z0-9_]*`).
84
84
  :param value: (float) Value of the application metric.
85
85
  """
86
86