mlrun 1.7.0rc14__py3-none-any.whl → 1.7.0rc15__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of mlrun might be problematic. Click here for more details.

Files changed (76) hide show
  1. mlrun/__main__.py +0 -105
  2. mlrun/artifacts/__init__.py +1 -2
  3. mlrun/artifacts/base.py +8 -250
  4. mlrun/artifacts/dataset.py +1 -190
  5. mlrun/artifacts/manager.py +2 -41
  6. mlrun/artifacts/model.py +1 -140
  7. mlrun/artifacts/plots.py +1 -375
  8. mlrun/common/schemas/model_monitoring/__init__.py +4 -0
  9. mlrun/common/schemas/model_monitoring/constants.py +24 -3
  10. mlrun/common/schemas/model_monitoring/model_endpoints.py +13 -1
  11. mlrun/config.py +3 -3
  12. mlrun/data_types/to_pandas.py +4 -4
  13. mlrun/datastore/base.py +41 -9
  14. mlrun/datastore/datastore_profile.py +50 -3
  15. mlrun/datastore/inmem.py +2 -2
  16. mlrun/datastore/sources.py +43 -2
  17. mlrun/datastore/store_resources.py +2 -6
  18. mlrun/datastore/targets.py +106 -39
  19. mlrun/db/httpdb.py +4 -4
  20. mlrun/feature_store/__init__.py +0 -2
  21. mlrun/feature_store/api.py +12 -47
  22. mlrun/feature_store/feature_set.py +9 -0
  23. mlrun/feature_store/retrieval/base.py +9 -4
  24. mlrun/feature_store/retrieval/conversion.py +4 -4
  25. mlrun/feature_store/retrieval/dask_merger.py +2 -0
  26. mlrun/feature_store/retrieval/job.py +2 -0
  27. mlrun/feature_store/retrieval/local_merger.py +2 -0
  28. mlrun/feature_store/retrieval/spark_merger.py +5 -0
  29. mlrun/frameworks/_dl_common/loggers/tensorboard_logger.py +5 -10
  30. mlrun/kfpops.py +5 -10
  31. mlrun/launcher/base.py +1 -1
  32. mlrun/launcher/client.py +1 -1
  33. mlrun/lists.py +2 -2
  34. mlrun/model.py +18 -9
  35. mlrun/model_monitoring/api.py +41 -18
  36. mlrun/model_monitoring/application.py +5 -305
  37. mlrun/model_monitoring/applications/__init__.py +11 -0
  38. mlrun/model_monitoring/applications/_application_steps.py +158 -0
  39. mlrun/model_monitoring/applications/base.py +282 -0
  40. mlrun/model_monitoring/applications/context.py +214 -0
  41. mlrun/model_monitoring/applications/evidently_base.py +211 -0
  42. mlrun/model_monitoring/applications/histogram_data_drift.py +92 -77
  43. mlrun/model_monitoring/applications/results.py +99 -0
  44. mlrun/model_monitoring/controller.py +3 -1
  45. mlrun/model_monitoring/db/stores/sqldb/models/base.py +7 -6
  46. mlrun/model_monitoring/db/stores/sqldb/sql_store.py +1 -1
  47. mlrun/model_monitoring/db/stores/v3io_kv/kv_store.py +67 -4
  48. mlrun/model_monitoring/evidently_application.py +6 -118
  49. mlrun/model_monitoring/helpers.py +1 -1
  50. mlrun/model_monitoring/model_endpoint.py +3 -2
  51. mlrun/model_monitoring/stream_processing.py +2 -3
  52. mlrun/model_monitoring/writer.py +69 -39
  53. mlrun/platforms/iguazio.py +2 -2
  54. mlrun/projects/project.py +18 -31
  55. mlrun/render.py +2 -10
  56. mlrun/run.py +1 -3
  57. mlrun/runtimes/__init__.py +3 -3
  58. mlrun/runtimes/base.py +3 -3
  59. mlrun/runtimes/funcdoc.py +0 -28
  60. mlrun/runtimes/local.py +1 -1
  61. mlrun/runtimes/mpijob/__init__.py +0 -20
  62. mlrun/runtimes/mpijob/v1.py +1 -1
  63. mlrun/runtimes/nuclio/function.py +1 -1
  64. mlrun/runtimes/utils.py +1 -1
  65. mlrun/utils/helpers.py +27 -40
  66. mlrun/utils/notifications/notification/slack.py +4 -2
  67. mlrun/utils/notifications/notification_pusher.py +133 -14
  68. mlrun/utils/version/version.json +2 -2
  69. {mlrun-1.7.0rc14.dist-info → mlrun-1.7.0rc15.dist-info}/METADATA +2 -2
  70. {mlrun-1.7.0rc14.dist-info → mlrun-1.7.0rc15.dist-info}/RECORD +75 -71
  71. mlrun/runtimes/mpijob/v1alpha1.py +0 -29
  72. /mlrun/{runtimes → common/runtimes}/constants.py +0 -0
  73. {mlrun-1.7.0rc14.dist-info → mlrun-1.7.0rc15.dist-info}/LICENSE +0 -0
  74. {mlrun-1.7.0rc14.dist-info → mlrun-1.7.0rc15.dist-info}/WHEEL +0 -0
  75. {mlrun-1.7.0rc14.dist-info → mlrun-1.7.0rc15.dist-info}/entry_points.txt +0 -0
  76. {mlrun-1.7.0rc14.dist-info → mlrun-1.7.0rc15.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,211 @@
1
+ # Copyright 2023 Iguazio
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ import uuid
16
+ import warnings
17
+ from typing import Union
18
+
19
+ import pandas as pd
20
+ import semver
21
+
22
+ import mlrun.model_monitoring.applications.base as mm_base
23
+ import mlrun.model_monitoring.applications.context as mm_context
24
+ from mlrun.errors import MLRunIncompatibleVersionError
25
+
26
+ SUPPORTED_EVIDENTLY_VERSION = semver.Version.parse("0.4.11")
27
+
28
+
29
def _check_evidently_version(*, cur: semver.Version, ref: semver.Version) -> None:
    """
    Validate the installed Evidently version against the tested reference version.

    The function returns silently when ``cur`` is accepted, emits a warning when
    ``cur`` is a newer minor of the same ``0.x`` series, and raises otherwise.

    :param cur: The Evidently version that is currently installed.
    :param ref: The Evidently version to compare against.

    :raise MLRunIncompatibleVersionError: If ``cur`` is neither accepted nor only warned about.
    """
    # First ask semver itself whether the installed version fits the reference.
    if ref.is_compatible(cur):
        return

    both_major_zero = cur.major == ref.major == 0

    # A newer patch of the same 0.<minor> release is accepted as well.
    if both_major_zero and cur.minor == ref.minor and cur.patch > ref.patch:
        return

    # A newer minor of the 0.x series is tolerated, but the user is warned.
    if both_major_zero and cur.minor > ref.minor:
        warnings.warn(
            f"Evidently version {cur} is not compatible with the tested "
            f"version {ref}, use at your own risk."
        )
        return

    raise MLRunIncompatibleVersionError(
        f"Evidently version {cur} is not supported, please change to "
        f"{ref} (or another compatible version)."
    )
44
+
45
+
46
+ _HAS_EVIDENTLY = False
47
+ try:
48
+ import evidently # noqa: F401
49
+
50
+ _check_evidently_version(
51
+ cur=semver.Version.parse(evidently.__version__),
52
+ ref=SUPPORTED_EVIDENTLY_VERSION,
53
+ )
54
+ _HAS_EVIDENTLY = True
55
+ except ModuleNotFoundError:
56
+ pass
57
+
58
+
59
+ if _HAS_EVIDENTLY:
60
+ from evidently.renderers.notebook_utils import determine_template
61
+ from evidently.report.report import Report
62
+ from evidently.suite.base_suite import Suite
63
+ from evidently.ui.type_aliases import STR_UUID
64
+ from evidently.ui.workspace import Workspace
65
+ from evidently.utils.dashboard import TemplateParams
66
+
67
+
68
class EvidentlyModelMonitoringApplicationBase(mm_base.ModelMonitoringApplicationBase):
    def __init__(
        self, evidently_workspace_path: str, evidently_project_id: "STR_UUID"
    ) -> None:
        """
        Base class for model monitoring applications that integrate with Evidently.
        Note: evidently is not installed by default in the mlrun/mlrun image,
        it has to be installed separately before this class can be used.

        :param evidently_workspace_path: (str) The path to the Evidently workspace.
        :param evidently_project_id:     (STR_UUID) The ID of the Evidently project.

        :raise ModuleNotFoundError: If Evidently could not be imported.
        """
        if not _HAS_EVIDENTLY:
            raise ModuleNotFoundError("Evidently is not installed - the app cannot run")

        workspace = Workspace.create(evidently_workspace_path)
        self.evidently_workspace = workspace
        self.evidently_project_id = evidently_project_id
        self.evidently_project = workspace.get_project(evidently_project_id)

    def log_evidently_object(
        self, evidently_object: Union["Report", "Suite"], artifact_name: str
    ):
        """
        Log the HTML rendering of an Evidently report or suite as an artifact.

        :param evidently_object: (Union[Report, Suite]) The Evidently report or suite to log.
        :param artifact_name:    (str) The key under which the artifact is logged.
        """
        html = evidently_object.get_html()
        self.context.log_artifact(
            artifact_name,
            body=html.encode("utf-8"),
            format="html",
        )

    def log_project_dashboard(
        self,
        timestamp_start: pd.Timestamp,
        timestamp_end: pd.Timestamp,
        artifact_name: str = "dashboard",
    ):
        """
        Build the Evidently project dashboard for a time range and log it as an HTML artifact.

        :param timestamp_start: (pd.Timestamp) The start timestamp for the dashboard data.
        :param timestamp_end:   (pd.Timestamp) The end timestamp for the dashboard data.
        :param artifact_name:   (str) The key under which the artifact is logged.
        """
        info = self.evidently_project.build_dashboard_info(
            timestamp_start, timestamp_end
        )
        # The dashboard ID is "pd_" followed by the 32 hex digits of a random UUID.
        params = TemplateParams(
            dashboard_id="pd_" + uuid.uuid4().hex,
            dashboard_info=info,
            additional_graphs={},
        )

        rendered = self._render(determine_template("inline"), params)
        self.context.log_artifact(
            artifact_name,
            body=rendered.encode("utf-8"),
            format="html",
        )

    @staticmethod
    def _render(temple_func, template_params: "TemplateParams"):
        """Call the given template function with the template parameters and return its output."""
        return temple_func(params=template_params)
134
+
135
+
136
class EvidentlyModelMonitoringApplicationBaseV2(
    mm_base.ModelMonitoringApplicationBaseV2
):
    def __init__(
        self, evidently_workspace_path: str, evidently_project_id: "STR_UUID"
    ) -> None:
        """
        Base class for model monitoring applications that integrate with Evidently.
        Note: evidently is not installed by default in the mlrun/mlrun image,
        it has to be installed separately before this class can be used.

        :param evidently_workspace_path: (str) The path to the Evidently workspace.
        :param evidently_project_id:     (STR_UUID) The ID of the Evidently project.

        :raise ModuleNotFoundError: If Evidently could not be imported.
        """

        # TODO : more than one project (mep -> project)
        if not _HAS_EVIDENTLY:
            raise ModuleNotFoundError("Evidently is not installed - the app cannot run")

        workspace = Workspace.create(evidently_workspace_path)
        self.evidently_workspace = workspace
        self.evidently_project_id = evidently_project_id
        self.evidently_project = workspace.get_project(evidently_project_id)

    @staticmethod
    def log_evidently_object(
        monitoring_context: mm_context.MonitoringApplicationContext,
        evidently_object: Union["Report", "Suite"],
        artifact_name: str,
    ):
        """
        Log the HTML rendering of an Evidently report or suite as an artifact.

        :param monitoring_context: (MonitoringApplicationContext) The context used to log the artifact.
        :param evidently_object:   (Union[Report, Suite]) The Evidently report or suite to log.
        :param artifact_name:      (str) The key under which the artifact is logged.
        """
        html = evidently_object.get_html()
        monitoring_context.log_artifact(
            artifact_name,
            body=html.encode("utf-8"),
            format="html",
        )

    def log_project_dashboard(
        self,
        monitoring_context: mm_context.MonitoringApplicationContext,
        timestamp_start: pd.Timestamp,
        timestamp_end: pd.Timestamp,
        artifact_name: str = "dashboard",
    ):
        """
        Build the Evidently project dashboard for a time range and log it as an HTML artifact.

        :param monitoring_context: (MonitoringApplicationContext) The context used to log the artifact.
        :param timestamp_start:    (pd.Timestamp) The start timestamp for the dashboard data.
        :param timestamp_end:      (pd.Timestamp) The end timestamp for the dashboard data.
        :param artifact_name:      (str) The key under which the artifact is logged.
        """
        info = self.evidently_project.build_dashboard_info(
            timestamp_start, timestamp_end
        )
        # The dashboard ID is "pd_" followed by the 32 hex digits of a random UUID.
        params = TemplateParams(
            dashboard_id="pd_" + uuid.uuid4().hex,
            dashboard_info=info,
            additional_graphs={},
        )

        rendered = self._render(determine_template("inline"), params)
        monitoring_context.log_artifact(
            artifact_name,
            body=rendered.encode("utf-8"),
            format="html",
        )

    @staticmethod
    def _render(temple_func, template_params: "TemplateParams"):
        """Call the given template function with the template parameters and return its output."""
        return temple_func(params=template_params)
@@ -11,15 +11,17 @@
11
11
  # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
12
  # See the License for the specific language governing permissions and
13
13
  # limitations under the License.
14
-
14
+ import typing
15
15
  from dataclasses import dataclass
16
16
  from typing import Final, Optional, Protocol, cast
17
17
 
18
18
  import numpy as np
19
- from pandas import DataFrame, Series, Timestamp
19
+ from pandas import DataFrame, Series
20
20
 
21
21
  import mlrun.artifacts
22
22
  import mlrun.common.model_monitoring.helpers
23
+ import mlrun.model_monitoring.applications.context as mm_context
24
+ import mlrun.model_monitoring.applications.results as mm_results
23
25
  import mlrun.model_monitoring.features_drift_table as mm_drift_table
24
26
  from mlrun.common.schemas.model_monitoring.constants import (
25
27
  EventFieldType,
@@ -27,9 +29,8 @@ from mlrun.common.schemas.model_monitoring.constants import (
27
29
  ResultKindApp,
28
30
  ResultStatusApp,
29
31
  )
30
- from mlrun.model_monitoring.application import (
31
- ModelMonitoringApplicationBase,
32
- ModelMonitoringApplicationResult,
32
+ from mlrun.model_monitoring.applications import (
33
+ ModelMonitoringApplicationBaseV2,
33
34
  )
34
35
  from mlrun.model_monitoring.metrics.histogram_distance import (
35
36
  HellingerDistance,
@@ -85,7 +86,7 @@ class DataDriftClassifier:
85
86
  return ResultStatusApp.no_detection
86
87
 
87
88
 
88
- class HistogramDataDriftApplication(ModelMonitoringApplicationBase):
89
+ class HistogramDataDriftApplication(ModelMonitoringApplicationBaseV2):
89
90
  """
90
91
  MLRun's default data drift application for model monitoring.
91
92
 
@@ -95,7 +96,6 @@ class HistogramDataDriftApplication(ModelMonitoringApplicationBase):
95
96
  """
96
97
 
97
98
  NAME: Final[str] = HistogramDataDriftApplicationConstants.NAME
98
- METRIC_KIND: Final[ResultKindApp] = ResultKindApp.data_drift
99
99
 
100
100
  _REQUIRED_METRICS = {HellingerDistance, TotalVarianceDistance}
101
101
 
@@ -118,17 +118,22 @@ class HistogramDataDriftApplication(ModelMonitoringApplicationBase):
118
118
  ), "TVD and Hellinger distance are required for the general data drift result"
119
119
 
120
120
  def _compute_metrics_per_feature(
121
- self, sample_df_stats: DataFrame, feature_stats: DataFrame
121
+ self, monitoring_context: mm_context.MonitoringApplicationContext
122
122
  ) -> DataFrame:
123
123
  """Compute the metrics for the different features and labels"""
124
124
  metrics_per_feature = DataFrame(
125
125
  columns=[metric_class.NAME for metric_class in self.metrics]
126
126
  )
127
-
127
+ feature_stats = monitoring_context.dict_to_histogram(
128
+ monitoring_context.feature_stats
129
+ )
130
+ sample_df_stats = monitoring_context.dict_to_histogram(
131
+ monitoring_context.sample_df_stats
132
+ )
128
133
  for feature_name in feature_stats:
129
134
  sample_hist = np.asarray(sample_df_stats[feature_name])
130
135
  reference_hist = np.asarray(feature_stats[feature_name])
131
- self.context.logger.info(
136
+ monitoring_context.logger.info(
132
137
  "Computing metrics for feature", feature_name=feature_name
133
138
  )
134
139
  metrics_per_feature.loc[feature_name] = { # pyright: ignore[reportCallIssue,reportArgumentType]
@@ -137,62 +142,54 @@ class HistogramDataDriftApplication(ModelMonitoringApplicationBase):
137
142
  ).compute()
138
143
  for metric in self.metrics
139
144
  }
140
- self.context.logger.info("Finished computing the metrics")
145
+ monitoring_context.logger.info("Finished computing the metrics")
141
146
 
142
147
  return metrics_per_feature
143
148
 
144
149
  def _add_general_drift_result(
145
- self, results: list[ModelMonitoringApplicationResult], value: float
150
+ self,
151
+ metrics: list[mm_results.ModelMonitoringApplicationMetric],
146
152
  ) -> None:
147
153
  """Add the general drift result to the results list and log it"""
154
+ value = np.mean(
155
+ [
156
+ metric.value
157
+ for metric in metrics
158
+ if metric.name
159
+ in [
160
+ f"{HellingerDistance.NAME}_mean",
161
+ f"{TotalVarianceDistance.NAME}_mean",
162
+ ]
163
+ ]
164
+ )
165
+
148
166
  status = self._value_classifier.value_to_status(value)
149
- results.append(
150
- ModelMonitoringApplicationResult(
167
+ metrics.append(
168
+ mm_results.ModelMonitoringApplicationResult(
151
169
  name=HistogramDataDriftApplicationConstants.GENERAL_RESULT_NAME,
152
170
  value=value,
153
- kind=self.METRIC_KIND,
171
+ kind=ResultKindApp.data_drift,
154
172
  status=status,
155
173
  )
156
174
  )
157
175
 
158
- def _get_results(
176
+ def _get_metrics(
159
177
  self, metrics_per_feature: DataFrame
160
- ) -> list[ModelMonitoringApplicationResult]:
178
+ ) -> list[mm_results.ModelMonitoringApplicationMetric]:
161
179
  """Average the metrics over the features and add the status"""
162
- results: list[ModelMonitoringApplicationResult] = []
180
+ metrics: list[mm_results.ModelMonitoringApplicationMetric] = []
163
181
 
164
- self.context.logger.debug("Averaging metrics over the features")
165
182
  metrics_mean = metrics_per_feature.mean().to_dict()
166
183
 
167
- self.context.logger.debug("Creating the results")
168
184
  for name, value in metrics_mean.items():
169
- if name == KullbackLeiblerDivergence.NAME:
170
- # This metric is not bounded from above [0, inf).
171
- # No status is currently reported for KL divergence
172
- status = ResultStatusApp.irrelevant
173
- else:
174
- status = self._value_classifier.value_to_status(value)
175
- results.append(
176
- ModelMonitoringApplicationResult(
185
+ metrics.append(
186
+ mm_results.ModelMonitoringApplicationMetric(
177
187
  name=f"{name}_mean",
178
188
  value=value,
179
- kind=self.METRIC_KIND,
180
- status=status,
181
189
  )
182
190
  )
183
191
 
184
- self._add_general_drift_result(
185
- results=results,
186
- value=np.mean(
187
- [
188
- metrics_mean[HellingerDistance.NAME],
189
- metrics_mean[TotalVarianceDistance.NAME],
190
- ]
191
- ),
192
- )
193
-
194
- self.context.logger.info("Finished with the results")
195
- return results
192
+ return metrics
196
193
 
197
194
  @staticmethod
198
195
  def _remove_timestamp_feature(
@@ -209,17 +206,21 @@ class HistogramDataDriftApplication(ModelMonitoringApplicationBase):
209
206
  del sample_set_statistics[EventFieldType.TIMESTAMP]
210
207
  return sample_set_statistics
211
208
 
212
- def _log_json_artifact(self, drift_per_feature_values: Series) -> None:
209
+ def _log_json_artifact(
210
+ self,
211
+ drift_per_feature_values: Series,
212
+ monitoring_context: mm_context.MonitoringApplicationContext,
213
+ ) -> None:
213
214
  """Log the drift values as a JSON artifact"""
214
- self.context.logger.debug("Logging drift value per feature JSON artifact")
215
- self.context.log_artifact(
215
+ monitoring_context.logger.debug("Logging drift value per feature JSON artifact")
216
+ monitoring_context.log_artifact(
216
217
  mlrun.artifacts.Artifact(
217
218
  body=drift_per_feature_values.to_json(),
218
219
  format="json",
219
220
  key="features_drift_results",
220
221
  )
221
222
  )
222
- self.context.logger.debug("Logged JSON artifact successfully")
223
+ monitoring_context.logger.debug("Logged JSON artifact successfully")
223
224
 
224
225
  def _log_plotly_table_artifact(
225
226
  self,
@@ -227,21 +228,22 @@ class HistogramDataDriftApplication(ModelMonitoringApplicationBase):
227
228
  inputs_statistics: mlrun.common.model_monitoring.helpers.FeatureStats,
228
229
  metrics_per_feature: DataFrame,
229
230
  drift_per_feature_values: Series,
231
+ monitoring_context: mm_context.MonitoringApplicationContext,
230
232
  ) -> None:
231
233
  """Log the Plotly drift table artifact"""
232
- self.context.logger.debug(
234
+ monitoring_context.logger.debug(
233
235
  "Feature stats",
234
236
  sample_set_statistics=sample_set_statistics,
235
237
  inputs_statistics=inputs_statistics,
236
238
  )
237
239
 
238
- self.context.logger.debug("Computing drift results per feature")
240
+ monitoring_context.logger.debug("Computing drift results per feature")
239
241
  drift_results = {
240
242
  cast(str, key): (self._value_classifier.value_to_status(value), value)
241
243
  for key, value in drift_per_feature_values.items()
242
244
  }
243
- self.context.logger.debug("Logging plotly artifact")
244
- self.context.log_artifact(
245
+ monitoring_context.logger.debug("Logging plotly artifact")
246
+ monitoring_context.log_artifact(
245
247
  mm_drift_table.FeaturesDriftTablePlot().produce(
246
248
  sample_set_statistics=sample_set_statistics,
247
249
  inputs_statistics=inputs_statistics,
@@ -249,12 +251,11 @@ class HistogramDataDriftApplication(ModelMonitoringApplicationBase):
249
251
  drift_results=drift_results,
250
252
  )
251
253
  )
252
- self.context.logger.debug("Logged plotly artifact successfully")
254
+ monitoring_context.logger.debug("Logged plotly artifact successfully")
253
255
 
254
256
  def _log_drift_artifacts(
255
257
  self,
256
- sample_set_statistics: mlrun.common.model_monitoring.helpers.FeatureStats,
257
- inputs_statistics: mlrun.common.model_monitoring.helpers.FeatureStats,
258
+ monitoring_context: mm_context.MonitoringApplicationContext,
258
259
  metrics_per_feature: DataFrame,
259
260
  log_json_artifact: bool = True,
260
261
  ) -> None:
@@ -264,45 +265,59 @@ class HistogramDataDriftApplication(ModelMonitoringApplicationBase):
264
265
  ].mean(axis=1)
265
266
 
266
267
  if log_json_artifact:
267
- self._log_json_artifact(drift_per_feature_values)
268
+ self._log_json_artifact(drift_per_feature_values, monitoring_context)
268
269
 
269
270
  self._log_plotly_table_artifact(
270
- sample_set_statistics=self._remove_timestamp_feature(sample_set_statistics),
271
- inputs_statistics=inputs_statistics,
271
+ sample_set_statistics=self._remove_timestamp_feature(
272
+ monitoring_context.sample_df_stats
273
+ ),
274
+ inputs_statistics=monitoring_context.feature_stats,
272
275
  metrics_per_feature=metrics_per_feature,
273
276
  drift_per_feature_values=drift_per_feature_values,
277
+ monitoring_context=monitoring_context,
274
278
  )
275
279
 
276
280
  def do_tracking(
277
281
  self,
278
- application_name: str,
279
- sample_df_stats: mlrun.common.model_monitoring.helpers.FeatureStats,
280
- feature_stats: mlrun.common.model_monitoring.helpers.FeatureStats,
281
- sample_df: DataFrame,
282
- start_infer_time: Timestamp,
283
- end_infer_time: Timestamp,
284
- latest_request: Timestamp,
285
- endpoint_id: str,
286
- output_stream_uri: str,
287
- ) -> list[ModelMonitoringApplicationResult]:
282
+ monitoring_context: mm_context.MonitoringApplicationContext,
283
+ ) -> list[
284
+ typing.Union[
285
+ mm_results.ModelMonitoringApplicationResult,
286
+ mm_results.ModelMonitoringApplicationMetric,
287
+ ]
288
+ ]:
288
289
  """
289
290
  Calculate and return the data drift metrics, averaged over the features.
290
291
 
291
- Refer to `ModelMonitoringApplicationBase` for the meaning of the
292
+ Refer to `ModelMonitoringApplicationBaseV2` for the meaning of the
292
293
  function arguments.
293
294
  """
294
- self.context.logger.debug("Starting to run the application")
295
+ monitoring_context.logger.debug("Starting to run the application")
296
+ if not monitoring_context.feature_stats:
297
+ monitoring_context.logger.info(
298
+ "No feature statistics found, skipping the application. \n"
299
+ "In order to run the application, training set must be provided when logging the model."
300
+ )
301
+ return []
295
302
  metrics_per_feature = self._compute_metrics_per_feature(
296
- sample_df_stats=self.dict_to_histogram(sample_df_stats),
297
- feature_stats=self.dict_to_histogram(feature_stats),
303
+ monitoring_context=monitoring_context
298
304
  )
299
- self.context.logger.debug("Saving artifacts")
305
+ monitoring_context.logger.debug("Saving artifacts")
300
306
  self._log_drift_artifacts(
301
- inputs_statistics=feature_stats,
302
- sample_set_statistics=sample_df_stats,
307
+ monitoring_context=monitoring_context,
303
308
  metrics_per_feature=metrics_per_feature,
304
309
  )
305
- self.context.logger.debug("Computing average per metric")
306
- results = self._get_results(metrics_per_feature)
307
- self.context.logger.debug("Finished running the application", results=results)
308
- return results
310
+ monitoring_context.logger.debug("Computing average per metric")
311
+ metrics_and_result: list[
312
+ typing.Union[
313
+ mm_results.ModelMonitoringApplicationMetric,
314
+ mm_results.ModelMonitoringApplicationResult,
315
+ ]
316
+ ] = self._get_metrics(metrics_per_feature)
317
+ self._add_general_drift_result(
318
+ metrics=metrics_and_result,
319
+ )
320
+ monitoring_context.logger.debug(
321
+ "Finished running the application", results=metrics_and_result
322
+ )
323
+ return metrics_and_result
@@ -0,0 +1,99 @@
1
+ # Copyright 2023 Iguazio
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ import dataclasses
16
+ import json
17
+ import re
18
+ from abc import ABC, abstractmethod
19
+
20
+ import mlrun.common.helpers
21
+ import mlrun.common.model_monitoring.helpers
22
+ import mlrun.common.schemas.model_monitoring.constants as mm_constant
23
+ import mlrun.utils.v3io_clients
24
+
25
+
26
class _ModelMonitoringApplicationDataRes(ABC):
    """
    Abstract base for named values that can be converted to a dictionary.

    The ``name`` attribute is validated in ``__post_init__``, and subclasses
    must implement ``to_dict``.
    """

    # Compiled once for the class rather than on every instantiation.
    # Deliberately left un-annotated so it is never picked up as a dataclass field.
    _NAME_PATTERN = re.compile(r"[a-zA-Z_][a-zA-Z0-9_]*")

    name: str

    def __post_init__(self):
        """
        Validate that ``name`` fully matches ``[a-zA-Z_][a-zA-Z0-9_]*``.

        :raise MLRunInvalidArgumentError: If the name does not match the pattern.
        """
        if not self._NAME_PATTERN.fullmatch(self.name):
            raise mlrun.errors.MLRunInvalidArgumentError(
                "Attribute name must be of the format [a-zA-Z_][a-zA-Z0-9_]*"
            )

    @abstractmethod
    def to_dict(self):
        """
        Convert the object to a dictionary.

        :returns: (dict) Dictionary representation of the object.
        """
        raise NotImplementedError
39
+
40
+
41
@dataclasses.dataclass
class ModelMonitoringApplicationResult(_ModelMonitoringApplicationDataRes):
    """
    A single result produced by a custom model monitoring application.

    :param name:       (str) Name of the application result. This name must be
                       unique for each metric in a single application
                       (name must be of the format [a-zA-Z_][a-zA-Z0-9_]*).
    :param value:      (float) Value of the application result.
    :param kind:       (ResultKindApp) Kind of application result.
    :param status:     (ResultStatusApp) Status of the application result.
    :param extra_data: (dict) Extra data associated with the application result.
    """

    name: str
    value: float
    kind: mm_constant.ResultKindApp
    status: mm_constant.ResultStatusApp
    extra_data: dict = dataclasses.field(default_factory=dict)

    def to_dict(self):
        """
        Build the dictionary representation of the result, suitable for writing.

        The kind and status are stored by their enum values and the extra data
        is stored as a JSON string.

        :returns: (dict) Dictionary representation of the result.
        """
        keys = mm_constant.ResultData
        as_dict = {}
        as_dict[keys.RESULT_NAME] = self.name
        as_dict[keys.RESULT_VALUE] = self.value
        as_dict[keys.RESULT_KIND] = self.kind.value
        as_dict[keys.RESULT_STATUS] = self.status.value
        as_dict[keys.RESULT_EXTRA_DATA] = json.dumps(self.extra_data)
        return as_dict
74
+
75
+
76
@dataclasses.dataclass
class ModelMonitoringApplicationMetric(_ModelMonitoringApplicationDataRes):
    """
    A single metric produced by a custom model monitoring application.

    :param name:  (str) Name of the application metric. This name must be
                  unique for each metric in a single application
                  (name must be of the format [a-zA-Z_][a-zA-Z0-9_]*).
    :param value: (float) Value of the application metric.
    """

    name: str
    value: float

    def to_dict(self):
        """
        Build the dictionary representation of the metric, suitable for writing.

        :returns: (dict) Dictionary representation of the metric.
        """
        keys = mm_constant.MetricData
        as_dict = {}
        as_dict[keys.METRIC_NAME] = self.name
        as_dict[keys.METRIC_VALUE] = self.value
        return as_dict
@@ -457,6 +457,7 @@ class MonitoringApplicationController:
457
457
  )
458
458
 
459
459
  for start_infer_time, end_infer_time in batch_window.get_intervals():
460
+ # start - TODO : delete in 1.9.0 (V1 app deprecation)
460
461
  try:
461
462
  # Get application sample data
462
463
  offline_response = cls._get_sample_df(
@@ -504,7 +505,7 @@ class MonitoringApplicationController:
504
505
  current_stats = calculate_inputs_statistics(
505
506
  sample_set_statistics=feature_stats, inputs=df
506
507
  )
507
-
508
+ # end - TODO : delete in 1.9.0 (V1 app deprecation)
508
509
  cls._push_to_applications(
509
510
  current_stats=current_stats,
510
511
  feature_stats=feature_stats,
@@ -613,6 +614,7 @@ class MonitoringApplicationController:
613
614
  project=project,
614
615
  function_name=mm_constants.MonitoringFunctionNames.WRITER,
615
616
  ),
617
+ mm_constants.ApplicationEvent.MLRUN_CONTEXT: {}, # TODO : for future use by ad-hoc batch infer
616
618
  }
617
619
  for app_name in applications_names:
618
620
  data.update({mm_constants.ApplicationEvent.APPLICATION_NAME: app_name})
@@ -24,6 +24,7 @@ from sqlalchemy import (
24
24
  from mlrun.common.schemas.model_monitoring import (
25
25
  EventFieldType,
26
26
  FileTargetKind,
27
+ ResultData,
27
28
  SchedulingKeys,
28
29
  WriterEvent,
29
30
  )
@@ -114,7 +115,7 @@ class ApplicationResultBaseTable(BaseModel):
114
115
  )
115
116
 
116
117
  result_name = Column(
117
- WriterEvent.RESULT_NAME,
118
+ ResultData.RESULT_NAME,
118
119
  String(40),
119
120
  )
120
121
 
@@ -127,11 +128,11 @@ class ApplicationResultBaseTable(BaseModel):
127
128
  TIMESTAMP(timezone=True),
128
129
  )
129
130
 
130
- result_status = Column(WriterEvent.RESULT_STATUS, String(10))
131
- result_kind = Column(WriterEvent.RESULT_KIND, String(40))
132
- result_value = Column(WriterEvent.RESULT_VALUE, Float)
133
- result_extra_data = Column(WriterEvent.RESULT_EXTRA_DATA, Text)
134
- current_stats = Column(WriterEvent.CURRENT_STATS, Text)
131
+ result_status = Column(ResultData.RESULT_STATUS, String(10))
132
+ result_kind = Column(ResultData.RESULT_KIND, String(40))
133
+ result_value = Column(ResultData.RESULT_VALUE, Float)
134
+ result_extra_data = Column(ResultData.RESULT_EXTRA_DATA, Text)
135
+ current_stats = Column(ResultData.CURRENT_STATS, Text)
135
136
 
136
137
 
137
138
  class MonitoringSchedulesBaseTable(BaseModel):