mlrun 1.7.0rc14__py3-none-any.whl → 1.7.0rc22__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of mlrun might be problematic. Click here for more details.

Files changed (160)
  1. mlrun/__init__.py +10 -1
  2. mlrun/__main__.py +23 -111
  3. mlrun/alerts/__init__.py +15 -0
  4. mlrun/alerts/alert.py +169 -0
  5. mlrun/api/schemas/__init__.py +4 -3
  6. mlrun/artifacts/__init__.py +8 -3
  7. mlrun/artifacts/base.py +36 -253
  8. mlrun/artifacts/dataset.py +9 -190
  9. mlrun/artifacts/manager.py +46 -42
  10. mlrun/artifacts/model.py +9 -141
  11. mlrun/artifacts/plots.py +14 -375
  12. mlrun/common/constants.py +65 -3
  13. mlrun/common/formatters/__init__.py +19 -0
  14. mlrun/{runtimes/mpijob/v1alpha1.py → common/formatters/artifact.py} +6 -14
  15. mlrun/common/formatters/base.py +113 -0
  16. mlrun/common/formatters/function.py +46 -0
  17. mlrun/common/formatters/pipeline.py +53 -0
  18. mlrun/common/formatters/project.py +51 -0
  19. mlrun/{runtimes → common/runtimes}/constants.py +32 -4
  20. mlrun/common/schemas/__init__.py +10 -5
  21. mlrun/common/schemas/alert.py +92 -11
  22. mlrun/common/schemas/api_gateway.py +56 -0
  23. mlrun/common/schemas/artifact.py +15 -5
  24. mlrun/common/schemas/auth.py +2 -0
  25. mlrun/common/schemas/client_spec.py +1 -0
  26. mlrun/common/schemas/frontend_spec.py +1 -0
  27. mlrun/common/schemas/function.py +4 -0
  28. mlrun/common/schemas/model_monitoring/__init__.py +15 -3
  29. mlrun/common/schemas/model_monitoring/constants.py +58 -7
  30. mlrun/common/schemas/model_monitoring/grafana.py +9 -5
  31. mlrun/common/schemas/model_monitoring/model_endpoints.py +86 -2
  32. mlrun/common/schemas/pipeline.py +0 -9
  33. mlrun/common/schemas/project.py +5 -11
  34. mlrun/common/types.py +1 -0
  35. mlrun/config.py +30 -9
  36. mlrun/data_types/to_pandas.py +9 -9
  37. mlrun/datastore/base.py +41 -9
  38. mlrun/datastore/datastore.py +6 -2
  39. mlrun/datastore/datastore_profile.py +56 -4
  40. mlrun/datastore/inmem.py +2 -2
  41. mlrun/datastore/redis.py +2 -2
  42. mlrun/datastore/s3.py +5 -0
  43. mlrun/datastore/sources.py +147 -7
  44. mlrun/datastore/store_resources.py +7 -7
  45. mlrun/datastore/targets.py +110 -42
  46. mlrun/datastore/utils.py +42 -0
  47. mlrun/db/base.py +54 -10
  48. mlrun/db/httpdb.py +282 -79
  49. mlrun/db/nopdb.py +52 -10
  50. mlrun/errors.py +11 -0
  51. mlrun/execution.py +26 -9
  52. mlrun/feature_store/__init__.py +0 -2
  53. mlrun/feature_store/api.py +12 -47
  54. mlrun/feature_store/feature_set.py +9 -0
  55. mlrun/feature_store/feature_vector.py +8 -0
  56. mlrun/feature_store/ingestion.py +7 -6
  57. mlrun/feature_store/retrieval/base.py +9 -4
  58. mlrun/feature_store/retrieval/conversion.py +9 -9
  59. mlrun/feature_store/retrieval/dask_merger.py +2 -0
  60. mlrun/feature_store/retrieval/job.py +9 -3
  61. mlrun/feature_store/retrieval/local_merger.py +2 -0
  62. mlrun/feature_store/retrieval/spark_merger.py +16 -0
  63. mlrun/frameworks/__init__.py +6 -0
  64. mlrun/frameworks/_dl_common/loggers/tensorboard_logger.py +7 -12
  65. mlrun/frameworks/parallel_coordinates.py +2 -1
  66. mlrun/frameworks/tf_keras/__init__.py +4 -1
  67. mlrun/k8s_utils.py +10 -11
  68. mlrun/launcher/base.py +4 -3
  69. mlrun/launcher/client.py +5 -3
  70. mlrun/launcher/local.py +12 -2
  71. mlrun/launcher/remote.py +9 -2
  72. mlrun/lists.py +6 -2
  73. mlrun/model.py +47 -21
  74. mlrun/model_monitoring/__init__.py +1 -1
  75. mlrun/model_monitoring/api.py +42 -18
  76. mlrun/model_monitoring/application.py +5 -305
  77. mlrun/model_monitoring/applications/__init__.py +11 -0
  78. mlrun/model_monitoring/applications/_application_steps.py +157 -0
  79. mlrun/model_monitoring/applications/base.py +280 -0
  80. mlrun/model_monitoring/applications/context.py +214 -0
  81. mlrun/model_monitoring/applications/evidently_base.py +211 -0
  82. mlrun/model_monitoring/applications/histogram_data_drift.py +132 -91
  83. mlrun/model_monitoring/applications/results.py +99 -0
  84. mlrun/model_monitoring/controller.py +3 -1
  85. mlrun/model_monitoring/db/__init__.py +2 -0
  86. mlrun/model_monitoring/db/stores/__init__.py +0 -2
  87. mlrun/model_monitoring/db/stores/base/store.py +22 -37
  88. mlrun/model_monitoring/db/stores/sqldb/models/__init__.py +43 -21
  89. mlrun/model_monitoring/db/stores/sqldb/models/base.py +39 -8
  90. mlrun/model_monitoring/db/stores/sqldb/models/mysql.py +27 -7
  91. mlrun/model_monitoring/db/stores/sqldb/models/sqlite.py +5 -0
  92. mlrun/model_monitoring/db/stores/sqldb/sql_store.py +246 -224
  93. mlrun/model_monitoring/db/stores/v3io_kv/kv_store.py +232 -216
  94. mlrun/model_monitoring/db/tsdb/__init__.py +100 -0
  95. mlrun/model_monitoring/db/tsdb/base.py +316 -0
  96. mlrun/model_monitoring/db/tsdb/helpers.py +30 -0
  97. mlrun/model_monitoring/db/tsdb/tdengine/__init__.py +15 -0
  98. mlrun/model_monitoring/db/tsdb/tdengine/schemas.py +240 -0
  99. mlrun/model_monitoring/db/tsdb/tdengine/stream_graph_steps.py +45 -0
  100. mlrun/model_monitoring/db/tsdb/tdengine/tdengine_connector.py +401 -0
  101. mlrun/model_monitoring/db/tsdb/v3io/__init__.py +15 -0
  102. mlrun/model_monitoring/db/tsdb/v3io/stream_graph_steps.py +117 -0
  103. mlrun/model_monitoring/db/tsdb/v3io/v3io_connector.py +658 -0
  104. mlrun/model_monitoring/evidently_application.py +6 -118
  105. mlrun/model_monitoring/helpers.py +63 -1
  106. mlrun/model_monitoring/model_endpoint.py +3 -2
  107. mlrun/model_monitoring/stream_processing.py +57 -216
  108. mlrun/model_monitoring/writer.py +134 -124
  109. mlrun/package/__init__.py +13 -1
  110. mlrun/package/packagers/__init__.py +6 -1
  111. mlrun/package/utils/_formatter.py +2 -2
  112. mlrun/platforms/__init__.py +10 -9
  113. mlrun/platforms/iguazio.py +21 -202
  114. mlrun/projects/operations.py +24 -12
  115. mlrun/projects/pipelines.py +79 -102
  116. mlrun/projects/project.py +271 -103
  117. mlrun/render.py +15 -14
  118. mlrun/run.py +16 -46
  119. mlrun/runtimes/__init__.py +6 -3
  120. mlrun/runtimes/base.py +14 -7
  121. mlrun/runtimes/daskjob.py +1 -0
  122. mlrun/runtimes/databricks_job/databricks_runtime.py +1 -0
  123. mlrun/runtimes/databricks_job/databricks_wrapper.py +1 -1
  124. mlrun/runtimes/funcdoc.py +0 -28
  125. mlrun/runtimes/kubejob.py +2 -1
  126. mlrun/runtimes/local.py +12 -3
  127. mlrun/runtimes/mpijob/__init__.py +0 -20
  128. mlrun/runtimes/mpijob/v1.py +1 -1
  129. mlrun/runtimes/nuclio/api_gateway.py +194 -84
  130. mlrun/runtimes/nuclio/application/application.py +170 -8
  131. mlrun/runtimes/nuclio/function.py +39 -49
  132. mlrun/runtimes/pod.py +16 -36
  133. mlrun/runtimes/remotesparkjob.py +9 -3
  134. mlrun/runtimes/sparkjob/spark3job.py +1 -1
  135. mlrun/runtimes/utils.py +6 -45
  136. mlrun/serving/__init__.py +8 -1
  137. mlrun/serving/server.py +2 -1
  138. mlrun/serving/states.py +51 -8
  139. mlrun/serving/utils.py +19 -11
  140. mlrun/serving/v2_serving.py +5 -1
  141. mlrun/track/tracker.py +2 -1
  142. mlrun/utils/async_http.py +25 -5
  143. mlrun/utils/helpers.py +157 -83
  144. mlrun/utils/logger.py +39 -7
  145. mlrun/utils/notifications/notification/__init__.py +14 -9
  146. mlrun/utils/notifications/notification/base.py +1 -1
  147. mlrun/utils/notifications/notification/slack.py +34 -7
  148. mlrun/utils/notifications/notification/webhook.py +1 -1
  149. mlrun/utils/notifications/notification_pusher.py +147 -16
  150. mlrun/utils/regex.py +9 -0
  151. mlrun/utils/v3io_clients.py +0 -1
  152. mlrun/utils/version/version.json +2 -2
  153. {mlrun-1.7.0rc14.dist-info → mlrun-1.7.0rc22.dist-info}/METADATA +14 -6
  154. {mlrun-1.7.0rc14.dist-info → mlrun-1.7.0rc22.dist-info}/RECORD +158 -138
  155. mlrun/kfpops.py +0 -865
  156. mlrun/platforms/other.py +0 -305
  157. {mlrun-1.7.0rc14.dist-info → mlrun-1.7.0rc22.dist-info}/LICENSE +0 -0
  158. {mlrun-1.7.0rc14.dist-info → mlrun-1.7.0rc22.dist-info}/WHEEL +0 -0
  159. {mlrun-1.7.0rc14.dist-info → mlrun-1.7.0rc22.dist-info}/entry_points.txt +0 -0
  160. {mlrun-1.7.0rc14.dist-info → mlrun-1.7.0rc22.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,211 @@
1
+ # Copyright 2023 Iguazio
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ import uuid
16
+ import warnings
17
+ from typing import Union
18
+
19
+ import pandas as pd
20
+ import semver
21
+
22
+ import mlrun.model_monitoring.applications.base as mm_base
23
+ import mlrun.model_monitoring.applications.context as mm_context
24
+ from mlrun.errors import MLRunIncompatibleVersionError
25
+
26
+ SUPPORTED_EVIDENTLY_VERSION = semver.Version.parse("0.4.11")
27
+
28
+
29
+ def _check_evidently_version(*, cur: semver.Version, ref: semver.Version) -> None:
30
+ if ref.is_compatible(cur) or (
31
+ cur.major == ref.major == 0 and cur.minor == ref.minor and cur.patch > ref.patch
32
+ ):
33
+ return
34
+ if cur.major == ref.major == 0 and cur.minor > ref.minor:
35
+ warnings.warn(
36
+ f"Evidently version {cur} is not compatible with the tested "
37
+ f"version {ref}, use at your own risk."
38
+ )
39
+ else:
40
+ raise MLRunIncompatibleVersionError(
41
+ f"Evidently version {cur} is not supported, please change to "
42
+ f"{ref} (or another compatible version)."
43
+ )
44
+
45
+
46
+ _HAS_EVIDENTLY = False
47
+ try:
48
+ import evidently # noqa: F401
49
+
50
+ _check_evidently_version(
51
+ cur=semver.Version.parse(evidently.__version__),
52
+ ref=SUPPORTED_EVIDENTLY_VERSION,
53
+ )
54
+ _HAS_EVIDENTLY = True
55
+ except ModuleNotFoundError:
56
+ pass
57
+
58
+
59
+ if _HAS_EVIDENTLY:
60
+ from evidently.renderers.notebook_utils import determine_template
61
+ from evidently.report.report import Report
62
+ from evidently.suite.base_suite import Suite
63
+ from evidently.ui.type_aliases import STR_UUID
64
+ from evidently.ui.workspace import Workspace
65
+ from evidently.utils.dashboard import TemplateParams
66
+
67
+
68
+ class EvidentlyModelMonitoringApplicationBase(mm_base.ModelMonitoringApplicationBase):
69
+ def __init__(
70
+ self, evidently_workspace_path: str, evidently_project_id: "STR_UUID"
71
+ ) -> None:
72
+ """
73
+ A class for integrating Evidently for mlrun model monitoring within a monitoring application.
74
+ Note: evidently is not installed by default in the mlrun/mlrun image.
75
+ It must be installed separately to use this class.
76
+
77
+ :param evidently_workspace_path: (str) The path to the Evidently workspace.
78
+ :param evidently_project_id: (STR_UUID) The ID of the Evidently project.
79
+
80
+ """
81
+ if not _HAS_EVIDENTLY:
82
+ raise ModuleNotFoundError("Evidently is not installed - the app cannot run")
83
+ self.evidently_workspace = Workspace.create(evidently_workspace_path)
84
+ self.evidently_project_id = evidently_project_id
85
+ self.evidently_project = self.evidently_workspace.get_project(
86
+ evidently_project_id
87
+ )
88
+
89
+ def log_evidently_object(
90
+ self, evidently_object: Union["Report", "Suite"], artifact_name: str
91
+ ):
92
+ """
93
+ Logs an Evidently report or suite as an artifact.
94
+
95
+ :param evidently_object: (Union[Report, Suite]) The Evidently report or suite object.
96
+ :param artifact_name: (str) The name for the logged artifact.
97
+ """
98
+ evidently_object_html = evidently_object.get_html()
99
+ self.context.log_artifact(
100
+ artifact_name, body=evidently_object_html.encode("utf-8"), format="html"
101
+ )
102
+
103
+ def log_project_dashboard(
104
+ self,
105
+ timestamp_start: pd.Timestamp,
106
+ timestamp_end: pd.Timestamp,
107
+ artifact_name: str = "dashboard",
108
+ ):
109
+ """
110
+ Logs an Evidently project dashboard.
111
+
112
+ :param timestamp_start: (pd.Timestamp) The start timestamp for the dashboard data.
113
+ :param timestamp_end: (pd.Timestamp) The end timestamp for the dashboard data.
114
+ :param artifact_name: (str) The name for the logged artifact.
115
+ """
116
+
117
+ dashboard_info = self.evidently_project.build_dashboard_info(
118
+ timestamp_start, timestamp_end
119
+ )
120
+ template_params = TemplateParams(
121
+ dashboard_id="pd_" + str(uuid.uuid4()).replace("-", ""),
122
+ dashboard_info=dashboard_info,
123
+ additional_graphs={},
124
+ )
125
+
126
+ dashboard_html = self._render(determine_template("inline"), template_params)
127
+ self.context.log_artifact(
128
+ artifact_name, body=dashboard_html.encode("utf-8"), format="html"
129
+ )
130
+
131
+ @staticmethod
132
+ def _render(temple_func, template_params: "TemplateParams"):
133
+ return temple_func(params=template_params)
134
+
135
+
136
+ class EvidentlyModelMonitoringApplicationBaseV2(
137
+ mm_base.ModelMonitoringApplicationBaseV2
138
+ ):
139
+ def __init__(
140
+ self, evidently_workspace_path: str, evidently_project_id: "STR_UUID"
141
+ ) -> None:
142
+ """
143
+ A class for integrating Evidently for mlrun model monitoring within a monitoring application.
144
+ Note: evidently is not installed by default in the mlrun/mlrun image.
145
+ It must be installed separately to use this class.
146
+
147
+ :param evidently_workspace_path: (str) The path to the Evidently workspace.
148
+ :param evidently_project_id: (STR_UUID) The ID of the Evidently project.
149
+
150
+ """
151
+
152
+ # TODO: support more than one project (mep -> project)
153
+ if not _HAS_EVIDENTLY:
154
+ raise ModuleNotFoundError("Evidently is not installed - the app cannot run")
155
+ self.evidently_workspace = Workspace.create(evidently_workspace_path)
156
+ self.evidently_project_id = evidently_project_id
157
+ self.evidently_project = self.evidently_workspace.get_project(
158
+ evidently_project_id
159
+ )
160
+
161
+ @staticmethod
162
+ def log_evidently_object(
163
+ monitoring_context: mm_context.MonitoringApplicationContext,
164
+ evidently_object: Union["Report", "Suite"],
165
+ artifact_name: str,
166
+ ):
167
+ """
168
+ Logs an Evidently report or suite as an artifact.
169
+
170
+ :param monitoring_context: (MonitoringApplicationContext) The monitoring context to process.
171
+ :param evidently_object: (Union[Report, Suite]) The Evidently report or suite object.
172
+ :param artifact_name: (str) The name for the logged artifact.
173
+ """
174
+ evidently_object_html = evidently_object.get_html()
175
+ monitoring_context.log_artifact(
176
+ artifact_name, body=evidently_object_html.encode("utf-8"), format="html"
177
+ )
178
+
179
+ def log_project_dashboard(
180
+ self,
181
+ monitoring_context: mm_context.MonitoringApplicationContext,
182
+ timestamp_start: pd.Timestamp,
183
+ timestamp_end: pd.Timestamp,
184
+ artifact_name: str = "dashboard",
185
+ ):
186
+ """
187
+ Logs an Evidently project dashboard.
188
+
189
+ :param monitoring_context: (MonitoringApplicationContext) The monitoring context to process.
190
+ :param timestamp_start: (pd.Timestamp) The start timestamp for the dashboard data.
191
+ :param timestamp_end: (pd.Timestamp) The end timestamp for the dashboard data.
192
+ :param artifact_name: (str) The name for the logged artifact.
193
+ """
194
+
195
+ dashboard_info = self.evidently_project.build_dashboard_info(
196
+ timestamp_start, timestamp_end
197
+ )
198
+ template_params = TemplateParams(
199
+ dashboard_id="pd_" + str(uuid.uuid4()).replace("-", ""),
200
+ dashboard_info=dashboard_info,
201
+ additional_graphs={},
202
+ )
203
+
204
+ dashboard_html = self._render(determine_template("inline"), template_params)
205
+ monitoring_context.log_artifact(
206
+ artifact_name, body=dashboard_html.encode("utf-8"), format="html"
207
+ )
208
+
209
+ @staticmethod
210
+ def _render(temple_func, template_params: "TemplateParams"):
211
+ return temple_func(params=template_params)
@@ -12,14 +12,17 @@
12
12
  # See the License for the specific language governing permissions and
13
13
  # limitations under the License.
14
14
 
15
+ import json
15
16
  from dataclasses import dataclass
16
- from typing import Final, Optional, Protocol, cast
17
+ from typing import Final, Optional, Protocol, Union, cast
17
18
 
18
19
  import numpy as np
19
- from pandas import DataFrame, Series, Timestamp
20
+ from pandas import DataFrame, Series
20
21
 
21
22
  import mlrun.artifacts
22
23
  import mlrun.common.model_monitoring.helpers
24
+ import mlrun.model_monitoring.applications.context as mm_context
25
+ import mlrun.model_monitoring.applications.results as mm_results
23
26
  import mlrun.model_monitoring.features_drift_table as mm_drift_table
24
27
  from mlrun.common.schemas.model_monitoring.constants import (
25
28
  EventFieldType,
@@ -27,9 +30,8 @@ from mlrun.common.schemas.model_monitoring.constants import (
27
30
  ResultKindApp,
28
31
  ResultStatusApp,
29
32
  )
30
- from mlrun.model_monitoring.application import (
31
- ModelMonitoringApplicationBase,
32
- ModelMonitoringApplicationResult,
33
+ from mlrun.model_monitoring.applications import (
34
+ ModelMonitoringApplicationBaseV2,
33
35
  )
34
36
  from mlrun.model_monitoring.metrics.histogram_distance import (
35
37
  HellingerDistance,
@@ -85,17 +87,34 @@ class DataDriftClassifier:
85
87
  return ResultStatusApp.no_detection
86
88
 
87
89
 
88
- class HistogramDataDriftApplication(ModelMonitoringApplicationBase):
90
+ class HistogramDataDriftApplication(ModelMonitoringApplicationBaseV2):
89
91
  """
90
92
  MLRun's default data drift application for model monitoring.
91
93
 
92
- The application calculates the metrics over the features' histograms.
93
- Each metric is calculated over all the features, the mean is taken,
94
- and the status is returned.
94
+ The application expects tabular numerical data, and calculates three metrics over the features' histograms.
95
+ The three metrics are:
96
+
97
+ * Hellinger distance.
98
+ * Total variance distance.
99
+ * Kullback-Leibler divergence.
100
+
101
+ Each metric is calculated over all the features individually and the mean is taken as the metric value.
102
+ The average of Hellinger and total variance distance is taken as the result.
103
+
104
+ The application logs two artifacts:
105
+
106
+ * A JSON with the general drift per feature.
107
+ * A plotly table with the different metrics per feature.
108
+
109
+ This application is deployed by default when calling:
110
+
111
+ .. code-block:: python
112
+
113
+ project.enable_model_monitoring()
114
+
95
115
  """
96
116
 
97
117
  NAME: Final[str] = HistogramDataDriftApplicationConstants.NAME
98
- METRIC_KIND: Final[ResultKindApp] = ResultKindApp.data_drift
99
118
 
100
119
  _REQUIRED_METRICS = {HellingerDistance, TotalVarianceDistance}
101
120
 
@@ -107,8 +126,6 @@ class HistogramDataDriftApplication(ModelMonitoringApplicationBase):
107
126
 
108
127
  def __init__(self, value_classifier: Optional[ValueClassifier] = None) -> None:
109
128
  """
110
- Initialize the data drift application.
111
-
112
129
  :param value_classifier: Classifier object that adheres to the `ValueClassifier` protocol.
113
130
  If not provided, the default `DataDriftClassifier()` is used.
114
131
  """
@@ -118,17 +135,22 @@ class HistogramDataDriftApplication(ModelMonitoringApplicationBase):
118
135
  ), "TVD and Hellinger distance are required for the general data drift result"
119
136
 
120
137
  def _compute_metrics_per_feature(
121
- self, sample_df_stats: DataFrame, feature_stats: DataFrame
138
+ self, monitoring_context: mm_context.MonitoringApplicationContext
122
139
  ) -> DataFrame:
123
140
  """Compute the metrics for the different features and labels"""
124
141
  metrics_per_feature = DataFrame(
125
142
  columns=[metric_class.NAME for metric_class in self.metrics]
126
143
  )
127
-
144
+ feature_stats = monitoring_context.dict_to_histogram(
145
+ monitoring_context.feature_stats
146
+ )
147
+ sample_df_stats = monitoring_context.dict_to_histogram(
148
+ monitoring_context.sample_df_stats
149
+ )
128
150
  for feature_name in feature_stats:
129
151
  sample_hist = np.asarray(sample_df_stats[feature_name])
130
152
  reference_hist = np.asarray(feature_stats[feature_name])
131
- self.context.logger.info(
153
+ monitoring_context.logger.info(
132
154
  "Computing metrics for feature", feature_name=feature_name
133
155
  )
134
156
  metrics_per_feature.loc[feature_name] = { # pyright: ignore[reportCallIssue,reportArgumentType]
@@ -137,62 +159,65 @@ class HistogramDataDriftApplication(ModelMonitoringApplicationBase):
137
159
  ).compute()
138
160
  for metric in self.metrics
139
161
  }
140
- self.context.logger.info("Finished computing the metrics")
162
+ monitoring_context.logger.info("Finished computing the metrics")
141
163
 
142
164
  return metrics_per_feature
143
165
 
144
- def _add_general_drift_result(
145
- self, results: list[ModelMonitoringApplicationResult], value: float
146
- ) -> None:
147
- """Add the general drift result to the results list and log it"""
166
+ def _get_general_drift_result(
167
+ self,
168
+ metrics: list[mm_results.ModelMonitoringApplicationMetric],
169
+ monitoring_context: mm_context.MonitoringApplicationContext,
170
+ metrics_per_feature: DataFrame,
171
+ ) -> mm_results.ModelMonitoringApplicationResult:
172
+ """Get the general drift result from the metrics list"""
173
+ value = cast(
174
+ float,
175
+ np.mean(
176
+ [
177
+ metric.value
178
+ for metric in metrics
179
+ if metric.name
180
+ in [
181
+ f"{HellingerDistance.NAME}_mean",
182
+ f"{TotalVarianceDistance.NAME}_mean",
183
+ ]
184
+ ]
185
+ ),
186
+ )
187
+
148
188
  status = self._value_classifier.value_to_status(value)
149
- results.append(
150
- ModelMonitoringApplicationResult(
151
- name=HistogramDataDriftApplicationConstants.GENERAL_RESULT_NAME,
152
- value=value,
153
- kind=self.METRIC_KIND,
154
- status=status,
155
- )
189
+ return mm_results.ModelMonitoringApplicationResult(
190
+ name=HistogramDataDriftApplicationConstants.GENERAL_RESULT_NAME,
191
+ value=value,
192
+ kind=ResultKindApp.data_drift,
193
+ status=status,
194
+ extra_data={
195
+ EventFieldType.CURRENT_STATS: json.dumps(
196
+ monitoring_context.feature_stats
197
+ ),
198
+ EventFieldType.DRIFT_MEASURES: metrics_per_feature.T.to_json(),
199
+ EventFieldType.DRIFT_STATUS: status.value,
200
+ },
156
201
  )
157
202
 
158
- def _get_results(
159
- self, metrics_per_feature: DataFrame
160
- ) -> list[ModelMonitoringApplicationResult]:
203
+ @staticmethod
204
+ def _get_metrics(
205
+ metrics_per_feature: DataFrame,
206
+ ) -> list[mm_results.ModelMonitoringApplicationMetric]:
161
207
  """Average each metric over the features"""
162
- results: list[ModelMonitoringApplicationResult] = []
208
+ metrics: list[mm_results.ModelMonitoringApplicationMetric] = []
163
209
 
164
- self.context.logger.debug("Averaging metrics over the features")
165
210
  metrics_mean = metrics_per_feature.mean().to_dict()
166
211
 
167
- self.context.logger.debug("Creating the results")
168
212
  for name, value in metrics_mean.items():
169
- if name == KullbackLeiblerDivergence.NAME:
170
- # This metric is not bounded from above [0, inf).
171
- # No status is currently reported for KL divergence
172
- status = ResultStatusApp.irrelevant
173
- else:
174
- status = self._value_classifier.value_to_status(value)
175
- results.append(
176
- ModelMonitoringApplicationResult(
213
+ metrics.append(
214
+ mm_results.ModelMonitoringApplicationMetric(
177
215
  name=f"{name}_mean",
178
216
  value=value,
179
- kind=self.METRIC_KIND,
180
- status=status,
181
217
  )
182
218
  )
183
219
 
184
- self._add_general_drift_result(
185
- results=results,
186
- value=np.mean(
187
- [
188
- metrics_mean[HellingerDistance.NAME],
189
- metrics_mean[TotalVarianceDistance.NAME],
190
- ]
191
- ),
192
- )
193
-
194
- self.context.logger.info("Finished with the results")
195
- return results
220
+ return metrics
196
221
 
197
222
  @staticmethod
198
223
  def _remove_timestamp_feature(
@@ -209,17 +234,21 @@ class HistogramDataDriftApplication(ModelMonitoringApplicationBase):
209
234
  del sample_set_statistics[EventFieldType.TIMESTAMP]
210
235
  return sample_set_statistics
211
236
 
212
- def _log_json_artifact(self, drift_per_feature_values: Series) -> None:
237
+ @staticmethod
238
+ def _log_json_artifact(
239
+ drift_per_feature_values: Series,
240
+ monitoring_context: mm_context.MonitoringApplicationContext,
241
+ ) -> None:
213
242
  """Log the drift values as a JSON artifact"""
214
- self.context.logger.debug("Logging drift value per feature JSON artifact")
215
- self.context.log_artifact(
243
+ monitoring_context.logger.debug("Logging drift value per feature JSON artifact")
244
+ monitoring_context.log_artifact(
216
245
  mlrun.artifacts.Artifact(
217
246
  body=drift_per_feature_values.to_json(),
218
247
  format="json",
219
248
  key="features_drift_results",
220
249
  )
221
250
  )
222
- self.context.logger.debug("Logged JSON artifact successfully")
251
+ monitoring_context.logger.debug("Logged JSON artifact successfully")
223
252
 
224
253
  def _log_plotly_table_artifact(
225
254
  self,
@@ -227,34 +256,34 @@ class HistogramDataDriftApplication(ModelMonitoringApplicationBase):
227
256
  inputs_statistics: mlrun.common.model_monitoring.helpers.FeatureStats,
228
257
  metrics_per_feature: DataFrame,
229
258
  drift_per_feature_values: Series,
259
+ monitoring_context: mm_context.MonitoringApplicationContext,
230
260
  ) -> None:
231
261
  """Log the Plotly drift table artifact"""
232
- self.context.logger.debug(
262
+ monitoring_context.logger.debug(
233
263
  "Feature stats",
234
264
  sample_set_statistics=sample_set_statistics,
235
265
  inputs_statistics=inputs_statistics,
236
266
  )
237
267
 
238
- self.context.logger.debug("Computing drift results per feature")
268
+ monitoring_context.logger.debug("Computing drift results per feature")
239
269
  drift_results = {
240
270
  cast(str, key): (self._value_classifier.value_to_status(value), value)
241
271
  for key, value in drift_per_feature_values.items()
242
272
  }
243
- self.context.logger.debug("Logging plotly artifact")
244
- self.context.log_artifact(
273
+ monitoring_context.logger.debug("Logging plotly artifact")
274
+ monitoring_context.log_artifact(
245
275
  mm_drift_table.FeaturesDriftTablePlot().produce(
246
276
  sample_set_statistics=sample_set_statistics,
247
277
  inputs_statistics=inputs_statistics,
248
- metrics=metrics_per_feature.T.to_dict(),
278
+ metrics=metrics_per_feature.T.to_dict(), # pyright: ignore[reportArgumentType]
249
279
  drift_results=drift_results,
250
280
  )
251
281
  )
252
- self.context.logger.debug("Logged plotly artifact successfully")
282
+ monitoring_context.logger.debug("Logged plotly artifact successfully")
253
283
 
254
284
  def _log_drift_artifacts(
255
285
  self,
256
- sample_set_statistics: mlrun.common.model_monitoring.helpers.FeatureStats,
257
- inputs_statistics: mlrun.common.model_monitoring.helpers.FeatureStats,
286
+ monitoring_context: mm_context.MonitoringApplicationContext,
258
287
  metrics_per_feature: DataFrame,
259
288
  log_json_artifact: bool = True,
260
289
  ) -> None:
@@ -264,45 +293,57 @@ class HistogramDataDriftApplication(ModelMonitoringApplicationBase):
264
293
  ].mean(axis=1)
265
294
 
266
295
  if log_json_artifact:
267
- self._log_json_artifact(drift_per_feature_values)
296
+ self._log_json_artifact(drift_per_feature_values, monitoring_context)
268
297
 
269
298
  self._log_plotly_table_artifact(
270
- sample_set_statistics=self._remove_timestamp_feature(sample_set_statistics),
271
- inputs_statistics=inputs_statistics,
299
+ sample_set_statistics=self._remove_timestamp_feature(
300
+ monitoring_context.sample_df_stats
301
+ ),
302
+ inputs_statistics=monitoring_context.feature_stats,
272
303
  metrics_per_feature=metrics_per_feature,
273
304
  drift_per_feature_values=drift_per_feature_values,
305
+ monitoring_context=monitoring_context,
274
306
  )
275
307
 
276
308
  def do_tracking(
277
309
  self,
278
- application_name: str,
279
- sample_df_stats: mlrun.common.model_monitoring.helpers.FeatureStats,
280
- feature_stats: mlrun.common.model_monitoring.helpers.FeatureStats,
281
- sample_df: DataFrame,
282
- start_infer_time: Timestamp,
283
- end_infer_time: Timestamp,
284
- latest_request: Timestamp,
285
- endpoint_id: str,
286
- output_stream_uri: str,
287
- ) -> list[ModelMonitoringApplicationResult]:
310
+ monitoring_context: mm_context.MonitoringApplicationContext,
311
+ ) -> list[
312
+ Union[
313
+ mm_results.ModelMonitoringApplicationResult,
314
+ mm_results.ModelMonitoringApplicationMetric,
315
+ ]
316
+ ]:
288
317
  """
289
318
  Calculate and return the data drift metrics, averaged over the features.
290
319
 
291
- Refer to `ModelMonitoringApplicationBase` for the meaning of the
320
+ Refer to `ModelMonitoringApplicationBaseV2` for the meaning of the
292
321
  function arguments.
293
322
  """
294
- self.context.logger.debug("Starting to run the application")
323
+ monitoring_context.logger.debug("Starting to run the application")
324
+ if not monitoring_context.feature_stats:
325
+ monitoring_context.logger.info(
326
+ "No feature statistics found, skipping the application. \n"
327
+ "In order to run the application, training set must be provided when logging the model."
328
+ )
329
+ return []
295
330
  metrics_per_feature = self._compute_metrics_per_feature(
296
- sample_df_stats=self.dict_to_histogram(sample_df_stats),
297
- feature_stats=self.dict_to_histogram(feature_stats),
331
+ monitoring_context=monitoring_context
298
332
  )
299
- self.context.logger.debug("Saving artifacts")
333
+ monitoring_context.logger.debug("Saving artifacts")
300
334
  self._log_drift_artifacts(
301
- inputs_statistics=feature_stats,
302
- sample_set_statistics=sample_df_stats,
335
+ monitoring_context=monitoring_context,
336
+ metrics_per_feature=metrics_per_feature,
337
+ )
338
+ monitoring_context.logger.debug("Computing average per metric")
339
+ metrics = self._get_metrics(metrics_per_feature)
340
+ result = self._get_general_drift_result(
341
+ metrics=metrics,
342
+ monitoring_context=monitoring_context,
303
343
  metrics_per_feature=metrics_per_feature,
304
344
  )
305
- self.context.logger.debug("Computing average per metric")
306
- results = self._get_results(metrics_per_feature)
307
- self.context.logger.debug("Finished running the application", results=results)
308
- return results
345
+ metrics_and_result = metrics + [result]
346
+ monitoring_context.logger.debug(
347
+ "Finished running the application", results=metrics_and_result
348
+ )
349
+ return metrics_and_result
@@ -0,0 +1,99 @@
1
+ # Copyright 2023 Iguazio
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ import dataclasses
16
+ import json
17
+ import re
18
+ from abc import ABC, abstractmethod
19
+
20
+ import mlrun.common.helpers
21
+ import mlrun.common.model_monitoring.helpers
22
+ import mlrun.common.schemas.model_monitoring.constants as mm_constant
23
+ import mlrun.utils.v3io_clients
24
+
25
+
26
+ class _ModelMonitoringApplicationDataRes(ABC):
27
+ name: str
28
+
29
+ def __post_init__(self):
30
+ pat = re.compile(r"[a-zA-Z_][a-zA-Z0-9_]*")
31
+ if not re.fullmatch(pat, self.name):
32
+ raise mlrun.errors.MLRunInvalidArgumentError(
33
+ "Attribute name must be of the format [a-zA-Z_][a-zA-Z0-9_]*"
34
+ )
35
+
36
+ @abstractmethod
37
+ def to_dict(self):
38
+ raise NotImplementedError
39
+
40
+
41
+ @dataclasses.dataclass
42
+ class ModelMonitoringApplicationResult(_ModelMonitoringApplicationDataRes):
43
+ """
44
+ Class representing the result of a custom model monitoring application.
45
+
46
+ :param name: (str) Name of the application result. This name must be
47
+ unique for each result in a single application
48
+ (name must be of the format [a-zA-Z_][a-zA-Z0-9_]*).
49
+ :param value: (float) Value of the application result.
50
+ :param kind: (ResultKindApp) Kind of application result.
51
+ :param status: (ResultStatusApp) Status of the application result.
52
+ :param extra_data: (dict) Extra data associated with the application result.
53
+ """
54
+
55
+ name: str
56
+ value: float
57
+ kind: mm_constant.ResultKindApp
58
+ status: mm_constant.ResultStatusApp
59
+ extra_data: dict = dataclasses.field(default_factory=dict)
60
+
61
+ def to_dict(self):
62
+ """
63
+ Convert the object to a dictionary format suitable for writing.
64
+
65
+ :returns: (dict) Dictionary representation of the result.
66
+ """
67
+ return {
68
+ mm_constant.ResultData.RESULT_NAME: self.name,
69
+ mm_constant.ResultData.RESULT_VALUE: self.value,
70
+ mm_constant.ResultData.RESULT_KIND: self.kind.value,
71
+ mm_constant.ResultData.RESULT_STATUS: self.status.value,
72
+ mm_constant.ResultData.RESULT_EXTRA_DATA: json.dumps(self.extra_data),
73
+ }
74
+
75
+
76
+ @dataclasses.dataclass
77
+ class ModelMonitoringApplicationMetric(_ModelMonitoringApplicationDataRes):
78
+ """
79
+ Class representing a single metric of a custom model monitoring application.
80
+
81
+ :param name: (str) Name of the application metric. This name must be
82
+ unique for each metric in a single application
83
+ (name must be of the format [a-zA-Z_][a-zA-Z0-9_]*).
84
+ :param value: (float) Value of the application metric.
85
+ """
86
+
87
+ name: str
88
+ value: float
89
+
90
+ def to_dict(self):
91
+ """
92
+ Convert the object to a dictionary format suitable for writing.
93
+
94
+ :returns: (dict) Dictionary representation of the metric.
95
+ """
96
+ return {
97
+ mm_constant.MetricData.METRIC_NAME: self.name,
98
+ mm_constant.MetricData.METRIC_VALUE: self.value,
99
+ }