mlrun 1.7.0rc4__py3-none-any.whl → 1.7.0rc20__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of mlrun might be problematic. Click here for more details.

Files changed (200) hide show
  1. mlrun/__init__.py +11 -1
  2. mlrun/__main__.py +25 -111
  3. mlrun/{datastore/helpers.py → alerts/__init__.py} +2 -5
  4. mlrun/alerts/alert.py +144 -0
  5. mlrun/api/schemas/__init__.py +4 -3
  6. mlrun/artifacts/__init__.py +8 -3
  7. mlrun/artifacts/base.py +38 -254
  8. mlrun/artifacts/dataset.py +9 -190
  9. mlrun/artifacts/manager.py +41 -47
  10. mlrun/artifacts/model.py +30 -158
  11. mlrun/artifacts/plots.py +23 -380
  12. mlrun/common/constants.py +68 -0
  13. mlrun/common/formatters/__init__.py +19 -0
  14. mlrun/{model_monitoring/stores/models/sqlite.py → common/formatters/artifact.py} +6 -8
  15. mlrun/common/formatters/base.py +78 -0
  16. mlrun/common/formatters/function.py +41 -0
  17. mlrun/common/formatters/pipeline.py +53 -0
  18. mlrun/common/formatters/project.py +51 -0
  19. mlrun/{runtimes → common/runtimes}/constants.py +32 -4
  20. mlrun/common/schemas/__init__.py +25 -4
  21. mlrun/common/schemas/alert.py +203 -0
  22. mlrun/common/schemas/api_gateway.py +148 -0
  23. mlrun/common/schemas/artifact.py +15 -5
  24. mlrun/common/schemas/auth.py +8 -2
  25. mlrun/common/schemas/client_spec.py +2 -0
  26. mlrun/common/schemas/frontend_spec.py +1 -0
  27. mlrun/common/schemas/function.py +4 -0
  28. mlrun/common/schemas/hub.py +7 -9
  29. mlrun/common/schemas/model_monitoring/__init__.py +19 -3
  30. mlrun/common/schemas/model_monitoring/constants.py +96 -26
  31. mlrun/common/schemas/model_monitoring/grafana.py +9 -5
  32. mlrun/common/schemas/model_monitoring/model_endpoints.py +86 -2
  33. mlrun/{runtimes/mpijob/v1alpha1.py → common/schemas/pagination.py} +10 -13
  34. mlrun/common/schemas/pipeline.py +0 -9
  35. mlrun/common/schemas/project.py +22 -21
  36. mlrun/common/types.py +7 -1
  37. mlrun/config.py +87 -19
  38. mlrun/data_types/data_types.py +4 -0
  39. mlrun/data_types/to_pandas.py +9 -9
  40. mlrun/datastore/__init__.py +5 -8
  41. mlrun/datastore/alibaba_oss.py +130 -0
  42. mlrun/datastore/azure_blob.py +4 -5
  43. mlrun/datastore/base.py +69 -30
  44. mlrun/datastore/datastore.py +10 -2
  45. mlrun/datastore/datastore_profile.py +90 -6
  46. mlrun/datastore/google_cloud_storage.py +1 -1
  47. mlrun/datastore/hdfs.py +5 -0
  48. mlrun/datastore/inmem.py +2 -2
  49. mlrun/datastore/redis.py +2 -2
  50. mlrun/datastore/s3.py +5 -0
  51. mlrun/datastore/snowflake_utils.py +43 -0
  52. mlrun/datastore/sources.py +172 -44
  53. mlrun/datastore/store_resources.py +7 -7
  54. mlrun/datastore/targets.py +285 -41
  55. mlrun/datastore/utils.py +68 -5
  56. mlrun/datastore/v3io.py +27 -50
  57. mlrun/db/auth_utils.py +152 -0
  58. mlrun/db/base.py +149 -14
  59. mlrun/db/factory.py +1 -1
  60. mlrun/db/httpdb.py +608 -178
  61. mlrun/db/nopdb.py +191 -7
  62. mlrun/errors.py +11 -0
  63. mlrun/execution.py +37 -20
  64. mlrun/feature_store/__init__.py +0 -2
  65. mlrun/feature_store/api.py +21 -52
  66. mlrun/feature_store/feature_set.py +48 -23
  67. mlrun/feature_store/feature_vector.py +2 -1
  68. mlrun/feature_store/ingestion.py +7 -6
  69. mlrun/feature_store/retrieval/base.py +9 -4
  70. mlrun/feature_store/retrieval/conversion.py +9 -9
  71. mlrun/feature_store/retrieval/dask_merger.py +2 -0
  72. mlrun/feature_store/retrieval/job.py +9 -3
  73. mlrun/feature_store/retrieval/local_merger.py +2 -0
  74. mlrun/feature_store/retrieval/spark_merger.py +34 -24
  75. mlrun/feature_store/steps.py +30 -19
  76. mlrun/features.py +4 -13
  77. mlrun/frameworks/_dl_common/loggers/tensorboard_logger.py +7 -12
  78. mlrun/frameworks/auto_mlrun/auto_mlrun.py +2 -2
  79. mlrun/frameworks/lgbm/__init__.py +1 -1
  80. mlrun/frameworks/lgbm/callbacks/callback.py +2 -4
  81. mlrun/frameworks/lgbm/model_handler.py +1 -1
  82. mlrun/frameworks/parallel_coordinates.py +2 -1
  83. mlrun/frameworks/pytorch/__init__.py +2 -2
  84. mlrun/frameworks/sklearn/__init__.py +1 -1
  85. mlrun/frameworks/tf_keras/__init__.py +5 -2
  86. mlrun/frameworks/tf_keras/callbacks/logging_callback.py +1 -1
  87. mlrun/frameworks/tf_keras/mlrun_interface.py +2 -2
  88. mlrun/frameworks/xgboost/__init__.py +1 -1
  89. mlrun/k8s_utils.py +10 -11
  90. mlrun/launcher/__init__.py +1 -1
  91. mlrun/launcher/base.py +6 -5
  92. mlrun/launcher/client.py +8 -6
  93. mlrun/launcher/factory.py +1 -1
  94. mlrun/launcher/local.py +9 -3
  95. mlrun/launcher/remote.py +9 -3
  96. mlrun/lists.py +6 -2
  97. mlrun/model.py +58 -19
  98. mlrun/model_monitoring/__init__.py +1 -1
  99. mlrun/model_monitoring/api.py +127 -301
  100. mlrun/model_monitoring/application.py +5 -296
  101. mlrun/model_monitoring/applications/__init__.py +11 -0
  102. mlrun/model_monitoring/applications/_application_steps.py +157 -0
  103. mlrun/model_monitoring/applications/base.py +282 -0
  104. mlrun/model_monitoring/applications/context.py +214 -0
  105. mlrun/model_monitoring/applications/evidently_base.py +211 -0
  106. mlrun/model_monitoring/applications/histogram_data_drift.py +224 -93
  107. mlrun/model_monitoring/applications/results.py +99 -0
  108. mlrun/model_monitoring/controller.py +30 -36
  109. mlrun/model_monitoring/db/__init__.py +18 -0
  110. mlrun/model_monitoring/{stores → db/stores}/__init__.py +43 -36
  111. mlrun/model_monitoring/db/stores/base/__init__.py +15 -0
  112. mlrun/model_monitoring/{stores/model_endpoint_store.py → db/stores/base/store.py} +58 -32
  113. mlrun/model_monitoring/db/stores/sqldb/__init__.py +13 -0
  114. mlrun/model_monitoring/db/stores/sqldb/models/__init__.py +71 -0
  115. mlrun/model_monitoring/{stores → db/stores/sqldb}/models/base.py +109 -5
  116. mlrun/model_monitoring/db/stores/sqldb/models/mysql.py +88 -0
  117. mlrun/model_monitoring/{stores/models/mysql.py → db/stores/sqldb/models/sqlite.py} +19 -13
  118. mlrun/model_monitoring/db/stores/sqldb/sql_store.py +684 -0
  119. mlrun/model_monitoring/db/stores/v3io_kv/__init__.py +13 -0
  120. mlrun/model_monitoring/{stores/kv_model_endpoint_store.py → db/stores/v3io_kv/kv_store.py} +302 -155
  121. mlrun/model_monitoring/db/tsdb/__init__.py +100 -0
  122. mlrun/model_monitoring/db/tsdb/base.py +329 -0
  123. mlrun/model_monitoring/db/tsdb/helpers.py +30 -0
  124. mlrun/model_monitoring/db/tsdb/tdengine/__init__.py +15 -0
  125. mlrun/model_monitoring/db/tsdb/tdengine/schemas.py +240 -0
  126. mlrun/model_monitoring/db/tsdb/tdengine/stream_graph_steps.py +45 -0
  127. mlrun/model_monitoring/db/tsdb/tdengine/tdengine_connector.py +397 -0
  128. mlrun/model_monitoring/db/tsdb/v3io/__init__.py +15 -0
  129. mlrun/model_monitoring/db/tsdb/v3io/stream_graph_steps.py +117 -0
  130. mlrun/model_monitoring/db/tsdb/v3io/v3io_connector.py +630 -0
  131. mlrun/model_monitoring/evidently_application.py +6 -118
  132. mlrun/model_monitoring/features_drift_table.py +34 -22
  133. mlrun/model_monitoring/helpers.py +100 -7
  134. mlrun/model_monitoring/model_endpoint.py +3 -2
  135. mlrun/model_monitoring/stream_processing.py +93 -228
  136. mlrun/model_monitoring/tracking_policy.py +7 -1
  137. mlrun/model_monitoring/writer.py +152 -124
  138. mlrun/package/packagers_manager.py +1 -0
  139. mlrun/package/utils/_formatter.py +2 -2
  140. mlrun/platforms/__init__.py +11 -10
  141. mlrun/platforms/iguazio.py +21 -202
  142. mlrun/projects/operations.py +30 -16
  143. mlrun/projects/pipelines.py +92 -99
  144. mlrun/projects/project.py +757 -268
  145. mlrun/render.py +15 -14
  146. mlrun/run.py +160 -162
  147. mlrun/runtimes/__init__.py +55 -3
  148. mlrun/runtimes/base.py +33 -19
  149. mlrun/runtimes/databricks_job/databricks_wrapper.py +1 -1
  150. mlrun/runtimes/funcdoc.py +0 -28
  151. mlrun/runtimes/kubejob.py +28 -122
  152. mlrun/runtimes/local.py +5 -2
  153. mlrun/runtimes/mpijob/__init__.py +0 -20
  154. mlrun/runtimes/mpijob/abstract.py +8 -8
  155. mlrun/runtimes/mpijob/v1.py +1 -1
  156. mlrun/runtimes/nuclio/__init__.py +1 -0
  157. mlrun/runtimes/nuclio/api_gateway.py +709 -0
  158. mlrun/runtimes/nuclio/application/__init__.py +15 -0
  159. mlrun/runtimes/nuclio/application/application.py +523 -0
  160. mlrun/runtimes/nuclio/application/reverse_proxy.go +95 -0
  161. mlrun/runtimes/nuclio/function.py +98 -58
  162. mlrun/runtimes/nuclio/serving.py +36 -42
  163. mlrun/runtimes/pod.py +196 -45
  164. mlrun/runtimes/remotesparkjob.py +1 -1
  165. mlrun/runtimes/sparkjob/spark3job.py +1 -1
  166. mlrun/runtimes/utils.py +6 -73
  167. mlrun/secrets.py +6 -2
  168. mlrun/serving/remote.py +2 -3
  169. mlrun/serving/routers.py +7 -4
  170. mlrun/serving/server.py +7 -8
  171. mlrun/serving/states.py +73 -43
  172. mlrun/serving/v2_serving.py +8 -7
  173. mlrun/track/tracker.py +2 -1
  174. mlrun/utils/async_http.py +25 -5
  175. mlrun/utils/helpers.py +141 -75
  176. mlrun/utils/http.py +1 -1
  177. mlrun/utils/logger.py +39 -7
  178. mlrun/utils/notifications/notification/__init__.py +14 -9
  179. mlrun/utils/notifications/notification/base.py +12 -0
  180. mlrun/utils/notifications/notification/console.py +2 -0
  181. mlrun/utils/notifications/notification/git.py +3 -1
  182. mlrun/utils/notifications/notification/ipython.py +2 -0
  183. mlrun/utils/notifications/notification/slack.py +101 -21
  184. mlrun/utils/notifications/notification/webhook.py +11 -1
  185. mlrun/utils/notifications/notification_pusher.py +147 -16
  186. mlrun/utils/retryer.py +3 -2
  187. mlrun/utils/v3io_clients.py +0 -1
  188. mlrun/utils/version/version.json +2 -2
  189. {mlrun-1.7.0rc4.dist-info → mlrun-1.7.0rc20.dist-info}/METADATA +33 -18
  190. mlrun-1.7.0rc20.dist-info/RECORD +353 -0
  191. {mlrun-1.7.0rc4.dist-info → mlrun-1.7.0rc20.dist-info}/WHEEL +1 -1
  192. mlrun/kfpops.py +0 -868
  193. mlrun/model_monitoring/batch.py +0 -974
  194. mlrun/model_monitoring/stores/models/__init__.py +0 -27
  195. mlrun/model_monitoring/stores/sql_model_endpoint_store.py +0 -382
  196. mlrun/platforms/other.py +0 -305
  197. mlrun-1.7.0rc4.dist-info/RECORD +0 -321
  198. {mlrun-1.7.0rc4.dist-info → mlrun-1.7.0rc20.dist-info}/LICENSE +0 -0
  199. {mlrun-1.7.0rc4.dist-info → mlrun-1.7.0rc20.dist-info}/entry_points.txt +0 -0
  200. {mlrun-1.7.0rc4.dist-info → mlrun-1.7.0rc20.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,211 @@
1
+ # Copyright 2023 Iguazio
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ import uuid
16
+ import warnings
17
+ from typing import Union
18
+
19
+ import pandas as pd
20
+ import semver
21
+
22
+ import mlrun.model_monitoring.applications.base as mm_base
23
+ import mlrun.model_monitoring.applications.context as mm_context
24
+ from mlrun.errors import MLRunIncompatibleVersionError
25
+
26
+ SUPPORTED_EVIDENTLY_VERSION = semver.Version.parse("0.4.11")
27
+
28
+
29
def _check_evidently_version(*, cur: semver.Version, ref: semver.Version) -> None:
    """
    Validate an Evidently version against a reference version.

    The check passes silently when ``ref.is_compatible(cur)`` is true, or when both
    versions have major ``0``, the same minor, and ``cur`` has a higher patch.
    When both majors are ``0`` and ``cur`` has a higher minor than ``ref``, a warning
    is issued instead of failing.

    :param cur: The version being checked.
    :param ref: The reference version to compare against.

    :raise MLRunIncompatibleVersionError: If none of the conditions above holds.
    """
    if ref.is_compatible(cur):
        return

    both_major_zero = cur.major == ref.major == 0
    if both_major_zero and cur.minor == ref.minor and cur.patch > ref.patch:
        return

    if not (both_major_zero and cur.minor > ref.minor):
        raise MLRunIncompatibleVersionError(
            f"Evidently version {cur} is not supported, please change to "
            f"{ref} (or another compatible version)."
        )

    # A newer 0.x minor release is let through, but only with a warning.
    warnings.warn(
        f"Evidently version {cur} is not compatible with the tested "
        f"version {ref}, use at your own risk."
    )
44
+
45
+
46
# Probe for Evidently once, at import time. A ModuleNotFoundError raised inside the
# ``try`` body leaves the flag False; any other exception propagates to the importer.
try:
    import evidently  # noqa: F401

    _check_evidently_version(
        cur=semver.Version.parse(evidently.__version__),
        ref=SUPPORTED_EVIDENTLY_VERSION,
    )
except ModuleNotFoundError:
    _HAS_EVIDENTLY = False
else:
    _HAS_EVIDENTLY = True


# The Evidently names used by the classes below are imported only when the probe succeeded.
if _HAS_EVIDENTLY:
    from evidently.renderers.notebook_utils import determine_template
    from evidently.report.report import Report
    from evidently.suite.base_suite import Suite
    from evidently.ui.type_aliases import STR_UUID
    from evidently.ui.workspace import Workspace
    from evidently.utils.dashboard import TemplateParams
66
+
67
+
68
class EvidentlyModelMonitoringApplicationBase(mm_base.ModelMonitoringApplicationBase):
    def __init__(
        self, evidently_workspace_path: str, evidently_project_id: "STR_UUID"
    ) -> None:
        """
        Base class for model monitoring applications that integrate Evidently.
        Note: evidently is not installed by default in the mlrun/mlrun image.
        It must be installed separately to use this class.

        :param evidently_workspace_path: (str) The path to the Evidently workspace.
        :param evidently_project_id:     (str) The ID of the Evidently project.

        :raise ModuleNotFoundError: If Evidently could not be imported by this module.
        """
        if not _HAS_EVIDENTLY:
            raise ModuleNotFoundError("Evidently is not installed - the app cannot run")

        workspace = Workspace.create(evidently_workspace_path)
        self.evidently_workspace = workspace
        self.evidently_project_id = evidently_project_id
        self.evidently_project = workspace.get_project(evidently_project_id)

    def log_evidently_object(
        self, evidently_object: Union["Report", "Suite"], artifact_name: str
    ):
        """
        Log an Evidently report or suite as an HTML artifact through ``self.context``.

        :param evidently_object: (Union[Report, Suite]) The Evidently report or suite object.
        :param artifact_name:    (str) The name for the logged artifact.
        """
        html = evidently_object.get_html()
        self.context.log_artifact(
            artifact_name,
            body=html.encode("utf-8"),
            format="html",
        )

    def log_project_dashboard(
        self,
        timestamp_start: pd.Timestamp,
        timestamp_end: pd.Timestamp,
        artifact_name: str = "dashboard",
    ):
        """
        Build the Evidently project dashboard and log it as an HTML artifact.

        :param timestamp_start: (pd.Timestamp) The start timestamp for the dashboard data.
        :param timestamp_end:   (pd.Timestamp) The end timestamp for the dashboard data.
        :param artifact_name:   (str) The name for the logged artifact.
        """
        info = self.evidently_project.build_dashboard_info(
            timestamp_start, timestamp_end
        )
        # The dashboard ID is "pd_" followed by the 32 hex digits of a random UUID.
        params = TemplateParams(
            dashboard_id=f"pd_{uuid.uuid4().hex}",
            dashboard_info=info,
            additional_graphs={},
        )

        rendered = self._render(determine_template("inline"), params)
        self.context.log_artifact(
            artifact_name,
            body=rendered.encode("utf-8"),
            format="html",
        )

    @staticmethod
    def _render(temple_func, template_params: "TemplateParams"):
        """Call the given template function with ``params=template_params`` and return its result."""
        return temple_func(params=template_params)
134
+
135
+
136
class EvidentlyModelMonitoringApplicationBaseV2(
    mm_base.ModelMonitoringApplicationBaseV2
):
    def __init__(
        self, evidently_workspace_path: str, evidently_project_id: "STR_UUID"
    ) -> None:
        """
        Base class for model monitoring applications that integrate Evidently.
        Note: evidently is not installed by default in the mlrun/mlrun image.
        It must be installed separately to use this class.

        :param evidently_workspace_path: (str) The path to the Evidently workspace.
        :param evidently_project_id:     (str) The ID of the Evidently project.

        :raise ModuleNotFoundError: If Evidently could not be imported by this module.
        """

        # TODO: more than one project (mep -> project)
        if not _HAS_EVIDENTLY:
            raise ModuleNotFoundError("Evidently is not installed - the app cannot run")

        workspace = Workspace.create(evidently_workspace_path)
        self.evidently_workspace = workspace
        self.evidently_project_id = evidently_project_id
        self.evidently_project = workspace.get_project(evidently_project_id)

    @staticmethod
    def log_evidently_object(
        monitoring_context: mm_context.MonitoringApplicationContext,
        evidently_object: Union["Report", "Suite"],
        artifact_name: str,
    ):
        """
        Log an Evidently report or suite as an HTML artifact through the monitoring context.

        :param monitoring_context: (MonitoringApplicationContext) The monitoring context to process.
        :param evidently_object:   (Union[Report, Suite]) The Evidently report or suite object.
        :param artifact_name:      (str) The name for the logged artifact.
        """
        html = evidently_object.get_html()
        monitoring_context.log_artifact(
            artifact_name,
            body=html.encode("utf-8"),
            format="html",
        )

    def log_project_dashboard(
        self,
        monitoring_context: mm_context.MonitoringApplicationContext,
        timestamp_start: pd.Timestamp,
        timestamp_end: pd.Timestamp,
        artifact_name: str = "dashboard",
    ):
        """
        Build the Evidently project dashboard and log it as an HTML artifact.

        :param monitoring_context: (MonitoringApplicationContext) The monitoring context to process.
        :param timestamp_start:    (pd.Timestamp) The start timestamp for the dashboard data.
        :param timestamp_end:      (pd.Timestamp) The end timestamp for the dashboard data.
        :param artifact_name:      (str) The name for the logged artifact.
        """
        info = self.evidently_project.build_dashboard_info(
            timestamp_start, timestamp_end
        )
        # The dashboard ID is "pd_" followed by the 32 hex digits of a random UUID.
        params = TemplateParams(
            dashboard_id=f"pd_{uuid.uuid4().hex}",
            dashboard_info=info,
            additional_graphs={},
        )

        rendered = self._render(determine_template("inline"), params)
        monitoring_context.log_artifact(
            artifact_name,
            body=rendered.encode("utf-8"),
            format="html",
        )

    @staticmethod
    def _render(temple_func, template_params: "TemplateParams"):
        """Call the given template function with ``params=template_params`` and return its result."""
        return temple_func(params=template_params)
@@ -12,22 +12,28 @@
12
12
  # See the License for the specific language governing permissions and
13
13
  # limitations under the License.
14
14
 
15
+ import json
15
16
  from dataclasses import dataclass
16
- from typing import Final, Optional, Protocol
17
+ from typing import Final, Optional, Protocol, Union, cast
17
18
 
18
19
  import numpy as np
19
- from pandas import DataFrame, Timestamp
20
+ from pandas import DataFrame, Series
20
21
 
22
+ import mlrun.artifacts
23
+ import mlrun.common.model_monitoring.helpers
24
+ import mlrun.model_monitoring.applications.context as mm_context
25
+ import mlrun.model_monitoring.applications.results as mm_results
26
+ import mlrun.model_monitoring.features_drift_table as mm_drift_table
21
27
  from mlrun.common.schemas.model_monitoring.constants import (
22
- MLRUN_HISTOGRAM_DATA_DRIFT_APP_NAME,
28
+ EventFieldType,
29
+ HistogramDataDriftApplicationConstants,
23
30
  ResultKindApp,
24
31
  ResultStatusApp,
25
32
  )
26
- from mlrun.model_monitoring.application import (
27
- ModelMonitoringApplicationBase,
28
- ModelMonitoringApplicationResult,
33
+ from mlrun.model_monitoring.applications import (
34
+ ModelMonitoringApplicationBaseV2,
29
35
  )
30
- from mlrun.model_monitoring.batch import (
36
+ from mlrun.model_monitoring.metrics.histogram_distance import (
31
37
  HellingerDistance,
32
38
  HistogramDistanceMetric,
33
39
  KullbackLeiblerDivergence,
@@ -81,17 +87,34 @@ class DataDriftClassifier:
81
87
  return ResultStatusApp.no_detection
82
88
 
83
89
 
84
- class HistogramDataDriftApplication(ModelMonitoringApplicationBase):
90
+ class HistogramDataDriftApplication(ModelMonitoringApplicationBaseV2):
85
91
  """
86
92
  MLRun's default data drift application for model monitoring.
87
93
 
88
- The application calculates the metrics over the features' histograms.
89
- Each metric is calculated over all the features, the mean is taken,
90
- and the status is returned.
94
+ The application expects tabular numerical data, and calculates three metrics over the features' histograms.
95
+ The three metrics are:
96
+
97
+ * Hellinger distance.
98
+ * Total variance distance.
99
+ * Kullback-Leibler divergence.
100
+
101
+ Each metric is calculated over all the features individually and the mean is taken as the metric value.
102
+ The average of Hellinger and total variance distance is taken as the result.
103
+
104
+ The application logs two artifacts:
105
+
106
+ * A JSON with the general drift per feature.
107
+ * A plotly table with the different metrics per feature.
108
+
109
+ This application is deployed by default when calling:
110
+
111
+ .. code-block:: python
112
+
113
+ project.enable_model_monitoring()
114
+
91
115
  """
92
116
 
93
- NAME: Final[str] = MLRUN_HISTOGRAM_DATA_DRIFT_APP_NAME
94
- METRIC_KIND: Final[ResultKindApp] = ResultKindApp.data_drift
117
+ NAME: Final[str] = HistogramDataDriftApplicationConstants.NAME
95
118
 
96
119
  _REQUIRED_METRICS = {HellingerDistance, TotalVarianceDistance}
97
120
 
@@ -103,8 +126,6 @@ class HistogramDataDriftApplication(ModelMonitoringApplicationBase):
103
126
 
104
127
  def __init__(self, value_classifier: Optional[ValueClassifier] = None) -> None:
105
128
  """
106
- Initialize the data drift application.
107
-
108
129
  :param value_classifier: Classifier object that adheres to the `ValueClassifier` protocol.
109
130
  If not provided, the default `DataDriftClassifier()` is used.
110
131
  """
@@ -114,105 +135,215 @@ class HistogramDataDriftApplication(ModelMonitoringApplicationBase):
114
135
  ), "TVD and Hellinger distance are required for the general data drift result"
115
136
 
116
137
  def _compute_metrics_per_feature(
117
- self, sample_df_stats: DataFrame, feature_stats: DataFrame
118
- ) -> dict[type[HistogramDistanceMetric], list[float]]:
138
+ self, monitoring_context: mm_context.MonitoringApplicationContext
139
+ ) -> DataFrame:
119
140
  """Compute the metrics for the different features and labels"""
120
- metrics_per_feature: dict[type[HistogramDistanceMetric], list[float]] = {
121
- metric_class: [] for metric_class in self.metrics
122
- }
123
-
124
- for (sample_feat, sample_hist), (reference_feat, reference_hist) in zip(
125
- sample_df_stats.items(), feature_stats.items()
126
- ):
127
- assert sample_feat == reference_feat, "The features do not match"
128
- self.context.logger.info(
129
- "Computing metrics for feature", feature_name=sample_feat
141
+ metrics_per_feature = DataFrame(
142
+ columns=[metric_class.NAME for metric_class in self.metrics]
143
+ )
144
+ feature_stats = monitoring_context.dict_to_histogram(
145
+ monitoring_context.feature_stats
146
+ )
147
+ sample_df_stats = monitoring_context.dict_to_histogram(
148
+ monitoring_context.sample_df_stats
149
+ )
150
+ for feature_name in feature_stats:
151
+ sample_hist = np.asarray(sample_df_stats[feature_name])
152
+ reference_hist = np.asarray(feature_stats[feature_name])
153
+ monitoring_context.logger.info(
154
+ "Computing metrics for feature", feature_name=feature_name
130
155
  )
131
- sample_arr = np.asarray(sample_hist)
132
- reference_arr = np.asarray(reference_hist)
133
- for metric in self.metrics:
134
- metric_name = metric.NAME
135
- self.context.logger.debug(
136
- "Computing data drift metric",
137
- metric_name=metric_name,
138
- feature_name=sample_feat,
139
- )
140
- metrics_per_feature[metric].append(
141
- metric(distrib_t=sample_arr, distrib_u=reference_arr).compute()
142
- )
143
- self.context.logger.info("Finished computing the metrics")
156
+ metrics_per_feature.loc[feature_name] = { # pyright: ignore[reportCallIssue,reportArgumentType]
157
+ metric.NAME: metric(
158
+ distrib_t=sample_hist, distrib_u=reference_hist
159
+ ).compute()
160
+ for metric in self.metrics
161
+ }
162
+ monitoring_context.logger.info("Finished computing the metrics")
144
163
 
145
164
  return metrics_per_feature
146
165
 
147
- def _add_general_drift_result(
148
- self, results: list[ModelMonitoringApplicationResult], value: float
149
- ) -> None:
150
- results.append(
151
- ModelMonitoringApplicationResult(
152
- name="general_drift",
153
- value=value,
154
- kind=self.METRIC_KIND,
155
- status=self._value_classifier.value_to_status(value),
156
- )
166
+ def _get_general_drift_result(
167
+ self,
168
+ metrics: list[mm_results.ModelMonitoringApplicationMetric],
169
+ monitoring_context: mm_context.MonitoringApplicationContext,
170
+ metrics_per_feature: DataFrame,
171
+ ) -> mm_results.ModelMonitoringApplicationResult:
172
+ """Get the general drift result from the metrics list"""
173
+ value = cast(
174
+ float,
175
+ np.mean(
176
+ [
177
+ metric.value
178
+ for metric in metrics
179
+ if metric.name
180
+ in [
181
+ f"{HellingerDistance.NAME}_mean",
182
+ f"{TotalVarianceDistance.NAME}_mean",
183
+ ]
184
+ ]
185
+ ),
186
+ )
187
+
188
+ status = self._value_classifier.value_to_status(value)
189
+ return mm_results.ModelMonitoringApplicationResult(
190
+ name=HistogramDataDriftApplicationConstants.GENERAL_RESULT_NAME,
191
+ value=value,
192
+ kind=ResultKindApp.data_drift,
193
+ status=status,
194
+ extra_data={
195
+ EventFieldType.CURRENT_STATS: json.dumps(
196
+ monitoring_context.feature_stats
197
+ ),
198
+ EventFieldType.DRIFT_MEASURES: metrics_per_feature.T.to_json(),
199
+ EventFieldType.DRIFT_STATUS: status.value,
200
+ },
157
201
  )
158
202
 
159
- def _get_results(
160
- self, metrics_per_feature: dict[type[HistogramDistanceMetric], list[float]]
161
- ) -> list[ModelMonitoringApplicationResult]:
203
+ @staticmethod
204
+ def _get_metrics(
205
+ metrics_per_feature: DataFrame,
206
+ ) -> list[mm_results.ModelMonitoringApplicationMetric]:
162
207
  """Average the metrics over the features and add the status"""
163
- results: list[ModelMonitoringApplicationResult] = []
164
- hellinger_tvd_values: list[float] = []
165
- for metric_class, metric_values in metrics_per_feature.items():
166
- self.context.logger.debug(
167
- "Averaging metric over the features", metric_name=metric_class.NAME
168
- )
169
- value = np.mean(metric_values)
170
- if metric_class == KullbackLeiblerDivergence:
171
- # This metric is not bounded from above [0, inf).
172
- # No status is currently reported for KL divergence
173
- status = ResultStatusApp.irrelevant
174
- else:
175
- status = self._value_classifier.value_to_status(value)
176
- if metric_class in self._REQUIRED_METRICS:
177
- hellinger_tvd_values.append(value)
178
- results.append(
179
- ModelMonitoringApplicationResult(
180
- name=f"{metric_class.NAME}_mean",
208
+ metrics: list[mm_results.ModelMonitoringApplicationMetric] = []
209
+
210
+ metrics_mean = metrics_per_feature.mean().to_dict()
211
+
212
+ for name, value in metrics_mean.items():
213
+ metrics.append(
214
+ mm_results.ModelMonitoringApplicationMetric(
215
+ name=f"{name}_mean",
181
216
  value=value,
182
- kind=self.METRIC_KIND,
183
- status=status,
184
217
  )
185
218
  )
186
219
 
187
- self._add_general_drift_result(
188
- results=results, value=np.mean(hellinger_tvd_values)
220
+ return metrics
221
+
222
+ @staticmethod
223
+ def _remove_timestamp_feature(
224
+ sample_set_statistics: mlrun.common.model_monitoring.helpers.FeatureStats,
225
+ ) -> mlrun.common.model_monitoring.helpers.FeatureStats:
226
+ """
227
+ Drop the 'timestamp' feature if it exists, as it is irrelevant
228
+ in the plotly artifact
229
+ """
230
+ sample_set_statistics = mlrun.common.model_monitoring.helpers.FeatureStats(
231
+ sample_set_statistics.copy()
232
+ )
233
+ if EventFieldType.TIMESTAMP in sample_set_statistics:
234
+ del sample_set_statistics[EventFieldType.TIMESTAMP]
235
+ return sample_set_statistics
236
+
237
+ @staticmethod
238
+ def _log_json_artifact(
239
+ drift_per_feature_values: Series,
240
+ monitoring_context: mm_context.MonitoringApplicationContext,
241
+ ) -> None:
242
+ """Log the drift values as a JSON artifact"""
243
+ monitoring_context.logger.debug("Logging drift value per feature JSON artifact")
244
+ monitoring_context.log_artifact(
245
+ mlrun.artifacts.Artifact(
246
+ body=drift_per_feature_values.to_json(),
247
+ format="json",
248
+ key="features_drift_results",
249
+ )
250
+ )
251
+ monitoring_context.logger.debug("Logged JSON artifact successfully")
252
+
253
+ def _log_plotly_table_artifact(
254
+ self,
255
+ sample_set_statistics: mlrun.common.model_monitoring.helpers.FeatureStats,
256
+ inputs_statistics: mlrun.common.model_monitoring.helpers.FeatureStats,
257
+ metrics_per_feature: DataFrame,
258
+ drift_per_feature_values: Series,
259
+ monitoring_context: mm_context.MonitoringApplicationContext,
260
+ ) -> None:
261
+ """Log the Plotly drift table artifact"""
262
+ monitoring_context.logger.debug(
263
+ "Feature stats",
264
+ sample_set_statistics=sample_set_statistics,
265
+ inputs_statistics=inputs_statistics,
266
+ )
267
+
268
+ monitoring_context.logger.debug("Computing drift results per feature")
269
+ drift_results = {
270
+ cast(str, key): (self._value_classifier.value_to_status(value), value)
271
+ for key, value in drift_per_feature_values.items()
272
+ }
273
+ monitoring_context.logger.debug("Logging plotly artifact")
274
+ monitoring_context.log_artifact(
275
+ mm_drift_table.FeaturesDriftTablePlot().produce(
276
+ sample_set_statistics=sample_set_statistics,
277
+ inputs_statistics=inputs_statistics,
278
+ metrics=metrics_per_feature.T.to_dict(), # pyright: ignore[reportArgumentType]
279
+ drift_results=drift_results,
280
+ )
189
281
  )
282
+ monitoring_context.logger.debug("Logged plotly artifact successfully")
190
283
 
191
- return results
284
+ def _log_drift_artifacts(
285
+ self,
286
+ monitoring_context: mm_context.MonitoringApplicationContext,
287
+ metrics_per_feature: DataFrame,
288
+ log_json_artifact: bool = True,
289
+ ) -> None:
290
+ """Log JSON and Plotly drift data per feature artifacts"""
291
+ drift_per_feature_values = metrics_per_feature[
292
+ [HellingerDistance.NAME, TotalVarianceDistance.NAME]
293
+ ].mean(axis=1)
294
+
295
+ if log_json_artifact:
296
+ self._log_json_artifact(drift_per_feature_values, monitoring_context)
297
+
298
+ self._log_plotly_table_artifact(
299
+ sample_set_statistics=self._remove_timestamp_feature(
300
+ monitoring_context.sample_df_stats
301
+ ),
302
+ inputs_statistics=monitoring_context.feature_stats,
303
+ metrics_per_feature=metrics_per_feature,
304
+ drift_per_feature_values=drift_per_feature_values,
305
+ monitoring_context=monitoring_context,
306
+ )
192
307
 
193
308
  def do_tracking(
194
309
  self,
195
- application_name: str,
196
- sample_df_stats: DataFrame,
197
- feature_stats: DataFrame,
198
- sample_df: DataFrame,
199
- start_infer_time: Timestamp,
200
- end_infer_time: Timestamp,
201
- latest_request: Timestamp,
202
- endpoint_id: str,
203
- output_stream_uri: str,
204
- ) -> list[ModelMonitoringApplicationResult]:
310
+ monitoring_context: mm_context.MonitoringApplicationContext,
311
+ ) -> list[
312
+ Union[
313
+ mm_results.ModelMonitoringApplicationResult,
314
+ mm_results.ModelMonitoringApplicationMetric,
315
+ ]
316
+ ]:
205
317
  """
206
318
  Calculate and return the data drift metrics, averaged over the features.
207
319
 
208
- Refer to `ModelMonitoringApplicationBase` for the meaning of the
320
+ Refer to `ModelMonitoringApplicationBaseV2` for the meaning of the
209
321
  function arguments.
210
322
  """
211
- self.context.logger.debug("Starting to run the application")
323
+ monitoring_context.logger.debug("Starting to run the application")
324
+ if not monitoring_context.feature_stats:
325
+ monitoring_context.logger.info(
326
+ "No feature statistics found, skipping the application. \n"
327
+ "In order to run the application, training set must be provided when logging the model."
328
+ )
329
+ return []
212
330
  metrics_per_feature = self._compute_metrics_per_feature(
213
- sample_df_stats=sample_df_stats, feature_stats=feature_stats
331
+ monitoring_context=monitoring_context
332
+ )
333
+ monitoring_context.logger.debug("Saving artifacts")
334
+ self._log_drift_artifacts(
335
+ monitoring_context=monitoring_context,
336
+ metrics_per_feature=metrics_per_feature,
337
+ )
338
+ monitoring_context.logger.debug("Computing average per metric")
339
+ metrics = self._get_metrics(metrics_per_feature)
340
+ result = self._get_general_drift_result(
341
+ metrics=metrics,
342
+ monitoring_context=monitoring_context,
343
+ metrics_per_feature=metrics_per_feature,
344
+ )
345
+ metrics_and_result = metrics + [result]
346
+ monitoring_context.logger.debug(
347
+ "Finished running the application", results=metrics_and_result
214
348
  )
215
- self.context.logger.debug("Computing average per metric")
216
- results = self._get_results(metrics_per_feature)
217
- self.context.logger.debug("Finished running the application", results=results)
218
- return results
349
+ return metrics_and_result