mlrun 1.6.4rc2__py3-none-any.whl → 1.7.0rc20__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of mlrun might be problematic. Click here for more details.

Files changed (291) hide show
  1. mlrun/__init__.py +11 -1
  2. mlrun/__main__.py +26 -112
  3. mlrun/alerts/__init__.py +15 -0
  4. mlrun/alerts/alert.py +144 -0
  5. mlrun/api/schemas/__init__.py +5 -4
  6. mlrun/artifacts/__init__.py +8 -3
  7. mlrun/artifacts/base.py +46 -257
  8. mlrun/artifacts/dataset.py +11 -192
  9. mlrun/artifacts/manager.py +47 -48
  10. mlrun/artifacts/model.py +31 -159
  11. mlrun/artifacts/plots.py +23 -380
  12. mlrun/common/constants.py +69 -0
  13. mlrun/common/db/sql_session.py +2 -3
  14. mlrun/common/formatters/__init__.py +19 -0
  15. mlrun/common/formatters/artifact.py +21 -0
  16. mlrun/common/formatters/base.py +78 -0
  17. mlrun/common/formatters/function.py +41 -0
  18. mlrun/common/formatters/pipeline.py +53 -0
  19. mlrun/common/formatters/project.py +51 -0
  20. mlrun/common/helpers.py +1 -2
  21. mlrun/common/model_monitoring/helpers.py +9 -5
  22. mlrun/{runtimes → common/runtimes}/constants.py +37 -9
  23. mlrun/common/schemas/__init__.py +24 -4
  24. mlrun/common/schemas/alert.py +203 -0
  25. mlrun/common/schemas/api_gateway.py +148 -0
  26. mlrun/common/schemas/artifact.py +18 -8
  27. mlrun/common/schemas/auth.py +11 -5
  28. mlrun/common/schemas/background_task.py +1 -1
  29. mlrun/common/schemas/client_spec.py +4 -1
  30. mlrun/common/schemas/feature_store.py +16 -16
  31. mlrun/common/schemas/frontend_spec.py +8 -7
  32. mlrun/common/schemas/function.py +5 -1
  33. mlrun/common/schemas/hub.py +11 -18
  34. mlrun/common/schemas/memory_reports.py +2 -2
  35. mlrun/common/schemas/model_monitoring/__init__.py +18 -3
  36. mlrun/common/schemas/model_monitoring/constants.py +83 -26
  37. mlrun/common/schemas/model_monitoring/grafana.py +13 -9
  38. mlrun/common/schemas/model_monitoring/model_endpoints.py +99 -16
  39. mlrun/common/schemas/notification.py +4 -4
  40. mlrun/common/schemas/object.py +2 -2
  41. mlrun/{runtimes/mpijob/v1alpha1.py → common/schemas/pagination.py} +10 -13
  42. mlrun/common/schemas/pipeline.py +1 -10
  43. mlrun/common/schemas/project.py +24 -23
  44. mlrun/common/schemas/runtime_resource.py +8 -12
  45. mlrun/common/schemas/schedule.py +3 -3
  46. mlrun/common/schemas/tag.py +1 -2
  47. mlrun/common/schemas/workflow.py +2 -2
  48. mlrun/common/types.py +7 -1
  49. mlrun/config.py +54 -17
  50. mlrun/data_types/to_pandas.py +10 -12
  51. mlrun/datastore/__init__.py +5 -8
  52. mlrun/datastore/alibaba_oss.py +130 -0
  53. mlrun/datastore/azure_blob.py +17 -5
  54. mlrun/datastore/base.py +62 -39
  55. mlrun/datastore/datastore.py +28 -9
  56. mlrun/datastore/datastore_profile.py +146 -20
  57. mlrun/datastore/filestore.py +0 -1
  58. mlrun/datastore/google_cloud_storage.py +6 -2
  59. mlrun/datastore/hdfs.py +56 -0
  60. mlrun/datastore/inmem.py +2 -2
  61. mlrun/datastore/redis.py +6 -2
  62. mlrun/datastore/s3.py +9 -0
  63. mlrun/datastore/snowflake_utils.py +43 -0
  64. mlrun/datastore/sources.py +201 -96
  65. mlrun/datastore/spark_utils.py +1 -2
  66. mlrun/datastore/store_resources.py +7 -7
  67. mlrun/datastore/targets.py +358 -104
  68. mlrun/datastore/utils.py +72 -58
  69. mlrun/datastore/v3io.py +5 -1
  70. mlrun/db/base.py +185 -35
  71. mlrun/db/factory.py +1 -1
  72. mlrun/db/httpdb.py +614 -179
  73. mlrun/db/nopdb.py +210 -26
  74. mlrun/errors.py +12 -1
  75. mlrun/execution.py +41 -24
  76. mlrun/feature_store/__init__.py +0 -2
  77. mlrun/feature_store/api.py +40 -72
  78. mlrun/feature_store/common.py +1 -1
  79. mlrun/feature_store/feature_set.py +76 -55
  80. mlrun/feature_store/feature_vector.py +28 -30
  81. mlrun/feature_store/ingestion.py +7 -6
  82. mlrun/feature_store/retrieval/base.py +16 -11
  83. mlrun/feature_store/retrieval/conversion.py +11 -13
  84. mlrun/feature_store/retrieval/dask_merger.py +2 -0
  85. mlrun/feature_store/retrieval/job.py +9 -3
  86. mlrun/feature_store/retrieval/local_merger.py +2 -0
  87. mlrun/feature_store/retrieval/spark_merger.py +34 -24
  88. mlrun/feature_store/steps.py +37 -34
  89. mlrun/features.py +9 -20
  90. mlrun/frameworks/_common/artifacts_library.py +9 -9
  91. mlrun/frameworks/_common/mlrun_interface.py +5 -5
  92. mlrun/frameworks/_common/model_handler.py +48 -48
  93. mlrun/frameworks/_common/plan.py +2 -3
  94. mlrun/frameworks/_common/producer.py +3 -4
  95. mlrun/frameworks/_common/utils.py +5 -5
  96. mlrun/frameworks/_dl_common/loggers/logger.py +6 -7
  97. mlrun/frameworks/_dl_common/loggers/mlrun_logger.py +9 -9
  98. mlrun/frameworks/_dl_common/loggers/tensorboard_logger.py +23 -47
  99. mlrun/frameworks/_ml_common/artifacts_library.py +1 -2
  100. mlrun/frameworks/_ml_common/loggers/logger.py +3 -4
  101. mlrun/frameworks/_ml_common/loggers/mlrun_logger.py +4 -5
  102. mlrun/frameworks/_ml_common/model_handler.py +24 -24
  103. mlrun/frameworks/_ml_common/pkl_model_server.py +2 -2
  104. mlrun/frameworks/_ml_common/plan.py +1 -1
  105. mlrun/frameworks/_ml_common/plans/calibration_curve_plan.py +2 -3
  106. mlrun/frameworks/_ml_common/plans/confusion_matrix_plan.py +2 -3
  107. mlrun/frameworks/_ml_common/plans/dataset_plan.py +3 -3
  108. mlrun/frameworks/_ml_common/plans/feature_importance_plan.py +3 -3
  109. mlrun/frameworks/_ml_common/plans/roc_curve_plan.py +4 -4
  110. mlrun/frameworks/_ml_common/utils.py +4 -4
  111. mlrun/frameworks/auto_mlrun/auto_mlrun.py +9 -9
  112. mlrun/frameworks/huggingface/model_server.py +4 -4
  113. mlrun/frameworks/lgbm/__init__.py +33 -33
  114. mlrun/frameworks/lgbm/callbacks/callback.py +2 -4
  115. mlrun/frameworks/lgbm/callbacks/logging_callback.py +4 -5
  116. mlrun/frameworks/lgbm/callbacks/mlrun_logging_callback.py +4 -5
  117. mlrun/frameworks/lgbm/mlrun_interfaces/booster_mlrun_interface.py +1 -3
  118. mlrun/frameworks/lgbm/mlrun_interfaces/mlrun_interface.py +6 -6
  119. mlrun/frameworks/lgbm/model_handler.py +10 -10
  120. mlrun/frameworks/lgbm/model_server.py +6 -6
  121. mlrun/frameworks/lgbm/utils.py +5 -5
  122. mlrun/frameworks/onnx/dataset.py +8 -8
  123. mlrun/frameworks/onnx/mlrun_interface.py +3 -3
  124. mlrun/frameworks/onnx/model_handler.py +6 -6
  125. mlrun/frameworks/onnx/model_server.py +7 -7
  126. mlrun/frameworks/parallel_coordinates.py +4 -3
  127. mlrun/frameworks/pytorch/__init__.py +18 -18
  128. mlrun/frameworks/pytorch/callbacks/callback.py +4 -5
  129. mlrun/frameworks/pytorch/callbacks/logging_callback.py +17 -17
  130. mlrun/frameworks/pytorch/callbacks/mlrun_logging_callback.py +11 -11
  131. mlrun/frameworks/pytorch/callbacks/tensorboard_logging_callback.py +23 -29
  132. mlrun/frameworks/pytorch/callbacks_handler.py +38 -38
  133. mlrun/frameworks/pytorch/mlrun_interface.py +20 -20
  134. mlrun/frameworks/pytorch/model_handler.py +17 -17
  135. mlrun/frameworks/pytorch/model_server.py +7 -7
  136. mlrun/frameworks/sklearn/__init__.py +13 -13
  137. mlrun/frameworks/sklearn/estimator.py +4 -4
  138. mlrun/frameworks/sklearn/metrics_library.py +14 -14
  139. mlrun/frameworks/sklearn/mlrun_interface.py +3 -6
  140. mlrun/frameworks/sklearn/model_handler.py +2 -2
  141. mlrun/frameworks/tf_keras/__init__.py +10 -7
  142. mlrun/frameworks/tf_keras/callbacks/logging_callback.py +15 -15
  143. mlrun/frameworks/tf_keras/callbacks/mlrun_logging_callback.py +11 -11
  144. mlrun/frameworks/tf_keras/callbacks/tensorboard_logging_callback.py +19 -23
  145. mlrun/frameworks/tf_keras/mlrun_interface.py +9 -11
  146. mlrun/frameworks/tf_keras/model_handler.py +14 -14
  147. mlrun/frameworks/tf_keras/model_server.py +6 -6
  148. mlrun/frameworks/xgboost/__init__.py +13 -13
  149. mlrun/frameworks/xgboost/model_handler.py +6 -6
  150. mlrun/k8s_utils.py +14 -16
  151. mlrun/launcher/__init__.py +1 -1
  152. mlrun/launcher/base.py +16 -15
  153. mlrun/launcher/client.py +8 -6
  154. mlrun/launcher/factory.py +1 -1
  155. mlrun/launcher/local.py +17 -11
  156. mlrun/launcher/remote.py +16 -10
  157. mlrun/lists.py +7 -6
  158. mlrun/model.py +238 -73
  159. mlrun/model_monitoring/__init__.py +1 -1
  160. mlrun/model_monitoring/api.py +138 -315
  161. mlrun/model_monitoring/application.py +5 -296
  162. mlrun/model_monitoring/applications/__init__.py +24 -0
  163. mlrun/model_monitoring/applications/_application_steps.py +157 -0
  164. mlrun/model_monitoring/applications/base.py +282 -0
  165. mlrun/model_monitoring/applications/context.py +214 -0
  166. mlrun/model_monitoring/applications/evidently_base.py +211 -0
  167. mlrun/model_monitoring/applications/histogram_data_drift.py +349 -0
  168. mlrun/model_monitoring/applications/results.py +99 -0
  169. mlrun/model_monitoring/controller.py +104 -84
  170. mlrun/model_monitoring/controller_handler.py +13 -5
  171. mlrun/model_monitoring/db/__init__.py +18 -0
  172. mlrun/model_monitoring/{stores → db/stores}/__init__.py +43 -36
  173. mlrun/model_monitoring/db/stores/base/__init__.py +15 -0
  174. mlrun/model_monitoring/{stores/model_endpoint_store.py → db/stores/base/store.py} +64 -40
  175. mlrun/model_monitoring/db/stores/sqldb/__init__.py +13 -0
  176. mlrun/model_monitoring/db/stores/sqldb/models/__init__.py +71 -0
  177. mlrun/model_monitoring/{stores → db/stores/sqldb}/models/base.py +109 -5
  178. mlrun/model_monitoring/db/stores/sqldb/models/mysql.py +88 -0
  179. mlrun/model_monitoring/{stores/models/mysql.py → db/stores/sqldb/models/sqlite.py} +19 -13
  180. mlrun/model_monitoring/db/stores/sqldb/sql_store.py +684 -0
  181. mlrun/model_monitoring/db/stores/v3io_kv/__init__.py +13 -0
  182. mlrun/model_monitoring/{stores/kv_model_endpoint_store.py → db/stores/v3io_kv/kv_store.py} +310 -165
  183. mlrun/model_monitoring/db/tsdb/__init__.py +100 -0
  184. mlrun/model_monitoring/db/tsdb/base.py +329 -0
  185. mlrun/model_monitoring/db/tsdb/helpers.py +30 -0
  186. mlrun/model_monitoring/db/tsdb/tdengine/__init__.py +15 -0
  187. mlrun/model_monitoring/db/tsdb/tdengine/schemas.py +240 -0
  188. mlrun/model_monitoring/db/tsdb/tdengine/stream_graph_steps.py +45 -0
  189. mlrun/model_monitoring/db/tsdb/tdengine/tdengine_connector.py +397 -0
  190. mlrun/model_monitoring/db/tsdb/v3io/__init__.py +15 -0
  191. mlrun/model_monitoring/db/tsdb/v3io/stream_graph_steps.py +117 -0
  192. mlrun/model_monitoring/db/tsdb/v3io/v3io_connector.py +630 -0
  193. mlrun/model_monitoring/evidently_application.py +6 -118
  194. mlrun/model_monitoring/features_drift_table.py +134 -106
  195. mlrun/model_monitoring/helpers.py +127 -28
  196. mlrun/model_monitoring/metrics/__init__.py +13 -0
  197. mlrun/model_monitoring/metrics/histogram_distance.py +127 -0
  198. mlrun/model_monitoring/model_endpoint.py +3 -2
  199. mlrun/model_monitoring/prometheus.py +1 -4
  200. mlrun/model_monitoring/stream_processing.py +62 -231
  201. mlrun/model_monitoring/tracking_policy.py +9 -2
  202. mlrun/model_monitoring/writer.py +152 -124
  203. mlrun/package/__init__.py +6 -6
  204. mlrun/package/context_handler.py +5 -5
  205. mlrun/package/packager.py +7 -7
  206. mlrun/package/packagers/default_packager.py +6 -6
  207. mlrun/package/packagers/numpy_packagers.py +15 -15
  208. mlrun/package/packagers/pandas_packagers.py +5 -5
  209. mlrun/package/packagers/python_standard_library_packagers.py +10 -10
  210. mlrun/package/packagers_manager.py +19 -23
  211. mlrun/package/utils/_formatter.py +6 -6
  212. mlrun/package/utils/_pickler.py +2 -2
  213. mlrun/package/utils/_supported_format.py +4 -4
  214. mlrun/package/utils/log_hint_utils.py +2 -2
  215. mlrun/package/utils/type_hint_utils.py +4 -9
  216. mlrun/platforms/__init__.py +11 -10
  217. mlrun/platforms/iguazio.py +24 -203
  218. mlrun/projects/operations.py +35 -21
  219. mlrun/projects/pipelines.py +68 -99
  220. mlrun/projects/project.py +830 -266
  221. mlrun/render.py +3 -11
  222. mlrun/run.py +162 -166
  223. mlrun/runtimes/__init__.py +62 -7
  224. mlrun/runtimes/base.py +39 -32
  225. mlrun/runtimes/daskjob.py +8 -8
  226. mlrun/runtimes/databricks_job/databricks_cancel_task.py +1 -1
  227. mlrun/runtimes/databricks_job/databricks_runtime.py +7 -7
  228. mlrun/runtimes/databricks_job/databricks_wrapper.py +1 -1
  229. mlrun/runtimes/funcdoc.py +0 -28
  230. mlrun/runtimes/function_reference.py +1 -1
  231. mlrun/runtimes/kubejob.py +28 -122
  232. mlrun/runtimes/local.py +6 -3
  233. mlrun/runtimes/mpijob/__init__.py +0 -20
  234. mlrun/runtimes/mpijob/abstract.py +9 -10
  235. mlrun/runtimes/mpijob/v1.py +1 -1
  236. mlrun/{model_monitoring/stores/models/sqlite.py → runtimes/nuclio/__init__.py} +7 -9
  237. mlrun/runtimes/nuclio/api_gateway.py +709 -0
  238. mlrun/runtimes/nuclio/application/__init__.py +15 -0
  239. mlrun/runtimes/nuclio/application/application.py +523 -0
  240. mlrun/runtimes/nuclio/application/reverse_proxy.go +95 -0
  241. mlrun/runtimes/{function.py → nuclio/function.py} +112 -73
  242. mlrun/runtimes/{nuclio.py → nuclio/nuclio.py} +6 -6
  243. mlrun/runtimes/{serving.py → nuclio/serving.py} +45 -51
  244. mlrun/runtimes/pod.py +286 -88
  245. mlrun/runtimes/remotesparkjob.py +2 -2
  246. mlrun/runtimes/sparkjob/spark3job.py +51 -34
  247. mlrun/runtimes/utils.py +7 -75
  248. mlrun/secrets.py +9 -5
  249. mlrun/serving/remote.py +2 -7
  250. mlrun/serving/routers.py +13 -10
  251. mlrun/serving/server.py +22 -26
  252. mlrun/serving/states.py +99 -25
  253. mlrun/serving/utils.py +3 -3
  254. mlrun/serving/v1_serving.py +6 -7
  255. mlrun/serving/v2_serving.py +59 -20
  256. mlrun/track/tracker.py +2 -1
  257. mlrun/track/tracker_manager.py +3 -3
  258. mlrun/track/trackers/mlflow_tracker.py +1 -2
  259. mlrun/utils/async_http.py +5 -7
  260. mlrun/utils/azure_vault.py +1 -1
  261. mlrun/utils/clones.py +1 -2
  262. mlrun/utils/condition_evaluator.py +3 -3
  263. mlrun/utils/db.py +3 -3
  264. mlrun/utils/helpers.py +183 -197
  265. mlrun/utils/http.py +2 -5
  266. mlrun/utils/logger.py +76 -14
  267. mlrun/utils/notifications/notification/__init__.py +17 -12
  268. mlrun/utils/notifications/notification/base.py +14 -2
  269. mlrun/utils/notifications/notification/console.py +2 -0
  270. mlrun/utils/notifications/notification/git.py +3 -1
  271. mlrun/utils/notifications/notification/ipython.py +3 -1
  272. mlrun/utils/notifications/notification/slack.py +101 -21
  273. mlrun/utils/notifications/notification/webhook.py +11 -1
  274. mlrun/utils/notifications/notification_pusher.py +155 -30
  275. mlrun/utils/retryer.py +208 -0
  276. mlrun/utils/singleton.py +1 -1
  277. mlrun/utils/v3io_clients.py +2 -4
  278. mlrun/utils/version/version.json +2 -2
  279. mlrun/utils/version/version.py +2 -6
  280. {mlrun-1.6.4rc2.dist-info → mlrun-1.7.0rc20.dist-info}/METADATA +31 -19
  281. mlrun-1.7.0rc20.dist-info/RECORD +353 -0
  282. mlrun/kfpops.py +0 -868
  283. mlrun/model_monitoring/batch.py +0 -1095
  284. mlrun/model_monitoring/stores/models/__init__.py +0 -27
  285. mlrun/model_monitoring/stores/sql_model_endpoint_store.py +0 -384
  286. mlrun/platforms/other.py +0 -306
  287. mlrun-1.6.4rc2.dist-info/RECORD +0 -314
  288. {mlrun-1.6.4rc2.dist-info → mlrun-1.7.0rc20.dist-info}/LICENSE +0 -0
  289. {mlrun-1.6.4rc2.dist-info → mlrun-1.7.0rc20.dist-info}/WHEEL +0 -0
  290. {mlrun-1.6.4rc2.dist-info → mlrun-1.7.0rc20.dist-info}/entry_points.txt +0 -0
  291. {mlrun-1.6.4rc2.dist-info → mlrun-1.7.0rc20.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,211 @@
1
+ # Copyright 2023 Iguazio
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ import uuid
16
+ import warnings
17
+ from typing import Union
18
+
19
+ import pandas as pd
20
+ import semver
21
+
22
+ import mlrun.model_monitoring.applications.base as mm_base
23
+ import mlrun.model_monitoring.applications.context as mm_context
24
+ from mlrun.errors import MLRunIncompatibleVersionError
25
+
26
+ SUPPORTED_EVIDENTLY_VERSION = semver.Version.parse("0.4.11")
27
+
28
+
29
def _check_evidently_version(*, cur: semver.Version, ref: semver.Version) -> None:
    """
    Validate an Evidently version against a reference version.

    The check has three possible outcomes:

    * Silent acceptance - when ``ref.is_compatible(cur)`` holds, or when both versions
      have major ``0``, the same minor, and ``cur`` has a higher patch than ``ref``.
    * A warning - when both versions have major ``0`` and ``cur`` has a higher minor.
    * An error - in every other case.

    :param cur: The version to validate.
    :param ref: The reference version to validate against.

    :raise MLRunIncompatibleVersionError: If ``cur`` is neither accepted nor tolerated
                                          with a warning.
    """
    if ref.is_compatible(cur):
        return

    # Both versions belong to the 0.x series
    same_zero_major = cur.major == ref.major == 0

    if same_zero_major and cur.minor == ref.minor and cur.patch > ref.patch:
        # A newer patch of the same 0.<minor> line is accepted silently
        return

    if not (same_zero_major and cur.minor > ref.minor):
        raise MLRunIncompatibleVersionError(
            f"Evidently version {cur} is not supported, please change to "
            f"{ref} (or another compatible version)."
        )

    # A newer minor of the 0.x series is tolerated, but the user is warned
    warnings.warn(
        f"Evidently version {cur} is not compatible with the tested "
        f"version {ref}, use at your own risk."
    )
44
+
45
+
46
# Probe for the optional `evidently` package. When it is importable, its version is
# validated against `SUPPORTED_EVIDENTLY_VERSION` before the flag is switched on.
try:
    import evidently  # noqa: F401

    _check_evidently_version(
        ref=SUPPORTED_EVIDENTLY_VERSION,
        cur=semver.Version.parse(evidently.__version__),
    )
except ModuleNotFoundError:
    # `evidently` (or a module needed while importing it) could not be found
    _HAS_EVIDENTLY = False
else:
    _HAS_EVIDENTLY = True
57
+
58
+
59
+ if _HAS_EVIDENTLY:
60
+ from evidently.renderers.notebook_utils import determine_template
61
+ from evidently.report.report import Report
62
+ from evidently.suite.base_suite import Suite
63
+ from evidently.ui.type_aliases import STR_UUID
64
+ from evidently.ui.workspace import Workspace
65
+ from evidently.utils.dashboard import TemplateParams
66
+
67
+
68
class EvidentlyModelMonitoringApplicationBase(mm_base.ModelMonitoringApplicationBase):
    """
    Model monitoring application base class with helpers that log Evidently
    reports, suites and project dashboards as HTML artifacts.
    """

    def __init__(
        self, evidently_workspace_path: str, evidently_project_id: "STR_UUID"
    ) -> None:
        """
        A class for integrating Evidently for mlrun model monitoring within a monitoring application.
        Note: evidently is not installed by default in the mlrun/mlrun image.
        It must be installed separately to use this class.

        :param evidently_workspace_path: (str) The path to the Evidently workspace.
        :param evidently_project_id:     (str) The ID of the Evidently project.

        :raise ModuleNotFoundError: If the module-level Evidently import did not succeed.
        """
        if not _HAS_EVIDENTLY:
            raise ModuleNotFoundError("Evidently is not installed - the app cannot run")
        workspace = Workspace.create(evidently_workspace_path)
        self.evidently_workspace = workspace
        self.evidently_project_id = evidently_project_id
        self.evidently_project = workspace.get_project(evidently_project_id)

    def log_evidently_object(
        self, evidently_object: Union["Report", "Suite"], artifact_name: str
    ):
        """
        Render an Evidently report or suite to HTML and log it as an artifact.

        :param evidently_object: (Union[Report, Suite]) The Evidently report or suite object.
        :param artifact_name:    (str) The name for the logged artifact.
        """
        html_text = evidently_object.get_html()
        html_bytes = html_text.encode("utf-8")
        self.context.log_artifact(artifact_name, body=html_bytes, format="html")

    def log_project_dashboard(
        self,
        timestamp_start: pd.Timestamp,
        timestamp_end: pd.Timestamp,
        artifact_name: str = "dashboard",
    ):
        """
        Build the Evidently project dashboard for a time range and log it as an HTML artifact.

        :param timestamp_start: (pd.Timestamp) The start timestamp for the dashboard data.
        :param timestamp_end:   (pd.Timestamp) The end timestamp for the dashboard data.
        :param artifact_name:   (str) The name for the logged artifact.
        """
        info = self.evidently_project.build_dashboard_info(
            timestamp_start, timestamp_end
        )
        # `UUID.hex` is the 32-character hexadecimal form, i.e. without the hyphens
        dashboard_id = f"pd_{uuid.uuid4().hex}"
        params = TemplateParams(
            dashboard_id=dashboard_id,
            dashboard_info=info,
            additional_graphs={},
        )

        inline_template = determine_template("inline")
        html_text = self._render(inline_template, params)
        self.context.log_artifact(
            artifact_name, body=html_text.encode("utf-8"), format="html"
        )

    @staticmethod
    def _render(temple_func, template_params: "TemplateParams"):
        """Call the template function with the given parameters and return its output."""
        rendered = temple_func(params=template_params)
        return rendered
134
+
135
+
136
class EvidentlyModelMonitoringApplicationBaseV2(
    mm_base.ModelMonitoringApplicationBaseV2
):
    """
    Model monitoring application base class (V2) with helpers that log Evidently
    reports, suites and project dashboards as HTML artifacts through a
    monitoring context passed by the caller.
    """

    def __init__(
        self, evidently_workspace_path: str, evidently_project_id: "STR_UUID"
    ) -> None:
        """
        A class for integrating Evidently for mlrun model monitoring within a monitoring application.
        Note: evidently is not installed by default in the mlrun/mlrun image.
        It must be installed separately to use this class.

        :param evidently_workspace_path: (str) The path to the Evidently workspace.
        :param evidently_project_id:     (str) The ID of the Evidently project.

        :raise ModuleNotFoundError: If the module-level Evidently import did not succeed.
        """

        # TODO : more than one project (mep -> project)
        if not _HAS_EVIDENTLY:
            raise ModuleNotFoundError("Evidently is not installed - the app cannot run")
        workspace = Workspace.create(evidently_workspace_path)
        self.evidently_workspace = workspace
        self.evidently_project_id = evidently_project_id
        self.evidently_project = workspace.get_project(evidently_project_id)

    @staticmethod
    def log_evidently_object(
        monitoring_context: mm_context.MonitoringApplicationContext,
        evidently_object: Union["Report", "Suite"],
        artifact_name: str,
    ):
        """
        Render an Evidently report or suite to HTML and log it as an artifact.

        :param monitoring_context: (MonitoringApplicationContext) The monitoring context to process.
        :param evidently_object:   (Union[Report, Suite]) The Evidently report or suite object.
        :param artifact_name:      (str) The name for the logged artifact.
        """
        html_text = evidently_object.get_html()
        html_bytes = html_text.encode("utf-8")
        monitoring_context.log_artifact(artifact_name, body=html_bytes, format="html")

    def log_project_dashboard(
        self,
        monitoring_context: mm_context.MonitoringApplicationContext,
        timestamp_start: pd.Timestamp,
        timestamp_end: pd.Timestamp,
        artifact_name: str = "dashboard",
    ):
        """
        Build the Evidently project dashboard for a time range and log it as an HTML artifact.

        :param monitoring_context: (MonitoringApplicationContext) The monitoring context to process.
        :param timestamp_start:    (pd.Timestamp) The start timestamp for the dashboard data.
        :param timestamp_end:      (pd.Timestamp) The end timestamp for the dashboard data.
        :param artifact_name:      (str) The name for the logged artifact.
        """
        info = self.evidently_project.build_dashboard_info(
            timestamp_start, timestamp_end
        )
        # `UUID.hex` is the 32-character hexadecimal form, i.e. without the hyphens
        dashboard_id = f"pd_{uuid.uuid4().hex}"
        params = TemplateParams(
            dashboard_id=dashboard_id,
            dashboard_info=info,
            additional_graphs={},
        )

        inline_template = determine_template("inline")
        html_text = self._render(inline_template, params)
        monitoring_context.log_artifact(
            artifact_name, body=html_text.encode("utf-8"), format="html"
        )

    @staticmethod
    def _render(temple_func, template_params: "TemplateParams"):
        """Call the template function with the given parameters and return its output."""
        rendered = temple_func(params=template_params)
        return rendered
@@ -0,0 +1,349 @@
1
+ # Copyright 2024 Iguazio
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ import json
16
+ from dataclasses import dataclass
17
+ from typing import Final, Optional, Protocol, Union, cast
18
+
19
+ import numpy as np
20
+ from pandas import DataFrame, Series
21
+
22
+ import mlrun.artifacts
23
+ import mlrun.common.model_monitoring.helpers
24
+ import mlrun.model_monitoring.applications.context as mm_context
25
+ import mlrun.model_monitoring.applications.results as mm_results
26
+ import mlrun.model_monitoring.features_drift_table as mm_drift_table
27
+ from mlrun.common.schemas.model_monitoring.constants import (
28
+ EventFieldType,
29
+ HistogramDataDriftApplicationConstants,
30
+ ResultKindApp,
31
+ ResultStatusApp,
32
+ )
33
+ from mlrun.model_monitoring.applications import (
34
+ ModelMonitoringApplicationBaseV2,
35
+ )
36
+ from mlrun.model_monitoring.metrics.histogram_distance import (
37
+ HellingerDistance,
38
+ HistogramDistanceMetric,
39
+ KullbackLeiblerDivergence,
40
+ TotalVarianceDistance,
41
+ )
42
+
43
+
44
class InvalidMetricValueError(ValueError):
    """Raised when a data drift metric value is outside the range [0, 1]."""
46
+
47
+
48
class InvalidThresholdValueError(ValueError):
    """Raised when classification thresholds do not comply with the required ordering."""
50
+
51
+
52
class ValueClassifier(Protocol):
    """Structural interface of objects that translate a numeric value into a result status."""

    def value_to_status(self, value: float) -> ResultStatusApp:
        """Return the `ResultStatusApp` that corresponds to the numeric value."""
        ...
54
+
55
+
56
@dataclass
class DataDriftClassifier:
    """
    Translate a numeric data drift value into a categorical status, based on two thresholds.
    """

    # Values at or above this threshold (and below `detected`) are a potential detection
    potential: float = 0.5
    # Values at or above this threshold are a detection
    detected: float = 0.7

    def __post_init__(self) -> None:
        """
        Verify that the thresholds satisfy ``0 < potential < detected < 1``.

        :raise InvalidThresholdValueError: If the thresholds violate this ordering.
        """
        thresholds_are_valid = 0 < self.potential < self.detected < 1
        if thresholds_are_valid:
            return
        raise InvalidThresholdValueError(
            "The provided thresholds do not comply with the rules"
        )

    def value_to_status(self, value: float) -> ResultStatusApp:
        """
        Classify the numeric value of a data drift metric.

        Note that a NaN value passes the range check (both comparisons are false)
        and compares false against both thresholds, so it yields `no_detection`.

        :param value: The numeric value of the data drift metric, between 0 and 1.

        :returns: `ResultStatusApp.detected` if the value is at least `detected`,
                  `ResultStatusApp.potential_detection` if it is at least `potential`,
                  and `ResultStatusApp.no_detection` otherwise.

        :raise InvalidMetricValueError: If the value is greater than 1 or smaller than 0.
        """
        if value > 1 or value < 0:
            raise InvalidMetricValueError(
                f"{value = } is invalid, must be in the range [0, 1]."
            )
        # The thresholds are checked from the most severe status downwards
        for threshold, status in (
            (self.detected, ResultStatusApp.detected),
            (self.potential, ResultStatusApp.potential_detection),
        ):
            if value >= threshold:
                return status
        return ResultStatusApp.no_detection
88
+
89
+
90
+ class HistogramDataDriftApplication(ModelMonitoringApplicationBaseV2):
91
+ """
92
+ MLRun's default data drift application for model monitoring.
93
+
94
+ The application expects tabular numerical data, and calculates three metrics over the features' histograms.
95
+ The three metrics are:
96
+
97
+ * Hellinger distance.
98
+ * Total variance distance.
99
+ * Kullback-Leibler divergence.
100
+
101
+ Each metric is calculated over all the features individually and the mean is taken as the metric value.
102
+ The average of Hellinger and total variance distance is taken as the result.
103
+
104
+ The application logs two artifacts:
105
+
106
+ * A JSON with the general drift per feature.
107
+ * A plotly table with the different metrics per feature.
108
+
109
+ This application is deployed by default when calling:
110
+
111
+ .. code-block:: python
112
+
113
+ project.enable_model_monitoring()
114
+
115
+ """
116
+
117
+ NAME: Final[str] = HistogramDataDriftApplicationConstants.NAME
118
+
119
+ _REQUIRED_METRICS = {HellingerDistance, TotalVarianceDistance}
120
+
121
+ metrics: list[type[HistogramDistanceMetric]] = [
122
+ HellingerDistance,
123
+ KullbackLeiblerDivergence,
124
+ TotalVarianceDistance,
125
+ ]
126
+
127
def __init__(self, value_classifier: Optional[ValueClassifier] = None) -> None:
    """
    Set the value classifier and verify that the required metrics are configured.

    :param value_classifier: Classifier object that adheres to the `ValueClassifier` protocol.
                             If not provided, the default `DataDriftClassifier()` is used.
    """
    if value_classifier:
        self._value_classifier = value_classifier
    else:
        self._value_classifier = DataDriftClassifier()

    # Every required metric must appear in the class-level `metrics` list
    missing_metrics = self._REQUIRED_METRICS - set(self.metrics)
    assert (
        not missing_metrics
    ), "TVD and Hellinger distance are required for the general data drift result"
136
+
137
def _compute_metrics_per_feature(
    self, monitoring_context: mm_context.MonitoringApplicationContext
) -> DataFrame:
    """
    Compute every metric in `self.metrics` for each feature.

    :param monitoring_context: The monitoring context. Its `feature_stats` are used as the
                               reference histograms and its `sample_df_stats` as the sample
                               histograms.

    :returns: A data frame with one row per feature name and one column per metric name.
    """
    metric_names = [metric_class.NAME for metric_class in self.metrics]
    metrics_per_feature = DataFrame(columns=metric_names)

    reference_histograms = monitoring_context.dict_to_histogram(
        monitoring_context.feature_stats
    )
    sample_histograms = monitoring_context.dict_to_histogram(
        monitoring_context.sample_df_stats
    )

    # The features are driven by the reference histograms
    for feature_name in reference_histograms:
        sample_hist = np.asarray(sample_histograms[feature_name])
        reference_hist = np.asarray(reference_histograms[feature_name])
        monitoring_context.logger.info(
            "Computing metrics for feature", feature_name=feature_name
        )
        feature_row = {}
        for metric in self.metrics:
            feature_row[metric.NAME] = metric(
                distrib_t=sample_hist, distrib_u=reference_hist
            ).compute()
        metrics_per_feature.loc[feature_name] = feature_row  # pyright: ignore[reportCallIssue,reportArgumentType]
    monitoring_context.logger.info("Finished computing the metrics")

    return metrics_per_feature
165
+
166
def _get_general_drift_result(
    self,
    metrics: list[mm_results.ModelMonitoringApplicationMetric],
    monitoring_context: mm_context.MonitoringApplicationContext,
    metrics_per_feature: DataFrame,
) -> mm_results.ModelMonitoringApplicationResult:
    """
    Get the general drift result from the metrics list.

    The result value is the mean of the Hellinger and total variance distance mean
    metrics, and its status is set by the value classifier.

    :param metrics:             The metrics averaged over the features. Only those named
                                `<HellingerDistance.NAME>_mean` and
                                `<TotalVarianceDistance.NAME>_mean` are used.
    :param monitoring_context:  The monitoring context; its `sample_df_stats` are attached
                                to the result as the current statistics.
    :param metrics_per_feature: The per-feature metrics data frame, attached to the result
                                as the drift measures.

    :returns: The general data drift result.
    """
    general_metric_names = (
        f"{HellingerDistance.NAME}_mean",
        f"{TotalVarianceDistance.NAME}_mean",
    )
    value = cast(
        float,
        np.mean(
            [
                metric.value
                for metric in metrics
                if metric.name in general_metric_names
            ]
        ),
    )

    status = self._value_classifier.value_to_status(value)
    return mm_results.ModelMonitoringApplicationResult(
        name=HistogramDataDriftApplicationConstants.GENERAL_RESULT_NAME,
        value=value,
        kind=ResultKindApp.data_drift,
        status=status,
        extra_data={
            # The current statistics are those of the sampled data. `feature_stats` is
            # what the metrics computation uses as the reference histogram, so it must
            # not be reported as the current statistics.
            EventFieldType.CURRENT_STATS: json.dumps(
                monitoring_context.sample_df_stats
            ),
            EventFieldType.DRIFT_MEASURES: metrics_per_feature.T.to_json(),
            EventFieldType.DRIFT_STATUS: status.value,
        },
    )
202
+
203
@staticmethod
def _get_metrics(
    metrics_per_feature: DataFrame,
) -> list[mm_results.ModelMonitoringApplicationMetric]:
    """
    Average each metric over the features.

    :param metrics_per_feature: The per-feature metrics data frame.

    :returns: One metric object per column, named `<column>_mean`, holding the column mean.
    """
    mean_by_metric = metrics_per_feature.mean().to_dict()
    return [
        mm_results.ModelMonitoringApplicationMetric(
            name=f"{name}_mean",
            value=value,
        )
        for name, value in mean_by_metric.items()
    ]
221
+
222
@staticmethod
def _remove_timestamp_feature(
    sample_set_statistics: mlrun.common.model_monitoring.helpers.FeatureStats,
) -> mlrun.common.model_monitoring.helpers.FeatureStats:
    """
    Return the statistics without the 'timestamp' feature, which is irrelevant
    in the plotly artifact.

    The removal is applied to the result of `sample_set_statistics.copy()`.

    :param sample_set_statistics: The sample set statistics.

    :returns: The copied statistics, without the timestamp entry if it existed.
    """
    stats_without_timestamp = mlrun.common.model_monitoring.helpers.FeatureStats(
        sample_set_statistics.copy()
    )
    # Dropping with a default is a no-op when the timestamp entry is absent
    stats_without_timestamp.pop(EventFieldType.TIMESTAMP, None)
    return stats_without_timestamp
236
+
237
@staticmethod
def _log_json_artifact(
    drift_per_feature_values: Series,
    monitoring_context: mm_context.MonitoringApplicationContext,
) -> None:
    """
    Log the drift values as a JSON artifact under the `features_drift_results` key.

    :param drift_per_feature_values: The drift value of each feature.
    :param monitoring_context:       The monitoring context to log the artifact with.
    """
    monitoring_context.logger.debug("Logging drift value per feature JSON artifact")
    json_artifact = mlrun.artifacts.Artifact(
        body=drift_per_feature_values.to_json(),
        format="json",
        key="features_drift_results",
    )
    monitoring_context.log_artifact(json_artifact)
    monitoring_context.logger.debug("Logged JSON artifact successfully")
252
+
253
def _log_plotly_table_artifact(
    self,
    sample_set_statistics: mlrun.common.model_monitoring.helpers.FeatureStats,
    inputs_statistics: mlrun.common.model_monitoring.helpers.FeatureStats,
    metrics_per_feature: DataFrame,
    drift_per_feature_values: Series,
    monitoring_context: mm_context.MonitoringApplicationContext,
) -> None:
    """
    Produce the features drift table plot and log it as an artifact.

    :param sample_set_statistics:    Statistics passed to the plot as ``sample_set_statistics``.
    :param inputs_statistics:        Statistics passed to the plot as ``inputs_statistics``.
    :param metrics_per_feature:      Metrics data frame; transposed and converted to a
                                     dictionary before being passed to the plot.
    :param drift_per_feature_values: Drift value per feature; each value is classified
                                     into a status with the application's value classifier.
    :param monitoring_context:       Context whose logger and ``log_artifact`` are used.
    """
    monitoring_context.logger.debug(
        "Feature stats",
        sample_set_statistics=sample_set_statistics,
        inputs_statistics=inputs_statistics,
    )

    monitoring_context.logger.debug("Computing drift results per feature")
    # Map every feature to a (status, drift value) pair
    drift_results = {}
    for feature_name, drift_value in drift_per_feature_values.items():
        feature_status = self._value_classifier.value_to_status(drift_value)
        drift_results[cast(str, feature_name)] = (feature_status, drift_value)

    monitoring_context.logger.debug("Logging plotly artifact")
    drift_table = mm_drift_table.FeaturesDriftTablePlot().produce(
        sample_set_statistics=sample_set_statistics,
        inputs_statistics=inputs_statistics,
        metrics=metrics_per_feature.T.to_dict(),  # pyright: ignore[reportArgumentType]
        drift_results=drift_results,
    )
    monitoring_context.log_artifact(drift_table)
    monitoring_context.logger.debug("Logged plotly artifact successfully")
283
+
284
def _log_drift_artifacts(
    self,
    monitoring_context: mm_context.MonitoringApplicationContext,
    metrics_per_feature: DataFrame,
    log_json_artifact: bool = True,
) -> None:
    """
    Log the per-feature drift data as JSON (optional) and Plotly artifacts.

    The drift value of a feature is the row-wise mean of its Hellinger and
    total variance distance columns.

    :param monitoring_context:  Context providing the statistics and the artifact logging.
    :param metrics_per_feature: Metrics data frame holding the Hellinger and total
                                variance distance columns.
    :param log_json_artifact:   Whether to log the JSON artifact as well. Defaults to True.
    """
    drift_columns = [HellingerDistance.NAME, TotalVarianceDistance.NAME]
    drift_per_feature_values = metrics_per_feature[drift_columns].mean(axis=1)

    if log_json_artifact:
        self._log_json_artifact(drift_per_feature_values, monitoring_context)

    # The timestamp entry is dropped from the sample statistics before plotting
    sample_stats = self._remove_timestamp_feature(monitoring_context.sample_df_stats)
    self._log_plotly_table_artifact(
        sample_set_statistics=sample_stats,
        inputs_statistics=monitoring_context.feature_stats,
        metrics_per_feature=metrics_per_feature,
        drift_per_feature_values=drift_per_feature_values,
        monitoring_context=monitoring_context,
    )
307
+
308
def do_tracking(
    self,
    monitoring_context: mm_context.MonitoringApplicationContext,
) -> list[
    Union[
        mm_results.ModelMonitoringApplicationResult,
        mm_results.ModelMonitoringApplicationMetric,
    ]
]:
    """
    Calculate and return the data drift metrics, averaged over the features.

    Refer to `ModelMonitoringApplicationBaseV2` for the meaning of the
    function arguments.

    :param monitoring_context: The monitoring context of the current run.

    :returns: The averaged metrics followed by the general drift result, or an
              empty list when the context holds no feature statistics.
    """
    monitoring_context.logger.debug("Starting to run the application")

    # Without reference feature statistics there is nothing to compare against
    if not monitoring_context.feature_stats:
        monitoring_context.logger.info(
            "No feature statistics found, skipping the application. \n"
            "In order to run the application, training set must be provided when logging the model."
        )
        return []

    metrics_per_feature = self._compute_metrics_per_feature(
        monitoring_context=monitoring_context
    )

    monitoring_context.logger.debug("Saving artifacts")
    self._log_drift_artifacts(
        monitoring_context=monitoring_context,
        metrics_per_feature=metrics_per_feature,
    )

    monitoring_context.logger.debug("Computing average per metric")
    metrics = self._get_metrics(metrics_per_feature)
    result = self._get_general_drift_result(
        metrics=metrics,
        monitoring_context=monitoring_context,
        metrics_per_feature=metrics_per_feature,
    )

    # Metrics first, the general result last
    metrics_and_result = [*metrics, result]
    monitoring_context.logger.debug(
        "Finished running the application", results=metrics_and_result
    )
    return metrics_and_result
@@ -0,0 +1,99 @@
1
+ # Copyright 2023 Iguazio
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ import dataclasses
16
+ import json
17
+ import re
18
+ from abc import ABC, abstractmethod
19
+
20
+ import mlrun.common.helpers
21
+ import mlrun.common.model_monitoring.helpers
22
+ import mlrun.common.schemas.model_monitoring.constants as mm_constant
23
+ import mlrun.utils.v3io_clients
24
+
25
+
26
class _ModelMonitoringApplicationDataRes(ABC):
    """
    Abstract base of the model monitoring application data classes.

    It validates the ``name`` attribute in ``__post_init__`` (which the generated
    ``__init__`` of a dataclass subclass calls automatically) and requires every
    subclass to implement ``to_dict``.
    """

    name: str

    # Compiled once at class creation rather than on every instantiation
    _NAME_PATTERN = re.compile(r"[a-zA-Z_][a-zA-Z0-9_]*")

    def __post_init__(self):
        """
        Validate that ``name`` is of the format [a-zA-Z_][a-zA-Z0-9_]*.

        :raise mlrun.errors.MLRunInvalidArgumentError: If the whole name does not
                                                       match the format.
        """
        if not self._NAME_PATTERN.fullmatch(self.name):
            raise mlrun.errors.MLRunInvalidArgumentError(
                "Attribute name must be of the format [a-zA-Z_][a-zA-Z0-9_]*"
            )

    @abstractmethod
    def to_dict(self):
        """Convert the object to a dictionary. Must be implemented by subclasses."""
        raise NotImplementedError
39
+
40
+
41
@dataclasses.dataclass
class ModelMonitoringApplicationResult(_ModelMonitoringApplicationDataRes):
    """
    The result of a custom model monitoring application.

    :param name:       (str) Name of the application result. This name must be
                       unique for each metric in a single application
                       (name must be of the format [a-zA-Z_][a-zA-Z0-9_]*).
    :param value:      (float) Value of the application result.
    :param kind:       (ResultKindApp) Kind of application result.
    :param status:     (ResultStatusApp) Status of the application result.
    :param extra_data: (dict) Extra data associated with the application result.
                       Defaults to an empty dictionary.
    """

    name: str
    value: float
    kind: mm_constant.ResultKindApp
    status: mm_constant.ResultStatusApp
    extra_data: dict = dataclasses.field(default_factory=dict)

    def to_dict(self):
        """
        Build the dictionary representation of the result, suitable for writing.

        The kind and the status are written as their ``value`` and the extra
        data is written as a JSON string.

        :returns: (dict) Dictionary representation of the result.
        """
        keys = mm_constant.ResultData
        serialized = {
            keys.RESULT_NAME: self.name,
            keys.RESULT_VALUE: self.value,
            keys.RESULT_KIND: self.kind.value,
            keys.RESULT_STATUS: self.status.value,
        }
        # The extra data is stored as a JSON string and not as a nested dictionary
        serialized[keys.RESULT_EXTRA_DATA] = json.dumps(self.extra_data)
        return serialized
74
+
75
+
76
@dataclasses.dataclass
class ModelMonitoringApplicationMetric(_ModelMonitoringApplicationDataRes):
    """
    A single metric of a custom model monitoring application.

    :param name:  (str) Name of the application metric. This name must be
                  unique for each metric in a single application
                  (name must be of the format [a-zA-Z_][a-zA-Z0-9_]*).
    :param value: (float) Value of the application metric.
    """

    name: str
    value: float

    def to_dict(self):
        """
        Build the dictionary representation of the metric, suitable for writing.

        :returns: (dict) Dictionary representation of the result.
        """
        keys = mm_constant.MetricData
        serialized = {keys.METRIC_NAME: self.name}
        serialized[keys.METRIC_VALUE] = self.value
        return serialized