mlrun 1.6.4rc7__py3-none-any.whl → 1.7.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of mlrun might be problematic. Click here for more details.

Files changed (305) hide show
  1. mlrun/__init__.py +11 -1
  2. mlrun/__main__.py +40 -122
  3. mlrun/alerts/__init__.py +15 -0
  4. mlrun/alerts/alert.py +248 -0
  5. mlrun/api/schemas/__init__.py +5 -4
  6. mlrun/artifacts/__init__.py +8 -3
  7. mlrun/artifacts/base.py +47 -257
  8. mlrun/artifacts/dataset.py +11 -192
  9. mlrun/artifacts/manager.py +79 -47
  10. mlrun/artifacts/model.py +31 -159
  11. mlrun/artifacts/plots.py +23 -380
  12. mlrun/common/constants.py +74 -1
  13. mlrun/common/db/sql_session.py +5 -5
  14. mlrun/common/formatters/__init__.py +21 -0
  15. mlrun/common/formatters/artifact.py +45 -0
  16. mlrun/common/formatters/base.py +113 -0
  17. mlrun/common/formatters/feature_set.py +33 -0
  18. mlrun/common/formatters/function.py +46 -0
  19. mlrun/common/formatters/pipeline.py +53 -0
  20. mlrun/common/formatters/project.py +51 -0
  21. mlrun/common/formatters/run.py +29 -0
  22. mlrun/common/helpers.py +12 -3
  23. mlrun/common/model_monitoring/helpers.py +9 -5
  24. mlrun/{runtimes → common/runtimes}/constants.py +37 -9
  25. mlrun/common/schemas/__init__.py +31 -5
  26. mlrun/common/schemas/alert.py +202 -0
  27. mlrun/common/schemas/api_gateway.py +196 -0
  28. mlrun/common/schemas/artifact.py +25 -4
  29. mlrun/common/schemas/auth.py +16 -5
  30. mlrun/common/schemas/background_task.py +1 -1
  31. mlrun/common/schemas/client_spec.py +4 -2
  32. mlrun/common/schemas/common.py +7 -4
  33. mlrun/common/schemas/constants.py +3 -0
  34. mlrun/common/schemas/feature_store.py +74 -44
  35. mlrun/common/schemas/frontend_spec.py +15 -7
  36. mlrun/common/schemas/function.py +12 -1
  37. mlrun/common/schemas/hub.py +11 -18
  38. mlrun/common/schemas/memory_reports.py +2 -2
  39. mlrun/common/schemas/model_monitoring/__init__.py +20 -4
  40. mlrun/common/schemas/model_monitoring/constants.py +123 -42
  41. mlrun/common/schemas/model_monitoring/grafana.py +13 -9
  42. mlrun/common/schemas/model_monitoring/model_endpoints.py +101 -54
  43. mlrun/common/schemas/notification.py +71 -14
  44. mlrun/common/schemas/object.py +2 -2
  45. mlrun/{model_monitoring/controller_handler.py → common/schemas/pagination.py} +9 -12
  46. mlrun/common/schemas/pipeline.py +8 -1
  47. mlrun/common/schemas/project.py +69 -18
  48. mlrun/common/schemas/runs.py +7 -1
  49. mlrun/common/schemas/runtime_resource.py +8 -12
  50. mlrun/common/schemas/schedule.py +4 -4
  51. mlrun/common/schemas/tag.py +1 -2
  52. mlrun/common/schemas/workflow.py +12 -4
  53. mlrun/common/types.py +14 -1
  54. mlrun/config.py +154 -69
  55. mlrun/data_types/data_types.py +6 -1
  56. mlrun/data_types/spark.py +2 -2
  57. mlrun/data_types/to_pandas.py +67 -37
  58. mlrun/datastore/__init__.py +6 -8
  59. mlrun/datastore/alibaba_oss.py +131 -0
  60. mlrun/datastore/azure_blob.py +143 -42
  61. mlrun/datastore/base.py +102 -58
  62. mlrun/datastore/datastore.py +34 -13
  63. mlrun/datastore/datastore_profile.py +146 -20
  64. mlrun/datastore/dbfs_store.py +3 -7
  65. mlrun/datastore/filestore.py +1 -4
  66. mlrun/datastore/google_cloud_storage.py +97 -33
  67. mlrun/datastore/hdfs.py +56 -0
  68. mlrun/datastore/inmem.py +6 -3
  69. mlrun/datastore/redis.py +7 -2
  70. mlrun/datastore/s3.py +34 -12
  71. mlrun/datastore/snowflake_utils.py +45 -0
  72. mlrun/datastore/sources.py +303 -111
  73. mlrun/datastore/spark_utils.py +31 -2
  74. mlrun/datastore/store_resources.py +9 -7
  75. mlrun/datastore/storeytargets.py +151 -0
  76. mlrun/datastore/targets.py +453 -176
  77. mlrun/datastore/utils.py +72 -58
  78. mlrun/datastore/v3io.py +6 -1
  79. mlrun/db/base.py +274 -41
  80. mlrun/db/factory.py +1 -1
  81. mlrun/db/httpdb.py +893 -225
  82. mlrun/db/nopdb.py +291 -33
  83. mlrun/errors.py +36 -6
  84. mlrun/execution.py +115 -42
  85. mlrun/feature_store/__init__.py +0 -2
  86. mlrun/feature_store/api.py +65 -73
  87. mlrun/feature_store/common.py +7 -12
  88. mlrun/feature_store/feature_set.py +76 -55
  89. mlrun/feature_store/feature_vector.py +39 -31
  90. mlrun/feature_store/ingestion.py +7 -6
  91. mlrun/feature_store/retrieval/base.py +16 -11
  92. mlrun/feature_store/retrieval/dask_merger.py +2 -0
  93. mlrun/feature_store/retrieval/job.py +13 -4
  94. mlrun/feature_store/retrieval/local_merger.py +2 -0
  95. mlrun/feature_store/retrieval/spark_merger.py +24 -32
  96. mlrun/feature_store/steps.py +45 -34
  97. mlrun/features.py +11 -21
  98. mlrun/frameworks/_common/artifacts_library.py +9 -9
  99. mlrun/frameworks/_common/mlrun_interface.py +5 -5
  100. mlrun/frameworks/_common/model_handler.py +48 -48
  101. mlrun/frameworks/_common/plan.py +5 -6
  102. mlrun/frameworks/_common/producer.py +3 -4
  103. mlrun/frameworks/_common/utils.py +5 -5
  104. mlrun/frameworks/_dl_common/loggers/logger.py +6 -7
  105. mlrun/frameworks/_dl_common/loggers/mlrun_logger.py +9 -9
  106. mlrun/frameworks/_dl_common/loggers/tensorboard_logger.py +23 -47
  107. mlrun/frameworks/_ml_common/artifacts_library.py +1 -2
  108. mlrun/frameworks/_ml_common/loggers/logger.py +3 -4
  109. mlrun/frameworks/_ml_common/loggers/mlrun_logger.py +4 -5
  110. mlrun/frameworks/_ml_common/model_handler.py +24 -24
  111. mlrun/frameworks/_ml_common/pkl_model_server.py +2 -2
  112. mlrun/frameworks/_ml_common/plan.py +2 -2
  113. mlrun/frameworks/_ml_common/plans/calibration_curve_plan.py +2 -3
  114. mlrun/frameworks/_ml_common/plans/confusion_matrix_plan.py +2 -3
  115. mlrun/frameworks/_ml_common/plans/dataset_plan.py +3 -3
  116. mlrun/frameworks/_ml_common/plans/feature_importance_plan.py +3 -3
  117. mlrun/frameworks/_ml_common/plans/roc_curve_plan.py +4 -4
  118. mlrun/frameworks/_ml_common/utils.py +4 -4
  119. mlrun/frameworks/auto_mlrun/auto_mlrun.py +9 -9
  120. mlrun/frameworks/huggingface/model_server.py +4 -4
  121. mlrun/frameworks/lgbm/__init__.py +33 -33
  122. mlrun/frameworks/lgbm/callbacks/callback.py +2 -4
  123. mlrun/frameworks/lgbm/callbacks/logging_callback.py +4 -5
  124. mlrun/frameworks/lgbm/callbacks/mlrun_logging_callback.py +4 -5
  125. mlrun/frameworks/lgbm/mlrun_interfaces/booster_mlrun_interface.py +1 -3
  126. mlrun/frameworks/lgbm/mlrun_interfaces/mlrun_interface.py +6 -6
  127. mlrun/frameworks/lgbm/model_handler.py +10 -10
  128. mlrun/frameworks/lgbm/model_server.py +6 -6
  129. mlrun/frameworks/lgbm/utils.py +5 -5
  130. mlrun/frameworks/onnx/dataset.py +8 -8
  131. mlrun/frameworks/onnx/mlrun_interface.py +3 -3
  132. mlrun/frameworks/onnx/model_handler.py +6 -6
  133. mlrun/frameworks/onnx/model_server.py +7 -7
  134. mlrun/frameworks/parallel_coordinates.py +6 -6
  135. mlrun/frameworks/pytorch/__init__.py +18 -18
  136. mlrun/frameworks/pytorch/callbacks/callback.py +4 -5
  137. mlrun/frameworks/pytorch/callbacks/logging_callback.py +17 -17
  138. mlrun/frameworks/pytorch/callbacks/mlrun_logging_callback.py +11 -11
  139. mlrun/frameworks/pytorch/callbacks/tensorboard_logging_callback.py +23 -29
  140. mlrun/frameworks/pytorch/callbacks_handler.py +38 -38
  141. mlrun/frameworks/pytorch/mlrun_interface.py +20 -20
  142. mlrun/frameworks/pytorch/model_handler.py +17 -17
  143. mlrun/frameworks/pytorch/model_server.py +7 -7
  144. mlrun/frameworks/sklearn/__init__.py +13 -13
  145. mlrun/frameworks/sklearn/estimator.py +4 -4
  146. mlrun/frameworks/sklearn/metrics_library.py +14 -14
  147. mlrun/frameworks/sklearn/mlrun_interface.py +16 -9
  148. mlrun/frameworks/sklearn/model_handler.py +2 -2
  149. mlrun/frameworks/tf_keras/__init__.py +10 -7
  150. mlrun/frameworks/tf_keras/callbacks/logging_callback.py +15 -15
  151. mlrun/frameworks/tf_keras/callbacks/mlrun_logging_callback.py +11 -11
  152. mlrun/frameworks/tf_keras/callbacks/tensorboard_logging_callback.py +19 -23
  153. mlrun/frameworks/tf_keras/mlrun_interface.py +9 -11
  154. mlrun/frameworks/tf_keras/model_handler.py +14 -14
  155. mlrun/frameworks/tf_keras/model_server.py +6 -6
  156. mlrun/frameworks/xgboost/__init__.py +13 -13
  157. mlrun/frameworks/xgboost/model_handler.py +6 -6
  158. mlrun/k8s_utils.py +61 -17
  159. mlrun/launcher/__init__.py +1 -1
  160. mlrun/launcher/base.py +16 -15
  161. mlrun/launcher/client.py +13 -11
  162. mlrun/launcher/factory.py +1 -1
  163. mlrun/launcher/local.py +23 -13
  164. mlrun/launcher/remote.py +17 -10
  165. mlrun/lists.py +7 -6
  166. mlrun/model.py +478 -103
  167. mlrun/model_monitoring/__init__.py +1 -1
  168. mlrun/model_monitoring/api.py +163 -371
  169. mlrun/{runtimes/mpijob/v1alpha1.py → model_monitoring/applications/__init__.py} +9 -15
  170. mlrun/model_monitoring/applications/_application_steps.py +188 -0
  171. mlrun/model_monitoring/applications/base.py +108 -0
  172. mlrun/model_monitoring/applications/context.py +341 -0
  173. mlrun/model_monitoring/{evidently_application.py → applications/evidently_base.py} +27 -22
  174. mlrun/model_monitoring/applications/histogram_data_drift.py +354 -0
  175. mlrun/model_monitoring/applications/results.py +99 -0
  176. mlrun/model_monitoring/controller.py +131 -278
  177. mlrun/model_monitoring/db/__init__.py +18 -0
  178. mlrun/model_monitoring/db/stores/__init__.py +136 -0
  179. mlrun/model_monitoring/db/stores/base/__init__.py +15 -0
  180. mlrun/model_monitoring/db/stores/base/store.py +213 -0
  181. mlrun/model_monitoring/db/stores/sqldb/__init__.py +13 -0
  182. mlrun/model_monitoring/db/stores/sqldb/models/__init__.py +71 -0
  183. mlrun/model_monitoring/db/stores/sqldb/models/base.py +190 -0
  184. mlrun/model_monitoring/db/stores/sqldb/models/mysql.py +103 -0
  185. mlrun/model_monitoring/{stores/models/mysql.py → db/stores/sqldb/models/sqlite.py} +19 -13
  186. mlrun/model_monitoring/db/stores/sqldb/sql_store.py +659 -0
  187. mlrun/model_monitoring/db/stores/v3io_kv/__init__.py +13 -0
  188. mlrun/model_monitoring/db/stores/v3io_kv/kv_store.py +726 -0
  189. mlrun/model_monitoring/db/tsdb/__init__.py +105 -0
  190. mlrun/model_monitoring/db/tsdb/base.py +448 -0
  191. mlrun/model_monitoring/db/tsdb/helpers.py +30 -0
  192. mlrun/model_monitoring/db/tsdb/tdengine/__init__.py +15 -0
  193. mlrun/model_monitoring/db/tsdb/tdengine/schemas.py +279 -0
  194. mlrun/model_monitoring/db/tsdb/tdengine/stream_graph_steps.py +42 -0
  195. mlrun/model_monitoring/db/tsdb/tdengine/tdengine_connector.py +507 -0
  196. mlrun/model_monitoring/db/tsdb/v3io/__init__.py +15 -0
  197. mlrun/model_monitoring/db/tsdb/v3io/stream_graph_steps.py +158 -0
  198. mlrun/model_monitoring/db/tsdb/v3io/v3io_connector.py +849 -0
  199. mlrun/model_monitoring/features_drift_table.py +134 -106
  200. mlrun/model_monitoring/helpers.py +199 -55
  201. mlrun/model_monitoring/metrics/__init__.py +13 -0
  202. mlrun/model_monitoring/metrics/histogram_distance.py +127 -0
  203. mlrun/model_monitoring/model_endpoint.py +3 -2
  204. mlrun/model_monitoring/stream_processing.py +131 -398
  205. mlrun/model_monitoring/tracking_policy.py +9 -2
  206. mlrun/model_monitoring/writer.py +161 -125
  207. mlrun/package/__init__.py +6 -6
  208. mlrun/package/context_handler.py +5 -5
  209. mlrun/package/packager.py +7 -7
  210. mlrun/package/packagers/default_packager.py +8 -8
  211. mlrun/package/packagers/numpy_packagers.py +15 -15
  212. mlrun/package/packagers/pandas_packagers.py +5 -5
  213. mlrun/package/packagers/python_standard_library_packagers.py +10 -10
  214. mlrun/package/packagers_manager.py +19 -23
  215. mlrun/package/utils/_formatter.py +6 -6
  216. mlrun/package/utils/_pickler.py +2 -2
  217. mlrun/package/utils/_supported_format.py +4 -4
  218. mlrun/package/utils/log_hint_utils.py +2 -2
  219. mlrun/package/utils/type_hint_utils.py +4 -9
  220. mlrun/platforms/__init__.py +11 -10
  221. mlrun/platforms/iguazio.py +24 -203
  222. mlrun/projects/operations.py +52 -25
  223. mlrun/projects/pipelines.py +191 -197
  224. mlrun/projects/project.py +1227 -400
  225. mlrun/render.py +16 -19
  226. mlrun/run.py +209 -184
  227. mlrun/runtimes/__init__.py +83 -15
  228. mlrun/runtimes/base.py +51 -35
  229. mlrun/runtimes/daskjob.py +17 -10
  230. mlrun/runtimes/databricks_job/databricks_cancel_task.py +1 -1
  231. mlrun/runtimes/databricks_job/databricks_runtime.py +8 -7
  232. mlrun/runtimes/databricks_job/databricks_wrapper.py +1 -1
  233. mlrun/runtimes/funcdoc.py +1 -29
  234. mlrun/runtimes/function_reference.py +1 -1
  235. mlrun/runtimes/kubejob.py +34 -128
  236. mlrun/runtimes/local.py +40 -11
  237. mlrun/runtimes/mpijob/__init__.py +0 -20
  238. mlrun/runtimes/mpijob/abstract.py +9 -10
  239. mlrun/runtimes/mpijob/v1.py +1 -1
  240. mlrun/{model_monitoring/stores/models/sqlite.py → runtimes/nuclio/__init__.py} +7 -9
  241. mlrun/runtimes/nuclio/api_gateway.py +769 -0
  242. mlrun/runtimes/nuclio/application/__init__.py +15 -0
  243. mlrun/runtimes/nuclio/application/application.py +758 -0
  244. mlrun/runtimes/nuclio/application/reverse_proxy.go +95 -0
  245. mlrun/runtimes/{function.py → nuclio/function.py} +200 -83
  246. mlrun/runtimes/{nuclio.py → nuclio/nuclio.py} +6 -6
  247. mlrun/runtimes/{serving.py → nuclio/serving.py} +65 -68
  248. mlrun/runtimes/pod.py +281 -101
  249. mlrun/runtimes/remotesparkjob.py +12 -9
  250. mlrun/runtimes/sparkjob/spark3job.py +67 -51
  251. mlrun/runtimes/utils.py +41 -75
  252. mlrun/secrets.py +9 -5
  253. mlrun/serving/__init__.py +8 -1
  254. mlrun/serving/remote.py +2 -7
  255. mlrun/serving/routers.py +85 -69
  256. mlrun/serving/server.py +69 -44
  257. mlrun/serving/states.py +209 -36
  258. mlrun/serving/utils.py +22 -14
  259. mlrun/serving/v1_serving.py +6 -7
  260. mlrun/serving/v2_serving.py +129 -54
  261. mlrun/track/tracker.py +2 -1
  262. mlrun/track/tracker_manager.py +3 -3
  263. mlrun/track/trackers/mlflow_tracker.py +6 -2
  264. mlrun/utils/async_http.py +6 -8
  265. mlrun/utils/azure_vault.py +1 -1
  266. mlrun/utils/clones.py +1 -2
  267. mlrun/utils/condition_evaluator.py +3 -3
  268. mlrun/utils/db.py +21 -3
  269. mlrun/utils/helpers.py +405 -225
  270. mlrun/utils/http.py +3 -6
  271. mlrun/utils/logger.py +112 -16
  272. mlrun/utils/notifications/notification/__init__.py +17 -13
  273. mlrun/utils/notifications/notification/base.py +50 -2
  274. mlrun/utils/notifications/notification/console.py +2 -0
  275. mlrun/utils/notifications/notification/git.py +24 -1
  276. mlrun/utils/notifications/notification/ipython.py +3 -1
  277. mlrun/utils/notifications/notification/slack.py +96 -21
  278. mlrun/utils/notifications/notification/webhook.py +59 -2
  279. mlrun/utils/notifications/notification_pusher.py +149 -30
  280. mlrun/utils/regex.py +9 -0
  281. mlrun/utils/retryer.py +208 -0
  282. mlrun/utils/singleton.py +1 -1
  283. mlrun/utils/v3io_clients.py +4 -6
  284. mlrun/utils/version/version.json +2 -2
  285. mlrun/utils/version/version.py +2 -6
  286. mlrun-1.7.0.dist-info/METADATA +378 -0
  287. mlrun-1.7.0.dist-info/RECORD +351 -0
  288. {mlrun-1.6.4rc7.dist-info → mlrun-1.7.0.dist-info}/WHEEL +1 -1
  289. mlrun/feature_store/retrieval/conversion.py +0 -273
  290. mlrun/kfpops.py +0 -868
  291. mlrun/model_monitoring/application.py +0 -310
  292. mlrun/model_monitoring/batch.py +0 -1095
  293. mlrun/model_monitoring/prometheus.py +0 -219
  294. mlrun/model_monitoring/stores/__init__.py +0 -111
  295. mlrun/model_monitoring/stores/kv_model_endpoint_store.py +0 -576
  296. mlrun/model_monitoring/stores/model_endpoint_store.py +0 -147
  297. mlrun/model_monitoring/stores/models/__init__.py +0 -27
  298. mlrun/model_monitoring/stores/models/base.py +0 -84
  299. mlrun/model_monitoring/stores/sql_model_endpoint_store.py +0 -384
  300. mlrun/platforms/other.py +0 -306
  301. mlrun-1.6.4rc7.dist-info/METADATA +0 -272
  302. mlrun-1.6.4rc7.dist-info/RECORD +0 -314
  303. {mlrun-1.6.4rc7.dist-info → mlrun-1.7.0.dist-info}/LICENSE +0 -0
  304. {mlrun-1.6.4rc7.dist-info → mlrun-1.7.0.dist-info}/entry_points.txt +0 -0
  305. {mlrun-1.6.4rc7.dist-info → mlrun-1.7.0.dist-info}/top_level.txt +0 -0
@@ -14,15 +14,16 @@
14
14
 
15
15
  import uuid
16
16
  import warnings
17
- from typing import Union
17
+ from abc import ABC
18
18
 
19
19
  import pandas as pd
20
20
  import semver
21
21
 
22
+ import mlrun.model_monitoring.applications.base as mm_base
23
+ import mlrun.model_monitoring.applications.context as mm_context
22
24
  from mlrun.errors import MLRunIncompatibleVersionError
23
- from mlrun.model_monitoring.application import ModelMonitoringApplicationBase
24
25
 
25
- SUPPORTED_EVIDENTLY_VERSION = semver.Version.parse("0.4.11")
26
+ SUPPORTED_EVIDENTLY_VERSION = semver.Version.parse("0.4.32")
26
27
 
27
28
 
28
29
  def _check_evidently_version(*, cur: semver.Version, ref: semver.Version) -> None:
@@ -56,15 +57,15 @@ except ModuleNotFoundError:
56
57
 
57
58
 
58
59
  if _HAS_EVIDENTLY:
59
- from evidently.renderers.notebook_utils import determine_template
60
- from evidently.report.report import Report
61
- from evidently.suite.base_suite import Suite
60
+ from evidently.suite.base_suite import Display
62
61
  from evidently.ui.type_aliases import STR_UUID
63
62
  from evidently.ui.workspace import Workspace
64
- from evidently.utils.dashboard import TemplateParams
63
+ from evidently.utils.dashboard import TemplateParams, file_html_template
65
64
 
66
65
 
67
- class EvidentlyModelMonitoringApplicationBase(ModelMonitoringApplicationBase):
66
+ class EvidentlyModelMonitoringApplicationBase(
67
+ mm_base.ModelMonitoringApplicationBase, ABC
68
+ ):
68
69
  def __init__(
69
70
  self, evidently_workspace_path: str, evidently_project_id: "STR_UUID"
70
71
  ) -> None:
@@ -77,6 +78,8 @@ class EvidentlyModelMonitoringApplicationBase(ModelMonitoringApplicationBase):
77
78
  :param evidently_project_id: (str) The ID of the Evidently project.
78
79
 
79
80
  """
81
+
82
+ # TODO : more than one project (mep -> project)
80
83
  if not _HAS_EVIDENTLY:
81
84
  raise ModuleNotFoundError("Evidently is not installed - the app cannot run")
82
85
  self.evidently_workspace = Workspace.create(evidently_workspace_path)
@@ -85,32 +88,38 @@ class EvidentlyModelMonitoringApplicationBase(ModelMonitoringApplicationBase):
85
88
  evidently_project_id
86
89
  )
87
90
 
91
+ @staticmethod
88
92
  def log_evidently_object(
89
- self, evidently_object: Union["Report", "Suite"], artifact_name: str
90
- ):
93
+ monitoring_context: mm_context.MonitoringApplicationContext,
94
+ evidently_object: "Display",
95
+ artifact_name: str,
96
+ ) -> None:
91
97
  """
92
98
  Logs an Evidently report or suite as an artifact.
93
99
 
94
- :param evidently_object: (Union[Report, Suite]) The Evidently report or suite object.
100
+ :param monitoring_context: (MonitoringApplicationContext) The monitoring context to process.
101
+ :param evidently_object: (Display) The Evidently display to log, e.g. a report or a test suite object.
95
102
  :param artifact_name: (str) The name for the logged artifact.
96
103
  """
97
104
  evidently_object_html = evidently_object.get_html()
98
- self.context.log_artifact(
105
+ monitoring_context.log_artifact(
99
106
  artifact_name, body=evidently_object_html.encode("utf-8"), format="html"
100
107
  )
101
108
 
102
109
  def log_project_dashboard(
103
110
  self,
111
+ monitoring_context: mm_context.MonitoringApplicationContext,
104
112
  timestamp_start: pd.Timestamp,
105
113
  timestamp_end: pd.Timestamp,
106
114
  artifact_name: str = "dashboard",
107
- ):
115
+ ) -> None:
108
116
  """
109
117
  Logs an Evidently project dashboard.
110
118
 
111
- :param timestamp_start: (pd.Timestamp) The start timestamp for the dashboard data.
112
- :param timestamp_end: (pd.Timestamp) The end timestamp for the dashboard data.
113
- :param artifact_name: (str) The name for the logged artifact.
119
+ :param monitoring_context: (MonitoringApplicationContext) The monitoring context to process.
120
+ :param timestamp_start: (pd.Timestamp) The start timestamp for the dashboard data.
121
+ :param timestamp_end: (pd.Timestamp) The end timestamp for the dashboard data.
122
+ :param artifact_name: (str) The name for the logged artifact.
114
123
  """
115
124
 
116
125
  dashboard_info = self.evidently_project.build_dashboard_info(
@@ -122,11 +131,7 @@ class EvidentlyModelMonitoringApplicationBase(ModelMonitoringApplicationBase):
122
131
  additional_graphs={},
123
132
  )
124
133
 
125
- dashboard_html = self._render(determine_template("inline"), template_params)
126
- self.context.log_artifact(
134
+ dashboard_html = file_html_template(params=template_params)
135
+ monitoring_context.log_artifact(
127
136
  artifact_name, body=dashboard_html.encode("utf-8"), format="html"
128
137
  )
129
-
130
- @staticmethod
131
- def _render(temple_func, template_params: "TemplateParams"):
132
- return temple_func(params=template_params)
@@ -0,0 +1,354 @@
1
+ # Copyright 2024 Iguazio
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ import json
16
+ from dataclasses import dataclass
17
+ from typing import Final, Optional, Protocol, Union, cast
18
+
19
+ import numpy as np
20
+ from pandas import DataFrame, Series
21
+
22
+ import mlrun.artifacts
23
+ import mlrun.common.model_monitoring.helpers
24
+ import mlrun.model_monitoring.applications.context as mm_context
25
+ import mlrun.model_monitoring.applications.results as mm_results
26
+ import mlrun.model_monitoring.features_drift_table as mm_drift_table
27
+ from mlrun.common.schemas.model_monitoring.constants import (
28
+ EventFieldType,
29
+ HistogramDataDriftApplicationConstants,
30
+ ResultKindApp,
31
+ ResultStatusApp,
32
+ )
33
+ from mlrun.model_monitoring.applications import (
34
+ ModelMonitoringApplicationBase,
35
+ )
36
+ from mlrun.model_monitoring.metrics.histogram_distance import (
37
+ HellingerDistance,
38
+ HistogramDistanceMetric,
39
+ KullbackLeiblerDivergence,
40
+ TotalVarianceDistance,
41
+ )
42
+
43
+
44
class InvalidMetricValueError(ValueError):
    """Raised by `DataDriftClassifier.value_to_status` for a metric value outside the range [0, 1]."""
46
+
47
+
48
class InvalidThresholdValueError(ValueError):
    """Raised by `DataDriftClassifier.__post_init__` unless `0 < potential < detected < 1` holds."""
50
+
51
+
52
class ValueClassifier(Protocol):
    """Structural interface for objects that translate a numeric value into a `ResultStatusApp`."""

    def value_to_status(self, value: float) -> ResultStatusApp:
        """
        Classify the given numeric value.

        :param value: The numeric value to classify.
        :returns:     The `ResultStatusApp` that corresponds to the value.
        """
54
+
55
+
56
@dataclass
class DataDriftClassifier:
    """
    Map a numeric data drift value onto a categorical `ResultStatusApp`.

    A value at or above `detected` is reported as a detection, a value at or above `potential`
    as a potential detection, and any lower value as no detection.
    """

    # Inclusive lower bound of the "potential detection" band
    potential: float = 0.5
    # Inclusive lower bound of the "detected" band
    detected: float = 0.7

    def __post_init__(self) -> None:
        """
        Validate the thresholds once the dataclass fields are set.

        :raises InvalidThresholdValueError: Unless `0 < potential < detected < 1`.
        """
        thresholds_are_valid = 0 < self.potential < self.detected < 1
        if thresholds_are_valid:
            return
        raise InvalidThresholdValueError(
            "The provided thresholds do not comply with the rules"
        )

    def value_to_status(self, value: float) -> ResultStatusApp:
        """
        Classify a data drift metric value.

        NOTE(review): a NaN value fails every comparison below and therefore ends up as
        `no_detection` - confirm that this is the intended behavior.

        :param value: The numeric value of the data drift metric, between 0 and 1.
        :returns:     `ResultStatusApp` according to the classification.
        :raises InvalidMetricValueError: If the value is smaller than 0 or greater than 1.
        """
        if value < 0 or value > 1:
            raise InvalidMetricValueError(
                f"{value = } is invalid, must be in the range [0, 1]."
            )
        # The bands are checked from the most severe to the least severe one
        for threshold, status in (
            (self.detected, ResultStatusApp.detected),
            (self.potential, ResultStatusApp.potential_detection),
        ):
            if value >= threshold:
                return status
        return ResultStatusApp.no_detection
88
+
89
+
90
class HistogramDataDriftApplication(ModelMonitoringApplicationBase):
    """
    MLRun's default data drift application for model monitoring.

    The application expects tabular numerical data, and calculates three metrics over the shared features' histograms.
    The metrics are calculated on features that have reference data from the training dataset. When there is no
    reference data (`feature_stats`), this application sends a warning log and does nothing.
    The three metrics are:

    * Hellinger distance.
    * Total variance distance.
    * Kullback-Leibler divergence.

    Each metric is calculated over all the features individually and the mean is taken as the metric value.
    The average of Hellinger and total variance distance is taken as the result.

    The application logs two artifacts:

    * A JSON with the general drift per feature.
    * A Plotly table with the different metrics per feature.

    This application is deployed by default when calling:

    .. code-block:: python

        project.enable_model_monitoring()

    To avoid it, pass `deploy_histogram_data_drift_app=False`.
    """

    NAME: Final[str] = HistogramDataDriftApplicationConstants.NAME

    # The general drift result is the average of these two metrics, so they must always be computed
    _REQUIRED_METRICS = {HellingerDistance, TotalVarianceDistance}

    # The metric classes that are computed for every feature
    metrics: list[type[HistogramDistanceMetric]] = [
        HellingerDistance,
        KullbackLeiblerDivergence,
        TotalVarianceDistance,
    ]

    def __init__(self, value_classifier: Optional[ValueClassifier] = None) -> None:
        """
        :param value_classifier: Classifier object that adheres to the `ValueClassifier` protocol.
                                 If not provided, the default `DataDriftClassifier()` is used.
        """
        self._value_classifier = value_classifier or DataDriftClassifier()
        # Guards against a `metrics` list that drops one of the required metrics
        assert self._REQUIRED_METRICS <= set(
            self.metrics
        ), "TVD and Hellinger distance are required for the general data drift result"

    def _compute_metrics_per_feature(
        self, monitoring_context: mm_context.MonitoringApplicationContext
    ) -> DataFrame:
        """
        Compute the metrics for the different features and labels.

        :param monitoring_context: The monitoring context that holds the reference (`feature_stats`) and
                                   the sample (`sample_df_stats`) statistics.
        :returns: A data frame with one row per feature of the reference statistics and one column per
                  metric in `self.metrics`.
        """
        metrics_per_feature = DataFrame(
            columns=[metric_class.NAME for metric_class in self.metrics]
        )
        feature_stats = monitoring_context.dict_to_histogram(
            monitoring_context.feature_stats
        )
        sample_df_stats = monitoring_context.dict_to_histogram(
            monitoring_context.sample_df_stats
        )
        # Only features with reference data are iterated
        for feature_name in feature_stats:
            sample_hist = np.asarray(sample_df_stats[feature_name])
            reference_hist = np.asarray(feature_stats[feature_name])
            monitoring_context.logger.info(
                "Computing metrics for feature", feature_name=feature_name
            )
            metrics_per_feature.loc[feature_name] = {  # pyright: ignore[reportCallIssue,reportArgumentType]
                metric.NAME: metric(
                    distrib_t=sample_hist, distrib_u=reference_hist
                ).compute()
                for metric in self.metrics
            }
        monitoring_context.logger.info("Finished computing the metrics")

        return metrics_per_feature

    def _get_general_drift_result(
        self,
        metrics: list[mm_results.ModelMonitoringApplicationMetric],
        monitoring_context: mm_context.MonitoringApplicationContext,
        metrics_per_feature: DataFrame,
    ) -> mm_results.ModelMonitoringApplicationResult:
        """
        Get the general drift result from the metrics list.

        :param metrics:             The averaged metrics, as returned from `_get_metrics`.
        :param monitoring_context:  The monitoring context; its `sample_df_stats` are attached to the result.
        :param metrics_per_feature: The per-feature metrics, attached to the result as drift measures.
        :returns: The data drift result whose value is the mean of the Hellinger and the total variance
                  distance means, along with its classified status.
        """
        # Built once instead of on every iteration of the filter below
        general_drift_metric_names = {
            f"{HellingerDistance.NAME}_mean",
            f"{TotalVarianceDistance.NAME}_mean",
        }
        value = cast(
            float,
            np.mean(
                [
                    metric.value
                    for metric in metrics
                    if metric.name in general_drift_metric_names
                ]
            ),
        )

        status = self._value_classifier.value_to_status(value)
        return mm_results.ModelMonitoringApplicationResult(
            name=HistogramDataDriftApplicationConstants.GENERAL_RESULT_NAME,
            value=value,
            kind=ResultKindApp.data_drift,
            status=status,
            extra_data={
                EventFieldType.CURRENT_STATS: json.dumps(
                    monitoring_context.sample_df_stats
                ),
                EventFieldType.DRIFT_MEASURES: json.dumps(
                    metrics_per_feature.T.to_dict()
                    | {metric.name: metric.value for metric in metrics}
                ),
                EventFieldType.DRIFT_STATUS: status.value,
            },
        )

    @staticmethod
    def _get_metrics(
        metrics_per_feature: DataFrame,
    ) -> list[mm_results.ModelMonitoringApplicationMetric]:
        """
        Average the metrics over the features.

        :param metrics_per_feature: The per-feature metrics data frame.
        :returns: One `ModelMonitoringApplicationMetric` per column, named `<column>_mean`.
        """
        metrics_mean = metrics_per_feature.mean().to_dict()
        return [
            mm_results.ModelMonitoringApplicationMetric(
                name=f"{name}_mean",
                value=value,
            )
            for name, value in metrics_mean.items()
        ]

    @staticmethod
    def _get_shared_features_sample_stats(
        monitoring_context: mm_context.MonitoringApplicationContext,
    ) -> mlrun.common.model_monitoring.helpers.FeatureStats:
        """
        Filter out features without reference data in `feature_stats`, e.g. `timestamp`.

        :param monitoring_context: The monitoring context that holds both statistics dictionaries.
        :returns: The sample statistics of the keys that appear in `feature_stats`.
        """
        return mlrun.common.model_monitoring.helpers.FeatureStats(
            {
                key: monitoring_context.sample_df_stats[key]
                for key in monitoring_context.feature_stats
            }
        )

    @staticmethod
    def _log_json_artifact(
        drift_per_feature_values: Series,
        monitoring_context: mm_context.MonitoringApplicationContext,
    ) -> None:
        """
        Log the drift values as a JSON artifact.

        :param drift_per_feature_values: The drift value of each feature.
        :param monitoring_context:       The monitoring context used for logging the artifact.
        """
        monitoring_context.logger.debug("Logging drift value per feature JSON artifact")
        monitoring_context.log_artifact(
            mlrun.artifacts.Artifact(
                body=drift_per_feature_values.to_json(),
                format="json",
                key="features_drift_results",
            )
        )
        monitoring_context.logger.debug("Logged JSON artifact successfully")

    def _log_plotly_table_artifact(
        self,
        sample_set_statistics: mlrun.common.model_monitoring.helpers.FeatureStats,
        inputs_statistics: mlrun.common.model_monitoring.helpers.FeatureStats,
        metrics_per_feature: DataFrame,
        drift_per_feature_values: Series,
        monitoring_context: mm_context.MonitoringApplicationContext,
    ) -> None:
        """
        Log the Plotly drift table artifact.

        :param sample_set_statistics:    Passed as is to `FeaturesDriftTablePlot.produce`.
        :param inputs_statistics:        Passed as is to `FeaturesDriftTablePlot.produce`.
        :param metrics_per_feature:      The per-feature metrics data frame.
        :param drift_per_feature_values: The drift value of each feature, classified here into a status.
        :param monitoring_context:       The monitoring context used for logging the artifact.
        """
        monitoring_context.logger.debug(
            "Feature stats",
            sample_set_statistics=sample_set_statistics,
            inputs_statistics=inputs_statistics,
        )

        monitoring_context.logger.debug("Computing drift results per feature")
        # Each feature is mapped to its (status, value) pair
        drift_results = {
            cast(str, key): (self._value_classifier.value_to_status(value), value)
            for key, value in drift_per_feature_values.items()
        }
        monitoring_context.logger.debug("Logging plotly artifact")
        monitoring_context.log_artifact(
            mm_drift_table.FeaturesDriftTablePlot().produce(
                sample_set_statistics=sample_set_statistics,
                inputs_statistics=inputs_statistics,
                metrics=metrics_per_feature.T.to_dict(),  # pyright: ignore[reportArgumentType]
                drift_results=drift_results,
            )
        )
        monitoring_context.logger.debug("Logged plotly artifact successfully")

    def _log_drift_artifacts(
        self,
        monitoring_context: mm_context.MonitoringApplicationContext,
        metrics_per_feature: DataFrame,
        log_json_artifact: bool = True,
    ) -> None:
        """
        Log JSON and Plotly drift data per feature artifacts.

        :param monitoring_context:  The monitoring context used for logging the artifacts.
        :param metrics_per_feature: The per-feature metrics data frame.
        :param log_json_artifact:   Whether to log the JSON artifact in addition to the Plotly table.
        """
        # The drift of a feature is the average of its Hellinger and total variance distances
        drift_per_feature_values = metrics_per_feature[
            [HellingerDistance.NAME, TotalVarianceDistance.NAME]
        ].mean(axis=1)

        if log_json_artifact:
            self._log_json_artifact(drift_per_feature_values, monitoring_context)

        self._log_plotly_table_artifact(
            sample_set_statistics=self._get_shared_features_sample_stats(
                monitoring_context
            ),
            inputs_statistics=monitoring_context.feature_stats,
            metrics_per_feature=metrics_per_feature,
            drift_per_feature_values=drift_per_feature_values,
            monitoring_context=monitoring_context,
        )

    def do_tracking(
        self,
        monitoring_context: mm_context.MonitoringApplicationContext,
    ) -> list[
        Union[
            mm_results.ModelMonitoringApplicationResult,
            mm_results.ModelMonitoringApplicationMetric,
        ]
    ]:
        """
        Calculate and return the data drift metrics, averaged over the features.

        Refer to the base class, `ModelMonitoringApplicationBase`, for the meaning of the
        function arguments.

        :returns: The averaged metrics followed by the general drift result, or an empty list when the
                  monitoring context has no feature statistics.
        """
        monitoring_context.logger.debug("Starting to run the application")
        if not monitoring_context.feature_stats:
            monitoring_context.logger.warning(
                "No feature statistics found, skipping the application. \n"
                "In order to run the application, training set must be provided when logging the model."
            )
            return []
        metrics_per_feature = self._compute_metrics_per_feature(
            monitoring_context=monitoring_context
        )
        monitoring_context.logger.debug("Saving artifacts")
        self._log_drift_artifacts(
            monitoring_context=monitoring_context,
            metrics_per_feature=metrics_per_feature,
        )
        monitoring_context.logger.debug("Computing average per metric")
        metrics = self._get_metrics(metrics_per_feature)
        result = self._get_general_drift_result(
            metrics=metrics,
            monitoring_context=monitoring_context,
            metrics_per_feature=metrics_per_feature,
        )
        metrics_and_result = metrics + [result]
        monitoring_context.logger.debug(
            "Finished running the application", results=metrics_and_result
        )
        return metrics_and_result
@@ -0,0 +1,99 @@
1
+ # Copyright 2023 Iguazio
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ import dataclasses
16
+ import json
17
+ import re
18
+ from abc import ABC, abstractmethod
19
+
20
+ import mlrun.common.helpers
21
+ import mlrun.common.model_monitoring.helpers
22
+ import mlrun.common.schemas.model_monitoring.constants as mm_constant
23
+ import mlrun.utils.v3io_clients
24
+
25
+
26
class _ModelMonitoringApplicationDataRes(ABC):
    """
    Abstract base for named model monitoring application outputs.

    Subclasses must implement `to_dict`. When a subclass is a dataclass, the
    generated `__init__` calls `__post_init__`, which validates `name`.
    """

    name: str

    def __post_init__(self):
        """
        Validate that `name` is a plain identifier.

        :raises mlrun.errors.MLRunValueError: If `name` does not fully match
                                              `[a-zA-Z_][a-zA-Z0-9_]*`.
        """
        # `fullmatch` returns None unless the whole string matches the pattern.
        if re.fullmatch(r"[a-zA-Z_][a-zA-Z0-9_]*", self.name) is None:
            raise mlrun.errors.MLRunValueError(
                "Attribute name must comply with the regex `[a-zA-Z_][a-zA-Z0-9_]*`"
            )

    @abstractmethod
    def to_dict(self):
        """Return a dictionary representation; must be overridden by subclasses."""
        raise NotImplementedError
39
+
40
+
41
@dataclasses.dataclass
class ModelMonitoringApplicationResult(_ModelMonitoringApplicationDataRes):
    """
    A single result produced by a custom model monitoring application.

    :param name:       (str) Result name. It is validated on construction and
                       must fully match :code:`[a-zA-Z_][a-zA-Z0-9_]*`.
    :param value:      (float) The result value.
    :param kind:       (ResultKindApp) The result kind; its `.value` is what
                       `to_dict` writes.
    :param status:     (ResultStatusApp) The result status; its `.value` is
                       what `to_dict` writes.
    :param extra_data: (dict) Optional additional data, defaults to an empty
                       dict. It is JSON-encoded by `to_dict`.
    """

    name: str
    value: float
    kind: mm_constant.ResultKindApp
    status: mm_constant.ResultStatusApp
    extra_data: dict = dataclasses.field(default_factory=dict)

    def to_dict(self):
        """
        Build the dictionary form of this result, suitable for writing.

        :returns: (dict) Mapping keyed by the `ResultData` constants. Kind and
                  status are stored by their `.value`, and the extra data is
                  stored as a JSON string.
        """
        keys = mm_constant.ResultData
        serialized = {
            keys.RESULT_NAME: self.name,
            keys.RESULT_VALUE: self.value,
        }
        serialized[keys.RESULT_KIND] = self.kind.value
        serialized[keys.RESULT_STATUS] = self.status.value
        serialized[keys.RESULT_EXTRA_DATA] = json.dumps(self.extra_data)
        return serialized
74
+
75
+
76
@dataclasses.dataclass
class ModelMonitoringApplicationMetric(_ModelMonitoringApplicationDataRes):
    """
    A single metric produced by a custom model monitoring application.

    :param name:  (str) Metric name. It is validated on construction and must
                  fully match :code:`[a-zA-Z_][a-zA-Z0-9_]*`.
    :param value: (float) The metric value.
    """

    name: str
    value: float

    def to_dict(self):
        """
        Build the dictionary form of this metric, suitable for writing.

        :returns: (dict) Mapping of the `MetricData` name and value keys to
                  this metric's name and value.
        """
        keys = mm_constant.MetricData
        serialized = {keys.METRIC_NAME: self.name}
        serialized[keys.METRIC_VALUE] = self.value
        return serialized