mlrun 1.7.0rc5__py3-none-any.whl → 1.7.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of mlrun might be problematic. Click here for more details.

Files changed (234) hide show
  1. mlrun/__init__.py +11 -1
  2. mlrun/__main__.py +39 -121
  3. mlrun/{datastore/helpers.py → alerts/__init__.py} +2 -5
  4. mlrun/alerts/alert.py +248 -0
  5. mlrun/api/schemas/__init__.py +4 -3
  6. mlrun/artifacts/__init__.py +8 -3
  7. mlrun/artifacts/base.py +39 -254
  8. mlrun/artifacts/dataset.py +9 -190
  9. mlrun/artifacts/manager.py +73 -46
  10. mlrun/artifacts/model.py +30 -158
  11. mlrun/artifacts/plots.py +23 -380
  12. mlrun/common/constants.py +73 -2
  13. mlrun/common/db/sql_session.py +3 -2
  14. mlrun/common/formatters/__init__.py +21 -0
  15. mlrun/common/formatters/artifact.py +46 -0
  16. mlrun/common/formatters/base.py +113 -0
  17. mlrun/common/formatters/feature_set.py +44 -0
  18. mlrun/common/formatters/function.py +46 -0
  19. mlrun/common/formatters/pipeline.py +53 -0
  20. mlrun/common/formatters/project.py +51 -0
  21. mlrun/common/formatters/run.py +29 -0
  22. mlrun/common/helpers.py +11 -1
  23. mlrun/{runtimes → common/runtimes}/constants.py +32 -4
  24. mlrun/common/schemas/__init__.py +21 -4
  25. mlrun/common/schemas/alert.py +202 -0
  26. mlrun/common/schemas/api_gateway.py +113 -2
  27. mlrun/common/schemas/artifact.py +28 -1
  28. mlrun/common/schemas/auth.py +11 -0
  29. mlrun/common/schemas/client_spec.py +2 -1
  30. mlrun/common/schemas/common.py +7 -4
  31. mlrun/common/schemas/constants.py +3 -0
  32. mlrun/common/schemas/feature_store.py +58 -28
  33. mlrun/common/schemas/frontend_spec.py +8 -0
  34. mlrun/common/schemas/function.py +11 -0
  35. mlrun/common/schemas/hub.py +7 -9
  36. mlrun/common/schemas/model_monitoring/__init__.py +21 -4
  37. mlrun/common/schemas/model_monitoring/constants.py +136 -42
  38. mlrun/common/schemas/model_monitoring/grafana.py +9 -5
  39. mlrun/common/schemas/model_monitoring/model_endpoints.py +89 -41
  40. mlrun/common/schemas/notification.py +69 -12
  41. mlrun/{runtimes/mpijob/v1alpha1.py → common/schemas/pagination.py} +10 -13
  42. mlrun/common/schemas/pipeline.py +7 -0
  43. mlrun/common/schemas/project.py +67 -16
  44. mlrun/common/schemas/runs.py +17 -0
  45. mlrun/common/schemas/schedule.py +1 -1
  46. mlrun/common/schemas/workflow.py +10 -2
  47. mlrun/common/types.py +14 -1
  48. mlrun/config.py +224 -58
  49. mlrun/data_types/data_types.py +11 -1
  50. mlrun/data_types/spark.py +5 -4
  51. mlrun/data_types/to_pandas.py +75 -34
  52. mlrun/datastore/__init__.py +8 -10
  53. mlrun/datastore/alibaba_oss.py +131 -0
  54. mlrun/datastore/azure_blob.py +131 -43
  55. mlrun/datastore/base.py +107 -47
  56. mlrun/datastore/datastore.py +17 -7
  57. mlrun/datastore/datastore_profile.py +91 -7
  58. mlrun/datastore/dbfs_store.py +3 -7
  59. mlrun/datastore/filestore.py +1 -3
  60. mlrun/datastore/google_cloud_storage.py +92 -32
  61. mlrun/datastore/hdfs.py +5 -0
  62. mlrun/datastore/inmem.py +6 -3
  63. mlrun/datastore/redis.py +3 -2
  64. mlrun/datastore/s3.py +30 -12
  65. mlrun/datastore/snowflake_utils.py +45 -0
  66. mlrun/datastore/sources.py +274 -59
  67. mlrun/datastore/spark_utils.py +30 -0
  68. mlrun/datastore/store_resources.py +9 -7
  69. mlrun/datastore/storeytargets.py +151 -0
  70. mlrun/datastore/targets.py +374 -102
  71. mlrun/datastore/utils.py +68 -5
  72. mlrun/datastore/v3io.py +28 -50
  73. mlrun/db/auth_utils.py +152 -0
  74. mlrun/db/base.py +231 -22
  75. mlrun/db/factory.py +1 -4
  76. mlrun/db/httpdb.py +864 -228
  77. mlrun/db/nopdb.py +268 -16
  78. mlrun/errors.py +35 -5
  79. mlrun/execution.py +111 -38
  80. mlrun/feature_store/__init__.py +0 -2
  81. mlrun/feature_store/api.py +46 -53
  82. mlrun/feature_store/common.py +6 -11
  83. mlrun/feature_store/feature_set.py +48 -23
  84. mlrun/feature_store/feature_vector.py +13 -2
  85. mlrun/feature_store/ingestion.py +7 -6
  86. mlrun/feature_store/retrieval/base.py +9 -4
  87. mlrun/feature_store/retrieval/dask_merger.py +2 -0
  88. mlrun/feature_store/retrieval/job.py +13 -4
  89. mlrun/feature_store/retrieval/local_merger.py +2 -0
  90. mlrun/feature_store/retrieval/spark_merger.py +24 -32
  91. mlrun/feature_store/steps.py +38 -19
  92. mlrun/features.py +6 -14
  93. mlrun/frameworks/_common/plan.py +3 -3
  94. mlrun/frameworks/_dl_common/loggers/tensorboard_logger.py +7 -12
  95. mlrun/frameworks/_ml_common/plan.py +1 -1
  96. mlrun/frameworks/auto_mlrun/auto_mlrun.py +2 -2
  97. mlrun/frameworks/lgbm/__init__.py +1 -1
  98. mlrun/frameworks/lgbm/callbacks/callback.py +2 -4
  99. mlrun/frameworks/lgbm/model_handler.py +1 -1
  100. mlrun/frameworks/parallel_coordinates.py +4 -4
  101. mlrun/frameworks/pytorch/__init__.py +2 -2
  102. mlrun/frameworks/sklearn/__init__.py +1 -1
  103. mlrun/frameworks/sklearn/mlrun_interface.py +13 -3
  104. mlrun/frameworks/tf_keras/__init__.py +5 -2
  105. mlrun/frameworks/tf_keras/callbacks/logging_callback.py +1 -1
  106. mlrun/frameworks/tf_keras/mlrun_interface.py +2 -2
  107. mlrun/frameworks/xgboost/__init__.py +1 -1
  108. mlrun/k8s_utils.py +57 -12
  109. mlrun/launcher/__init__.py +1 -1
  110. mlrun/launcher/base.py +6 -5
  111. mlrun/launcher/client.py +13 -11
  112. mlrun/launcher/factory.py +1 -1
  113. mlrun/launcher/local.py +15 -5
  114. mlrun/launcher/remote.py +10 -3
  115. mlrun/lists.py +6 -2
  116. mlrun/model.py +297 -48
  117. mlrun/model_monitoring/__init__.py +1 -1
  118. mlrun/model_monitoring/api.py +152 -357
  119. mlrun/model_monitoring/applications/__init__.py +10 -0
  120. mlrun/model_monitoring/applications/_application_steps.py +190 -0
  121. mlrun/model_monitoring/applications/base.py +108 -0
  122. mlrun/model_monitoring/applications/context.py +341 -0
  123. mlrun/model_monitoring/{evidently_application.py → applications/evidently_base.py} +27 -22
  124. mlrun/model_monitoring/applications/histogram_data_drift.py +227 -91
  125. mlrun/model_monitoring/applications/results.py +99 -0
  126. mlrun/model_monitoring/controller.py +130 -303
  127. mlrun/model_monitoring/{stores/models/sqlite.py → db/__init__.py} +5 -10
  128. mlrun/model_monitoring/db/stores/__init__.py +136 -0
  129. mlrun/model_monitoring/db/stores/base/__init__.py +15 -0
  130. mlrun/model_monitoring/db/stores/base/store.py +213 -0
  131. mlrun/model_monitoring/db/stores/sqldb/__init__.py +13 -0
  132. mlrun/model_monitoring/db/stores/sqldb/models/__init__.py +71 -0
  133. mlrun/model_monitoring/db/stores/sqldb/models/base.py +190 -0
  134. mlrun/model_monitoring/db/stores/sqldb/models/mysql.py +103 -0
  135. mlrun/model_monitoring/{stores/models/mysql.py → db/stores/sqldb/models/sqlite.py} +19 -13
  136. mlrun/model_monitoring/db/stores/sqldb/sql_store.py +659 -0
  137. mlrun/model_monitoring/db/stores/v3io_kv/__init__.py +13 -0
  138. mlrun/model_monitoring/db/stores/v3io_kv/kv_store.py +726 -0
  139. mlrun/model_monitoring/db/tsdb/__init__.py +105 -0
  140. mlrun/model_monitoring/db/tsdb/base.py +448 -0
  141. mlrun/model_monitoring/db/tsdb/helpers.py +30 -0
  142. mlrun/model_monitoring/db/tsdb/tdengine/__init__.py +15 -0
  143. mlrun/model_monitoring/db/tsdb/tdengine/schemas.py +298 -0
  144. mlrun/model_monitoring/db/tsdb/tdengine/stream_graph_steps.py +42 -0
  145. mlrun/model_monitoring/db/tsdb/tdengine/tdengine_connector.py +522 -0
  146. mlrun/model_monitoring/db/tsdb/v3io/__init__.py +15 -0
  147. mlrun/model_monitoring/db/tsdb/v3io/stream_graph_steps.py +158 -0
  148. mlrun/model_monitoring/db/tsdb/v3io/v3io_connector.py +849 -0
  149. mlrun/model_monitoring/features_drift_table.py +34 -22
  150. mlrun/model_monitoring/helpers.py +177 -39
  151. mlrun/model_monitoring/model_endpoint.py +3 -2
  152. mlrun/model_monitoring/stream_processing.py +165 -398
  153. mlrun/model_monitoring/tracking_policy.py +7 -1
  154. mlrun/model_monitoring/writer.py +161 -125
  155. mlrun/package/packagers/default_packager.py +2 -2
  156. mlrun/package/packagers_manager.py +1 -0
  157. mlrun/package/utils/_formatter.py +2 -2
  158. mlrun/platforms/__init__.py +11 -10
  159. mlrun/platforms/iguazio.py +67 -228
  160. mlrun/projects/__init__.py +6 -1
  161. mlrun/projects/operations.py +47 -20
  162. mlrun/projects/pipelines.py +396 -249
  163. mlrun/projects/project.py +1125 -414
  164. mlrun/render.py +28 -22
  165. mlrun/run.py +207 -180
  166. mlrun/runtimes/__init__.py +76 -11
  167. mlrun/runtimes/base.py +40 -14
  168. mlrun/runtimes/daskjob.py +9 -2
  169. mlrun/runtimes/databricks_job/databricks_runtime.py +1 -0
  170. mlrun/runtimes/databricks_job/databricks_wrapper.py +1 -1
  171. mlrun/runtimes/funcdoc.py +1 -29
  172. mlrun/runtimes/kubejob.py +34 -128
  173. mlrun/runtimes/local.py +39 -10
  174. mlrun/runtimes/mpijob/__init__.py +0 -20
  175. mlrun/runtimes/mpijob/abstract.py +8 -8
  176. mlrun/runtimes/mpijob/v1.py +1 -1
  177. mlrun/runtimes/nuclio/api_gateway.py +646 -177
  178. mlrun/runtimes/nuclio/application/__init__.py +15 -0
  179. mlrun/runtimes/nuclio/application/application.py +758 -0
  180. mlrun/runtimes/nuclio/application/reverse_proxy.go +95 -0
  181. mlrun/runtimes/nuclio/function.py +188 -68
  182. mlrun/runtimes/nuclio/serving.py +57 -60
  183. mlrun/runtimes/pod.py +191 -58
  184. mlrun/runtimes/remotesparkjob.py +11 -8
  185. mlrun/runtimes/sparkjob/spark3job.py +17 -18
  186. mlrun/runtimes/utils.py +40 -73
  187. mlrun/secrets.py +6 -2
  188. mlrun/serving/__init__.py +8 -1
  189. mlrun/serving/remote.py +2 -3
  190. mlrun/serving/routers.py +89 -64
  191. mlrun/serving/server.py +54 -26
  192. mlrun/serving/states.py +187 -56
  193. mlrun/serving/utils.py +19 -11
  194. mlrun/serving/v2_serving.py +136 -63
  195. mlrun/track/tracker.py +2 -1
  196. mlrun/track/trackers/mlflow_tracker.py +5 -0
  197. mlrun/utils/async_http.py +26 -6
  198. mlrun/utils/db.py +18 -0
  199. mlrun/utils/helpers.py +375 -105
  200. mlrun/utils/http.py +2 -2
  201. mlrun/utils/logger.py +75 -9
  202. mlrun/utils/notifications/notification/__init__.py +14 -10
  203. mlrun/utils/notifications/notification/base.py +48 -0
  204. mlrun/utils/notifications/notification/console.py +2 -0
  205. mlrun/utils/notifications/notification/git.py +24 -1
  206. mlrun/utils/notifications/notification/ipython.py +2 -0
  207. mlrun/utils/notifications/notification/slack.py +96 -21
  208. mlrun/utils/notifications/notification/webhook.py +63 -2
  209. mlrun/utils/notifications/notification_pusher.py +146 -16
  210. mlrun/utils/regex.py +9 -0
  211. mlrun/utils/retryer.py +3 -2
  212. mlrun/utils/v3io_clients.py +2 -3
  213. mlrun/utils/version/version.json +2 -2
  214. mlrun-1.7.2.dist-info/METADATA +390 -0
  215. mlrun-1.7.2.dist-info/RECORD +351 -0
  216. {mlrun-1.7.0rc5.dist-info → mlrun-1.7.2.dist-info}/WHEEL +1 -1
  217. mlrun/feature_store/retrieval/conversion.py +0 -271
  218. mlrun/kfpops.py +0 -868
  219. mlrun/model_monitoring/application.py +0 -310
  220. mlrun/model_monitoring/batch.py +0 -974
  221. mlrun/model_monitoring/controller_handler.py +0 -37
  222. mlrun/model_monitoring/prometheus.py +0 -216
  223. mlrun/model_monitoring/stores/__init__.py +0 -111
  224. mlrun/model_monitoring/stores/kv_model_endpoint_store.py +0 -574
  225. mlrun/model_monitoring/stores/model_endpoint_store.py +0 -145
  226. mlrun/model_monitoring/stores/models/__init__.py +0 -27
  227. mlrun/model_monitoring/stores/models/base.py +0 -84
  228. mlrun/model_monitoring/stores/sql_model_endpoint_store.py +0 -382
  229. mlrun/platforms/other.py +0 -305
  230. mlrun-1.7.0rc5.dist-info/METADATA +0 -269
  231. mlrun-1.7.0rc5.dist-info/RECORD +0 -323
  232. {mlrun-1.7.0rc5.dist-info → mlrun-1.7.2.dist-info}/LICENSE +0 -0
  233. {mlrun-1.7.0rc5.dist-info → mlrun-1.7.2.dist-info}/entry_points.txt +0 -0
  234. {mlrun-1.7.0rc5.dist-info → mlrun-1.7.2.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,190 @@
1
+ # Copyright 2024 Iguazio
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ import json
16
+ import traceback
17
+ from typing import Any, Optional, Union
18
+
19
+ import mlrun.common.schemas.alert as alert_objects
20
+ import mlrun.common.schemas.model_monitoring.constants as mm_constant
21
+ import mlrun.datastore
22
+ import mlrun.model_monitoring
23
+ from mlrun.model_monitoring.helpers import get_stream_path
24
+ from mlrun.serving import GraphContext
25
+ from mlrun.serving.utils import StepToDict
26
+ from mlrun.utils import logger
27
+
28
+ from .context import MonitoringApplicationContext
29
+ from .results import ModelMonitoringApplicationMetric, ModelMonitoringApplicationResult
30
+
31
+
32
class _PushToMonitoringWriter(StepToDict):
    """Graph step that forwards application results and metrics to the writer stream."""

    kind = "monitoring_application_stream_pusher"

    def __init__(
        self,
        project: str,
        writer_application_name: str,
        stream_uri: Optional[str] = None,
        name: Optional[str] = None,
    ):
        """
        Class for pushing application results to the monitoring writer stream.

        :param project:                 Project name.
        :param writer_application_name: Writer application name.
        :param stream_uri:              Stream URI for pushing results. When omitted (or empty),
                                        it is resolved with `get_stream_path` from the project
                                        and the writer application name.
        :param name:                    Name of the PushToMonitoringWriter instance,
                                        defaults to "PushToMonitoringWriter".
        """
        self.project = project
        self.application_name_to_push = writer_application_name
        self.stream_uri = stream_uri or get_stream_path(
            project=self.project, function_name=self.application_name_to_push
        )
        # The stream pusher is created on the first `do` call, see `_lazy_init`
        self.output_stream = None
        self.name = name or "PushToMonitoringWriter"

    def do(
        self,
        event: tuple[
            list[
                Union[
                    ModelMonitoringApplicationResult, ModelMonitoringApplicationMetric
                ]
            ],
            MonitoringApplicationContext,
        ],
    ) -> None:
        """
        Push application results to the monitoring writer stream.
        One writer event is pushed per item in the results list.

        :param event: Monitoring result(s) to push and the original event from the controller,
                      wrapped in a `MonitoringApplicationContext`.
        """
        self._lazy_init()
        application_results, application_context = event

        # Fields that are identical for every event produced from this application run
        common_fields = {
            mm_constant.WriterEvent.APPLICATION_NAME: application_context.application_name,
            mm_constant.WriterEvent.ENDPOINT_ID: application_context.endpoint_id,
            mm_constant.WriterEvent.START_INFER_TIME: (
                application_context.start_infer_time.isoformat(
                    sep=" ", timespec="microseconds"
                )
            ),
            mm_constant.WriterEvent.END_INFER_TIME: (
                application_context.end_infer_time.isoformat(
                    sep=" ", timespec="microseconds"
                )
            ),
        }

        for result in application_results:
            data = result.to_dict()
            if isinstance(result, ModelMonitoringApplicationResult):
                event_kind = mm_constant.WriterEventKind.RESULT
                # Results (unlike metrics) also carry the current sample statistics
                data[mm_constant.ResultData.CURRENT_STATS] = json.dumps(
                    application_context.sample_df_stats
                )
            else:
                # Anything that is not a result is pushed as a metric
                event_kind = mm_constant.WriterEventKind.METRIC

            # A fresh dictionary per pushed event: the kind and data are set exactly once,
            # and an already-pushed event is never mutated by the next iteration.
            writer_event = {
                **common_fields,
                mm_constant.WriterEvent.EVENT_KIND: event_kind,
                mm_constant.WriterEvent.DATA: json.dumps(data),
            }
            logger.info(
                f"Pushing data = {writer_event} \n to stream = {self.stream_uri}"
            )
            self.output_stream.push([writer_event])
            logger.info(f"Pushed data to {self.stream_uri} successfully")

    def _lazy_init(self):
        """Create the output stream pusher for `stream_uri` if it was not created yet."""
        if self.output_stream is None:
            self.output_stream = mlrun.datastore.get_stream_pusher(
                self.stream_uri,
            )
119
+
120
+
121
class _PrepareMonitoringEvent(StepToDict):
    def __init__(self, context: GraphContext, application_name: str) -> None:
        """
        Step that wraps each incoming application event in a monitoring context.

        :param context:          Graph context, passed on to every
                                 `MonitoringApplicationContext` this step creates.
        :param application_name: Application name.
        """
        self.graph_context = context
        self.application_name = application_name
        # Model endpoint objects seen so far, keyed by endpoint ID
        self.model_endpoints: dict[str, mlrun.model_monitoring.ModelEndpoint] = {}

    def do(self, event: dict[str, Any]) -> MonitoringApplicationContext:
        """
        Build the monitoring context for the application step.

        :param event: Application event.
        :return: Application context.
        """
        monitoring_context = MonitoringApplicationContext(
            graph_context=self.graph_context,
            application_name=self.application_name,
            event=event,
            model_endpoint_dict=self.model_endpoints,
        )

        # Remember the endpoint object of this event; an entry that is already
        # stored for the same endpoint ID is left untouched.
        endpoint_id = monitoring_context.endpoint_id
        endpoint = monitoring_context.model_endpoint
        self.model_endpoints.setdefault(endpoint_id, endpoint)

        return monitoring_context
151
+
152
+
153
class _ApplicationErrorHandler(StepToDict):
    def __init__(self, project: str, name: Optional[str] = None):
        """
        Step that reports a failure of a model monitoring application.

        :param project: Project name.
        :param name:    Name of the step instance, defaults to "ApplicationErrorHandler".
        """
        self.project = project
        self.name = name or "ApplicationErrorHandler"

    def do(self, event):
        """
        Handle model monitoring application error. This step logs the error and
        generates an event describing it.

        :param event: Application event. The code reads `event.body` (for the endpoint ID
                      and application name), `event.error` and `event.timestamp`.
        """
        failed_context = event.body
        endpoint_id = failed_context.endpoint_id
        application_name = failed_context.application_name

        error = event.error
        formatted_traceback = "".join(
            traceback.format_exception(None, error, error.__traceback__)
        )

        error_data = {
            "Endpoint ID": endpoint_id,
            "Application Class": application_name,
            "Error": formatted_traceback,
            "Timestamp": event.timestamp,
        }
        logger.error("Error in application step", **error_data)

        # The formatted traceback is used for the log record only; the generated
        # event carries the original error object instead.
        error_data["Error"] = error

        entity = alert_objects.EventEntities(
            kind=alert_objects.EventEntityKind.MODEL_MONITORING_APPLICATION,
            project=self.project,
            ids=[f"{self.project}_{application_name}"],
        )
        event_data = alert_objects.Event(
            kind=alert_objects.EventKind.MM_APP_FAILED,
            entity=entity,
            value_dict=error_data,
        )

        run_db = mlrun.get_run_db()
        run_db.generate_event(
            name=alert_objects.EventKind.MM_APP_FAILED, event_data=event_data
        )
        logger.info("Event generated successfully")
@@ -0,0 +1,108 @@
1
+ # Copyright 2023 Iguazio
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ from abc import ABC, abstractmethod
16
+ from typing import Any, Union
17
+
18
+ import mlrun.model_monitoring.applications.context as mm_context
19
+ import mlrun.model_monitoring.applications.results as mm_results
20
+ from mlrun.serving.utils import MonitoringApplicationToDict
21
+
22
+
23
class ModelMonitoringApplicationBase(MonitoringApplicationToDict, ABC):
    """
    A base class for a model monitoring application.
    Inherit from this class and implement `do_tracking` to create a custom
    model monitoring application.

    Example of a very simple custom application::

        class MyApp(ModelMonitoringApplicationBase):
            def do_tracking(
                self,
                monitoring_context: mm_context.MonitoringApplicationContext,
            ) -> ModelMonitoringApplicationResult:
                monitoring_context.log_artifact(
                    TableArtifact(
                        "sample_df_stats",
                        df=monitoring_context.dict_to_histogram(
                            monitoring_context.sample_df_stats
                        ),
                    )
                )
                return ModelMonitoringApplicationResult(
                    name="data_drift_test",
                    value=0.5,
                    kind=mm_constant.ResultKindApp.data_drift,
                    status=mm_constant.ResultStatusApp.detected,
                )


    """

    kind = "monitoring_application"

    def do(
        self, monitoring_context: mm_context.MonitoringApplicationContext
    ) -> tuple[
        list[
            Union[
                mm_results.ModelMonitoringApplicationResult,
                mm_results.ModelMonitoringApplicationMetric,
            ]
        ],
        mm_context.MonitoringApplicationContext,
    ]:
        """
        Process the monitoring event and return application results & metrics.

        The value returned from `do_tracking` is normalized to a list: a dictionary
        becomes one `ModelMonitoringApplicationMetric` per item, and any other
        non-list value is wrapped in a single-element list.

        :param monitoring_context: (MonitoringApplicationContext) The monitoring application context.
        :returns:                  A tuple of:
                                   [0] = list of application outputs, each of type
                                   `ModelMonitoringApplicationResult` or of type
                                   `ModelMonitoringApplicationMetric`.
                                   [1] = the original application event, wrapped in a
                                   `MonitoringApplicationContext` object.
        """
        outcome = self.do_tracking(monitoring_context=monitoring_context)
        if isinstance(outcome, dict):
            # A plain dictionary holds metrics only: key -> metric name, value -> metric value
            outcome = [
                mm_results.ModelMonitoringApplicationMetric(name=name, value=value)
                for name, value in outcome.items()
            ]
        elif not isinstance(outcome, list):
            outcome = [outcome]
        return outcome, monitoring_context

    @abstractmethod
    def do_tracking(
        self,
        monitoring_context: mm_context.MonitoringApplicationContext,
    ) -> Union[
        mm_results.ModelMonitoringApplicationResult,
        list[
            Union[
                mm_results.ModelMonitoringApplicationResult,
                mm_results.ModelMonitoringApplicationMetric,
            ]
        ],
        dict[str, Any],
    ]:
        """
        Implement this method with your custom monitoring logic.

        :param monitoring_context: (MonitoringApplicationContext) The monitoring context to process.

        :returns:                  (ModelMonitoringApplicationResult) or
                                   (list[Union[ModelMonitoringApplicationResult,
                                   ModelMonitoringApplicationMetric]])
                                   or a dict that contains the application metrics only (in this
                                   case each key is a metric name and the corresponding value is
                                   the metric value).
        """
        raise NotImplementedError
@@ -0,0 +1,341 @@
1
+ # Copyright 2024 Iguazio
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ import json
16
+ import socket
17
+ from typing import Any, Optional, cast
18
+
19
+ import numpy as np
20
+ import pandas as pd
21
+
22
+ import mlrun.common.constants as mlrun_constants
23
+ import mlrun.common.schemas.model_monitoring.constants as mm_constants
24
+ import mlrun.feature_store as fstore
25
+ import mlrun.features
26
+ import mlrun.serving
27
+ import mlrun.utils
28
+ from mlrun.artifacts import Artifact, DatasetArtifact, ModelArtifact, get_model
29
+ from mlrun.common.model_monitoring.helpers import FeatureStats, pad_features_hist
30
+ from mlrun.model_monitoring.helpers import (
31
+ calculate_inputs_statistics,
32
+ get_endpoint_record,
33
+ )
34
+ from mlrun.model_monitoring.model_endpoint import ModelEndpoint
35
+
36
+
37
class MonitoringApplicationContext:
    """
    The monitoring context holds all the relevant information for the monitoring application,
    and it can also be used for logging artifacts and results.
    The monitoring context has the following attributes:

    :param application_name:   (str) The model monitoring application name.
    :param project_name:       (str) The project name.
    :param project:            (MlrunProject) The project object.
    :param logger:             (mlrun.utils.Logger) MLRun logger.
    :param nuclio_logger:      (nuclio.request.Logger) Nuclio logger.
    :param sample_df_stats:    (FeatureStats) The new sample distribution dictionary.
    :param feature_stats:      (FeatureStats) The train sample distribution dictionary.
    :param sample_df:          (pd.DataFrame) The new sample DataFrame.
    :param start_infer_time:   (pd.Timestamp) Start time of the monitoring schedule.
    :param end_infer_time:     (pd.Timestamp) End time of the monitoring schedule.
    :param latest_request:     (pd.Timestamp) Timestamp of the latest request on this endpoint_id.
    :param endpoint_id:        (str) ID of the monitored model endpoint
    :param output_stream_uri:  (str) URI of the output stream for results
    :param model_endpoint:     (ModelEndpoint) The model endpoint object.
    :param feature_names:      (list[str]) List of models feature names.
    :param label_names:        (list[str]) List of models label names.
    :param model:              (tuple[str, ModelArtifact, dict]) The model file, model spec object,
                               and the extra data items.
    """

    def __init__(
        self,
        *,
        graph_context: mlrun.serving.GraphContext,
        application_name: str,
        event: dict[str, Any],
        model_endpoint_dict: dict[str, ModelEndpoint],
    ) -> None:
        """
        Initialize a `MonitoringApplicationContext` object.
        Note: this object should not be instantiated manually.

        :param graph_context:       The serving graph context; provides the project name
                                    and the Nuclio logger.
        :param application_name:    The application name.
        :param event:               The instance data dictionary.
        :param model_endpoint_dict: Dictionary of model endpoints, looked up by endpoint ID.
        """
        self.application_name = application_name

        self.project_name = graph_context.project
        self.project = mlrun.load_project(url=self.project_name)

        # MLRun Logger
        self.logger = mlrun.utils.create_logger(
            level=mlrun.mlconf.log_level,
            formatter_kind=mlrun.mlconf.log_formatter,
            name="monitoring-application",
        )
        # Nuclio logger - `nuclio.request.Logger`.
        # Note: this logger does not accept keyword arguments.
        self.nuclio_logger = graph_context.logger

        # Event data
        event_keys = mm_constants.ApplicationEvent
        raw_start = cast(str, event.get(event_keys.START_INFER_TIME))
        self.start_infer_time = pd.Timestamp(raw_start)
        raw_end = cast(str, event.get(event_keys.END_INFER_TIME))
        self.end_infer_time = pd.Timestamp(raw_end)
        self.endpoint_id = cast(str, event.get(event_keys.ENDPOINT_ID))
        self.output_stream_uri = cast(str, event.get(event_keys.OUTPUT_STREAM_URI))

        # Statistics - calculated on first access
        self._feature_stats: Optional[FeatureStats] = None
        self._sample_df_stats: Optional[FeatureStats] = None

        # Default labels for the artifacts
        self._default_labels = self._get_default_labels()

        # Persistent data - fetched when needed
        self._sample_df: Optional[pd.DataFrame] = None
        self._model_endpoint: Optional[ModelEndpoint] = model_endpoint_dict.get(
            self.endpoint_id
        )

    def _get_default_labels(self) -> dict[str, str]:
        """Build the labels that are attached to every artifact logged through this context."""
        internal_labels = mlrun_constants.MLRunInternalLabels
        return {
            internal_labels.runner_pod: socket.gethostname(),
            internal_labels.producer_type: "model-monitoring-app",
            internal_labels.app_name: self.application_name,
            internal_labels.endpoint_id: self.endpoint_id,
        }

    def _add_default_labels(self, labels: Optional[dict[str, str]]) -> dict[str, str]:
        """Add the default labels to logged artifacts labels"""
        # The default labels are the right-hand operand, so they win on a key collision
        given_labels = labels or {}
        return given_labels | self._default_labels

    @property
    def sample_df(self) -> pd.DataFrame:
        """
        The sample dataframe of the monitored window.
        It is retrieved on first access as offline features of the endpoint's monitoring
        feature set, between `start_infer_time` and `end_infer_time`, and kept for later calls.
        """
        if self._sample_df is not None:
            return self._sample_df

        feature_set = fstore.get_feature_set(
            self.model_endpoint.status.monitoring_feature_set_uri
        )
        feature_set_name = feature_set.metadata.name
        vector = fstore.FeatureVector(
            name=f"{self.endpoint_id}_vector",
            features=[f"{feature_set_name}.*"],
            with_indexes=True,
        )
        vector.metadata.tag = self.application_name
        vector.feature_set_objects = {feature_set.metadata.name: feature_set}

        offline_response = vector.get_offline_features(
            start_time=self.start_infer_time,
            end_time=self.end_infer_time,
            timestamp_for_filtering=mm_constants.FeatureSetFeatures.time_stamp(),
        )
        self._sample_df = offline_response.to_dataframe().reset_index(drop=True)
        return self._sample_df

    @property
    def model_endpoint(self) -> ModelEndpoint:
        """The model endpoint object, fetched from the endpoint record when not already set."""
        if self._model_endpoint:
            return self._model_endpoint

        endpoint_record = get_endpoint_record(self.project_name, self.endpoint_id)
        self._model_endpoint = ModelEndpoint.from_flat_dict(endpoint_record)
        return self._model_endpoint

    @property
    def feature_stats(self) -> FeatureStats:
        """
        The feature statistics stored on the model endpoint status, parsed from JSON
        and passed through `pad_features_hist`.
        """
        if self._feature_stats:
            return self._feature_stats

        self._feature_stats = json.loads(self.model_endpoint.status.feature_stats)
        pad_features_hist(self._feature_stats)
        return self._feature_stats

    @property
    def sample_df_stats(self) -> FeatureStats:
        """statistics of the sample dataframe"""
        if self._sample_df_stats:
            return self._sample_df_stats

        self._sample_df_stats = calculate_inputs_statistics(
            self.feature_stats, self.sample_df
        )
        return self._sample_df_stats

    @property
    def feature_names(self) -> list[str]:
        """The feature names of the model"""
        names = self.model_endpoint.spec.feature_names
        if isinstance(names, list):
            return names
        # Anything that is not already a list is parsed as JSON
        return json.loads(names)

    @property
    def label_names(self) -> list[str]:
        """The label names of the model"""
        names = self.model_endpoint.spec.label_names
        if isinstance(names, list):
            return names
        # Anything that is not already a list is parsed as JSON
        return json.loads(names)

    @property
    def model(self) -> tuple[str, ModelArtifact, dict]:
        """The model file, model spec object, and the extra data items"""
        model_uri = self.model_endpoint.spec.model_uri
        return get_model(model_uri)

    @staticmethod
    def dict_to_histogram(histogram_dict: FeatureStats) -> pd.DataFrame:
        """
        Convert histogram dictionary to pandas DataFrame with feature histograms as columns

        :param histogram_dict: Histogram dictionary

        :returns: Histogram dataframe
        """
        # Features without a "hist" entry are skipped; for the others the histogram
        # values are divided by the feature's count to get a probability distribution.
        normalized_histograms = {
            feature: np.array(stats["hist"][0]) / stats["count"]
            for feature, stats in histogram_dict.items()
            if "hist" in stats
        }
        return pd.DataFrame(normalized_histograms)

    def log_artifact(
        self,
        item,
        body=None,
        tag: str = "",
        local_path: str = "",
        artifact_path: Optional[str] = None,
        format: Optional[str] = None,
        upload: Optional[bool] = None,
        labels: Optional[dict[str, str]] = None,
        target_path: Optional[str] = None,
        **kwargs,
    ) -> Artifact:
        """
        Log an artifact through the project, with the default labels of this context added.
        See :func:`~mlrun.projects.MlrunProject.log_artifact` for the documentation.
        """
        merged_labels = self._add_default_labels(labels)
        return self.project.log_artifact(
            item,
            body=body,
            tag=tag,
            local_path=local_path,
            artifact_path=artifact_path,
            format=format,
            upload=upload,
            labels=merged_labels,
            target_path=target_path,
            **kwargs,
        )

    def log_dataset(
        self,
        key,
        df,
        tag="",
        local_path=None,
        artifact_path=None,
        upload=None,
        labels=None,
        format="",
        preview=None,
        stats=None,
        target_path="",
        extra_data=None,
        label_column: Optional[str] = None,
        **kwargs,
    ) -> DatasetArtifact:
        """
        Log a dataset artifact through the project, with the default labels of this context added.
        See :func:`~mlrun.projects.MlrunProject.log_dataset` for the documentation.
        """
        merged_labels = self._add_default_labels(labels)
        return self.project.log_dataset(
            key,
            df,
            tag=tag,
            local_path=local_path,
            artifact_path=artifact_path,
            upload=upload,
            labels=merged_labels,
            format=format,
            preview=preview,
            stats=stats,
            target_path=target_path,
            extra_data=extra_data,
            label_column=label_column,
            **kwargs,
        )

    def log_model(
        self,
        key,
        body=None,
        framework="",
        tag="",
        model_dir=None,
        model_file=None,
        algorithm=None,
        metrics=None,
        parameters=None,
        artifact_path=None,
        upload=None,
        labels=None,
        inputs: Optional[list[mlrun.features.Feature]] = None,
        outputs: Optional[list[mlrun.features.Feature]] = None,
        feature_vector: Optional[str] = None,
        feature_weights: Optional[list] = None,
        training_set=None,
        label_column=None,
        extra_data=None,
        **kwargs,
    ) -> ModelArtifact:
        """
        Log a model artifact through the project, with the default labels of this context added.
        See :func:`~mlrun.projects.MlrunProject.log_model` for the documentation.
        """
        merged_labels = self._add_default_labels(labels)
        return self.project.log_model(
            key,
            body=body,
            framework=framework,
            tag=tag,
            model_dir=model_dir,
            model_file=model_file,
            algorithm=algorithm,
            metrics=metrics,
            parameters=parameters,
            artifact_path=artifact_path,
            upload=upload,
            labels=merged_labels,
            inputs=inputs,
            outputs=outputs,
            feature_vector=feature_vector,
            feature_weights=feature_weights,
            training_set=training_set,
            label_column=label_column,
            extra_data=extra_data,
            **kwargs,
        )