mlrun 1.7.0rc14__py3-none-any.whl → 1.7.0rc22__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of mlrun might be problematic. Click here for more details.

Files changed (160)
  1. mlrun/__init__.py +10 -1
  2. mlrun/__main__.py +23 -111
  3. mlrun/alerts/__init__.py +15 -0
  4. mlrun/alerts/alert.py +169 -0
  5. mlrun/api/schemas/__init__.py +4 -3
  6. mlrun/artifacts/__init__.py +8 -3
  7. mlrun/artifacts/base.py +36 -253
  8. mlrun/artifacts/dataset.py +9 -190
  9. mlrun/artifacts/manager.py +46 -42
  10. mlrun/artifacts/model.py +9 -141
  11. mlrun/artifacts/plots.py +14 -375
  12. mlrun/common/constants.py +65 -3
  13. mlrun/common/formatters/__init__.py +19 -0
  14. mlrun/{runtimes/mpijob/v1alpha1.py → common/formatters/artifact.py} +6 -14
  15. mlrun/common/formatters/base.py +113 -0
  16. mlrun/common/formatters/function.py +46 -0
  17. mlrun/common/formatters/pipeline.py +53 -0
  18. mlrun/common/formatters/project.py +51 -0
  19. mlrun/{runtimes → common/runtimes}/constants.py +32 -4
  20. mlrun/common/schemas/__init__.py +10 -5
  21. mlrun/common/schemas/alert.py +92 -11
  22. mlrun/common/schemas/api_gateway.py +56 -0
  23. mlrun/common/schemas/artifact.py +15 -5
  24. mlrun/common/schemas/auth.py +2 -0
  25. mlrun/common/schemas/client_spec.py +1 -0
  26. mlrun/common/schemas/frontend_spec.py +1 -0
  27. mlrun/common/schemas/function.py +4 -0
  28. mlrun/common/schemas/model_monitoring/__init__.py +15 -3
  29. mlrun/common/schemas/model_monitoring/constants.py +58 -7
  30. mlrun/common/schemas/model_monitoring/grafana.py +9 -5
  31. mlrun/common/schemas/model_monitoring/model_endpoints.py +86 -2
  32. mlrun/common/schemas/pipeline.py +0 -9
  33. mlrun/common/schemas/project.py +5 -11
  34. mlrun/common/types.py +1 -0
  35. mlrun/config.py +30 -9
  36. mlrun/data_types/to_pandas.py +9 -9
  37. mlrun/datastore/base.py +41 -9
  38. mlrun/datastore/datastore.py +6 -2
  39. mlrun/datastore/datastore_profile.py +56 -4
  40. mlrun/datastore/inmem.py +2 -2
  41. mlrun/datastore/redis.py +2 -2
  42. mlrun/datastore/s3.py +5 -0
  43. mlrun/datastore/sources.py +147 -7
  44. mlrun/datastore/store_resources.py +7 -7
  45. mlrun/datastore/targets.py +110 -42
  46. mlrun/datastore/utils.py +42 -0
  47. mlrun/db/base.py +54 -10
  48. mlrun/db/httpdb.py +282 -79
  49. mlrun/db/nopdb.py +52 -10
  50. mlrun/errors.py +11 -0
  51. mlrun/execution.py +26 -9
  52. mlrun/feature_store/__init__.py +0 -2
  53. mlrun/feature_store/api.py +12 -47
  54. mlrun/feature_store/feature_set.py +9 -0
  55. mlrun/feature_store/feature_vector.py +8 -0
  56. mlrun/feature_store/ingestion.py +7 -6
  57. mlrun/feature_store/retrieval/base.py +9 -4
  58. mlrun/feature_store/retrieval/conversion.py +9 -9
  59. mlrun/feature_store/retrieval/dask_merger.py +2 -0
  60. mlrun/feature_store/retrieval/job.py +9 -3
  61. mlrun/feature_store/retrieval/local_merger.py +2 -0
  62. mlrun/feature_store/retrieval/spark_merger.py +16 -0
  63. mlrun/frameworks/__init__.py +6 -0
  64. mlrun/frameworks/_dl_common/loggers/tensorboard_logger.py +7 -12
  65. mlrun/frameworks/parallel_coordinates.py +2 -1
  66. mlrun/frameworks/tf_keras/__init__.py +4 -1
  67. mlrun/k8s_utils.py +10 -11
  68. mlrun/launcher/base.py +4 -3
  69. mlrun/launcher/client.py +5 -3
  70. mlrun/launcher/local.py +12 -2
  71. mlrun/launcher/remote.py +9 -2
  72. mlrun/lists.py +6 -2
  73. mlrun/model.py +47 -21
  74. mlrun/model_monitoring/__init__.py +1 -1
  75. mlrun/model_monitoring/api.py +42 -18
  76. mlrun/model_monitoring/application.py +5 -305
  77. mlrun/model_monitoring/applications/__init__.py +11 -0
  78. mlrun/model_monitoring/applications/_application_steps.py +157 -0
  79. mlrun/model_monitoring/applications/base.py +280 -0
  80. mlrun/model_monitoring/applications/context.py +214 -0
  81. mlrun/model_monitoring/applications/evidently_base.py +211 -0
  82. mlrun/model_monitoring/applications/histogram_data_drift.py +132 -91
  83. mlrun/model_monitoring/applications/results.py +99 -0
  84. mlrun/model_monitoring/controller.py +3 -1
  85. mlrun/model_monitoring/db/__init__.py +2 -0
  86. mlrun/model_monitoring/db/stores/__init__.py +0 -2
  87. mlrun/model_monitoring/db/stores/base/store.py +22 -37
  88. mlrun/model_monitoring/db/stores/sqldb/models/__init__.py +43 -21
  89. mlrun/model_monitoring/db/stores/sqldb/models/base.py +39 -8
  90. mlrun/model_monitoring/db/stores/sqldb/models/mysql.py +27 -7
  91. mlrun/model_monitoring/db/stores/sqldb/models/sqlite.py +5 -0
  92. mlrun/model_monitoring/db/stores/sqldb/sql_store.py +246 -224
  93. mlrun/model_monitoring/db/stores/v3io_kv/kv_store.py +232 -216
  94. mlrun/model_monitoring/db/tsdb/__init__.py +100 -0
  95. mlrun/model_monitoring/db/tsdb/base.py +316 -0
  96. mlrun/model_monitoring/db/tsdb/helpers.py +30 -0
  97. mlrun/model_monitoring/db/tsdb/tdengine/__init__.py +15 -0
  98. mlrun/model_monitoring/db/tsdb/tdengine/schemas.py +240 -0
  99. mlrun/model_monitoring/db/tsdb/tdengine/stream_graph_steps.py +45 -0
  100. mlrun/model_monitoring/db/tsdb/tdengine/tdengine_connector.py +401 -0
  101. mlrun/model_monitoring/db/tsdb/v3io/__init__.py +15 -0
  102. mlrun/model_monitoring/db/tsdb/v3io/stream_graph_steps.py +117 -0
  103. mlrun/model_monitoring/db/tsdb/v3io/v3io_connector.py +658 -0
  104. mlrun/model_monitoring/evidently_application.py +6 -118
  105. mlrun/model_monitoring/helpers.py +63 -1
  106. mlrun/model_monitoring/model_endpoint.py +3 -2
  107. mlrun/model_monitoring/stream_processing.py +57 -216
  108. mlrun/model_monitoring/writer.py +134 -124
  109. mlrun/package/__init__.py +13 -1
  110. mlrun/package/packagers/__init__.py +6 -1
  111. mlrun/package/utils/_formatter.py +2 -2
  112. mlrun/platforms/__init__.py +10 -9
  113. mlrun/platforms/iguazio.py +21 -202
  114. mlrun/projects/operations.py +24 -12
  115. mlrun/projects/pipelines.py +79 -102
  116. mlrun/projects/project.py +271 -103
  117. mlrun/render.py +15 -14
  118. mlrun/run.py +16 -46
  119. mlrun/runtimes/__init__.py +6 -3
  120. mlrun/runtimes/base.py +14 -7
  121. mlrun/runtimes/daskjob.py +1 -0
  122. mlrun/runtimes/databricks_job/databricks_runtime.py +1 -0
  123. mlrun/runtimes/databricks_job/databricks_wrapper.py +1 -1
  124. mlrun/runtimes/funcdoc.py +0 -28
  125. mlrun/runtimes/kubejob.py +2 -1
  126. mlrun/runtimes/local.py +12 -3
  127. mlrun/runtimes/mpijob/__init__.py +0 -20
  128. mlrun/runtimes/mpijob/v1.py +1 -1
  129. mlrun/runtimes/nuclio/api_gateway.py +194 -84
  130. mlrun/runtimes/nuclio/application/application.py +170 -8
  131. mlrun/runtimes/nuclio/function.py +39 -49
  132. mlrun/runtimes/pod.py +16 -36
  133. mlrun/runtimes/remotesparkjob.py +9 -3
  134. mlrun/runtimes/sparkjob/spark3job.py +1 -1
  135. mlrun/runtimes/utils.py +6 -45
  136. mlrun/serving/__init__.py +8 -1
  137. mlrun/serving/server.py +2 -1
  138. mlrun/serving/states.py +51 -8
  139. mlrun/serving/utils.py +19 -11
  140. mlrun/serving/v2_serving.py +5 -1
  141. mlrun/track/tracker.py +2 -1
  142. mlrun/utils/async_http.py +25 -5
  143. mlrun/utils/helpers.py +157 -83
  144. mlrun/utils/logger.py +39 -7
  145. mlrun/utils/notifications/notification/__init__.py +14 -9
  146. mlrun/utils/notifications/notification/base.py +1 -1
  147. mlrun/utils/notifications/notification/slack.py +34 -7
  148. mlrun/utils/notifications/notification/webhook.py +1 -1
  149. mlrun/utils/notifications/notification_pusher.py +147 -16
  150. mlrun/utils/regex.py +9 -0
  151. mlrun/utils/v3io_clients.py +0 -1
  152. mlrun/utils/version/version.json +2 -2
  153. {mlrun-1.7.0rc14.dist-info → mlrun-1.7.0rc22.dist-info}/METADATA +14 -6
  154. {mlrun-1.7.0rc14.dist-info → mlrun-1.7.0rc22.dist-info}/RECORD +158 -138
  155. mlrun/kfpops.py +0 -865
  156. mlrun/platforms/other.py +0 -305
  157. {mlrun-1.7.0rc14.dist-info → mlrun-1.7.0rc22.dist-info}/LICENSE +0 -0
  158. {mlrun-1.7.0rc14.dist-info → mlrun-1.7.0rc22.dist-info}/WHEEL +0 -0
  159. {mlrun-1.7.0rc14.dist-info → mlrun-1.7.0rc22.dist-info}/entry_points.txt +0 -0
  160. {mlrun-1.7.0rc14.dist-info → mlrun-1.7.0rc22.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,280 @@
1
+ # Copyright 2023 Iguazio
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ from abc import ABC, abstractmethod
16
+ from typing import Any, Union, cast
17
+
18
+ import numpy as np
19
+ import pandas as pd
20
+
21
+ import mlrun
22
+ import mlrun.model_monitoring.applications.context as mm_context
23
+ import mlrun.model_monitoring.applications.results as mm_results
24
+ from mlrun.serving.utils import MonitoringApplicationToDict
25
+
26
+
27
class ModelMonitoringApplicationBaseV2(MonitoringApplicationToDict, ABC):
    """
    Abstract base class for context-driven model monitoring applications.

    Subclass it and implement :py:meth:`do_tracking`; :py:meth:`do` wraps that
    method and normalizes whatever it returns into a list.

    Example of a very simple custom application::

        class MyApp(ModelMonitoringApplicationBaseV2):
            def do_tracking(
                self,
                monitoring_context: mm_context.MonitoringApplicationContext,
            ) -> ModelMonitoringApplicationResult:
                monitoring_context.log_artifact(
                    TableArtifact(
                        "sample_df_stats",
                        df=monitoring_context.dict_to_histogram(
                            monitoring_context.sample_df_stats
                        ),
                    )
                )
                return ModelMonitoringApplicationResult(
                    name="data_drift_test",
                    value=0.5,
                    kind=mm_constant.ResultKindApp.data_drift,
                    status=mm_constant.ResultStatusApp.detected,
                )
    """

    kind = "monitoring_application"

    def do(
        self, monitoring_context: mm_context.MonitoringApplicationContext
    ) -> tuple[
        list[
            Union[
                mm_results.ModelMonitoringApplicationResult,
                mm_results.ModelMonitoringApplicationMetric,
            ]
        ],
        mm_context.MonitoringApplicationContext,
    ]:
        """
        Run the application on a monitoring event and collect its results & metrics.

        :param monitoring_context: (MonitoringApplicationContext) The monitoring application context.
        :returns:                  A tuple of:
                                   [0] = list of application outputs, each either a
                                   `ModelMonitoringApplicationResult` or a
                                   `ModelMonitoringApplicationMetric`.
                                   [1] = the same `MonitoringApplicationContext` object that was passed in.
        """
        outcome = self.do_tracking(monitoring_context=monitoring_context)
        if isinstance(outcome, dict):
            # A plain dict is shorthand for metrics only: one metric per key/value pair.
            outcome = [
                mm_results.ModelMonitoringApplicationMetric(
                    name=metric_name, value=metric_value
                )
                for metric_name, metric_value in outcome.items()
            ]
        elif not isinstance(outcome, list):
            # A single result/metric object is wrapped so callers always get a list.
            outcome = [outcome]
        return outcome, monitoring_context

    @abstractmethod
    def do_tracking(
        self,
        monitoring_context: mm_context.MonitoringApplicationContext,
    ) -> Union[
        mm_results.ModelMonitoringApplicationResult,
        list[
            Union[
                mm_results.ModelMonitoringApplicationResult,
                mm_results.ModelMonitoringApplicationMetric,
            ]
        ],
        dict[str, Any],
    ]:
        """
        Implement this method with your custom monitoring logic.

        :param monitoring_context: (MonitoringApplicationContext) The monitoring context to process.

        :returns: One of:
                  - a single `ModelMonitoringApplicationResult`;
                  - a list whose items are `ModelMonitoringApplicationResult` and/or
                    `ModelMonitoringApplicationMetric` objects;
                  - a dict holding application metrics only, where each key is a metric
                    name and each value is that metric's value.
        """
        raise NotImplementedError
113
+
114
+
115
class ModelMonitoringApplicationBase(MonitoringApplicationToDict, ABC):
    """
    Abstract base class for model monitoring applications whose tracking method
    receives the event as separate positional arguments.

    Subclass it and implement :py:meth:`do_tracking`; :py:meth:`do` unpacks the
    monitoring context into those arguments and normalizes the return value
    into a list.

    Example of a very simple custom application::

        class MyApp(ModelMonitoringApplicationBase):
            def do_tracking(
                self,
                application_name: str,
                sample_df_stats: pd.DataFrame,
                feature_stats: pd.DataFrame,
                sample_df: pd.DataFrame,
                start_infer_time: pd.Timestamp,
                end_infer_time: pd.Timestamp,
                latest_request: pd.Timestamp,
                endpoint_id: str,
                output_stream_uri: str,
            ) -> ModelMonitoringApplicationResult:
                self.context.log_artifact(
                    TableArtifact("sample_df_stats", df=sample_df_stats)
                )
                return ModelMonitoringApplicationResult(
                    name="data_drift_test",
                    value=0.5,
                    kind=mm_constant.ResultKindApp.data_drift,
                    status=mm_constant.ResultStatusApp.detected,
                )
    """

    kind = "monitoring_application"

    def do(
        self, monitoring_context: mm_context.MonitoringApplicationContext
    ) -> tuple[
        list[mm_results.ModelMonitoringApplicationResult],
        mm_context.MonitoringApplicationContext,
    ]:
        """
        Run the application on a monitoring event and collect its results.

        :param monitoring_context: (MonitoringApplicationContext) The monitoring context to process.
        :returns:                  A tuple of:
                                   [0] = list of `ModelMonitoringApplicationResult` objects.
                                   [1] = the same `MonitoringApplicationContext` object that was passed in.
        """
        event_args = self._resolve_event(monitoring_context)

        # Bind `self.context` only when it is missing or is not an MLClientCtx.
        current_context = getattr(self, "context", None)
        if not isinstance(current_context, mlrun.MLClientCtx):
            self._lazy_init(monitoring_context)

        outcome = self.do_tracking(*event_args)
        if not isinstance(outcome, list):
            outcome = [outcome]
        return outcome, monitoring_context

    def _lazy_init(self, monitoring_context: mm_context.MonitoringApplicationContext):
        """Store the monitoring context on the instance as ``self.context``."""
        self.context = cast(mlrun.MLClientCtx, monitoring_context)

    @abstractmethod
    def do_tracking(
        self,
        application_name: str,
        sample_df_stats: pd.DataFrame,
        feature_stats: pd.DataFrame,
        sample_df: pd.DataFrame,
        start_infer_time: pd.Timestamp,
        end_infer_time: pd.Timestamp,
        latest_request: pd.Timestamp,
        endpoint_id: str,
        output_stream_uri: str,
    ) -> Union[
        mm_results.ModelMonitoringApplicationResult,
        list[mm_results.ModelMonitoringApplicationResult],
    ]:
        """
        Implement this method with your custom monitoring logic.

        :param application_name:  (str) The app name.
        :param sample_df_stats:   (pd.DataFrame) The new sample distribution.
        :param feature_stats:     (pd.DataFrame) The train sample distribution.
        :param sample_df:         (pd.DataFrame) The new sample DataFrame.
        :param start_infer_time:  (pd.Timestamp) Start time of the monitoring schedule.
        :param end_infer_time:    (pd.Timestamp) End time of the monitoring schedule.
        :param latest_request:    (pd.Timestamp) Timestamp of the latest request on this endpoint_id.
        :param endpoint_id:       (str) ID of the monitored model endpoint.
        :param output_stream_uri: (str) URI of the output stream for results.

        :returns: (ModelMonitoringApplicationResult) or
                  (list[ModelMonitoringApplicationResult]) of the application results.
        """
        raise NotImplementedError

    @classmethod
    def _resolve_event(
        cls,
        monitoring_context: mm_context.MonitoringApplicationContext,
    ) -> tuple[
        str,
        pd.DataFrame,
        pd.DataFrame,
        pd.DataFrame,
        pd.Timestamp,
        pd.Timestamp,
        pd.Timestamp,
        str,
        str,
    ]:
        """
        Flatten the monitoring context into the positional arguments of `do_tracking`.

        :param monitoring_context: (MonitoringApplicationContext) The monitoring context to process.

        :return: A tuple of:
                 [0] = (str) application name
                 [1] = (pd.DataFrame) current input statistics
                 [2] = (pd.DataFrame) train statistics
                 [3] = (pd.DataFrame) current input data
                 [4] = (pd.Timestamp) start time of the monitoring schedule
                 [5] = (pd.Timestamp) end time of the monitoring schedule
                 [6] = (pd.Timestamp) timestamp of the latest request
                 [7] = (str) endpoint id
                 [8] = (str) output stream uri
        """
        ctx = monitoring_context
        # The attributes are read in the same order as the tuple positions above.
        return (
            ctx.application_name,
            cls.dict_to_histogram(ctx.sample_df_stats),
            cls.dict_to_histogram(ctx.feature_stats),
            ctx.sample_df,
            ctx.start_infer_time,
            ctx.end_infer_time,
            ctx.latest_request,
            ctx.endpoint_id,
            ctx.output_stream_uri,
        )

    @staticmethod
    def dict_to_histogram(
        histogram_dict: mlrun.common.model_monitoring.helpers.FeatureStats,
    ) -> pd.DataFrame:
        """
        Convert a histogram dictionary to a pandas DataFrame with one column per feature.

        :param histogram_dict: Histogram dictionary, keyed by feature name.

        :returns: Histogram dataframe. Features whose stats have no "hist" entry are omitted.
        """
        # For each feature, divide the first element of "hist" by "count".
        normalized = {
            feature_name: np.array(feature_stats["hist"][0]) / feature_stats["count"]
            for feature_name, feature_stats in histogram_dict.items()
            if "hist" in feature_stats
        }
        return pd.DataFrame(normalized)
@@ -0,0 +1,214 @@
1
+ # Copyright 2024 Iguazio
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+ import json
15
+ import typing
16
+
17
+ import numpy as np
18
+ import pandas as pd
19
+
20
+ import mlrun.common.helpers
21
+ import mlrun.common.model_monitoring.helpers
22
+ import mlrun.common.schemas.model_monitoring.constants as mm_constants
23
+ import mlrun.feature_store as fstore
24
+ from mlrun.artifacts.model import ModelArtifact, get_model
25
+ from mlrun.common.model_monitoring.helpers import FeatureStats, pad_features_hist
26
+ from mlrun.execution import MLClientCtx
27
+ from mlrun.model_monitoring.helpers import (
28
+ calculate_inputs_statistics,
29
+ get_endpoint_record,
30
+ )
31
+ from mlrun.model_monitoring.model_endpoint import ModelEndpoint
32
+
33
+
34
class MonitoringApplicationContext(MLClientCtx):
    """
    The monitoring context holds all the relevant information for the monitoring application,
    and it can also be used for logging artifacts and results.
    The monitoring context has the following attributes:

    :param application_name:  (str) The app name.
    :param sample_df_stats:   (FeatureStats) The new sample distribution dictionary.
    :param feature_stats:     (FeatureStats) The train sample distribution dictionary.
    :param sample_df:         (pd.DataFrame) The new sample DataFrame.
    :param start_infer_time:  (pd.Timestamp) Start time of the monitoring schedule.
    :param end_infer_time:    (pd.Timestamp) End time of the monitoring schedule.
    :param latest_request:    (pd.Timestamp) Timestamp of the latest request on this endpoint_id.
    :param endpoint_id:       (str) ID of the monitored model endpoint.
    :param output_stream_uri: (str) URI of the output stream for results.
    :param model_endpoint:    (ModelEndpoint) The model endpoint object.
    :param feature_names:     (list[str]) List of the model's feature names.
    :param label_names:       (list[str]) List of the model's label names.
    :param model:             (tuple[str, ModelArtifact, dict]) The model file, model spec object,
                              and list of extra data items.
    """

    def __init__(self, **kwargs):
        super().__init__(**kwargs)

    def __post_init__(self):
        """Reset every monitoring-specific attribute to ``None``."""
        self.application_name: typing.Optional[str] = None
        self.start_infer_time: typing.Optional[pd.Timestamp] = None
        self.end_infer_time: typing.Optional[pd.Timestamp] = None
        self.latest_request: typing.Optional[pd.Timestamp] = None
        self.endpoint_id: typing.Optional[str] = None
        self.output_stream_uri: typing.Optional[str] = None

        # Backing fields of the lazily computed properties below.
        self._sample_df: typing.Optional[pd.DataFrame] = None
        self._model_endpoint: typing.Optional[ModelEndpoint] = None
        self._feature_stats: typing.Optional[FeatureStats] = None
        self._sample_df_stats: typing.Optional[FeatureStats] = None

    @classmethod
    def from_dict(
        cls,
        attrs: dict,
        context=None,
        model_endpoint_dict: typing.Optional[dict] = None,
        **kwargs,
    ) -> "MonitoringApplicationContext":
        """
        Create an instance of the MonitoringApplicationContext from a dictionary.

        :param attrs:               The instance data dictionary (the application event).
        :param context:             The current application context. When given, it is
                                    populated in place and returned; otherwise a new context
                                    is built from the event's MLRun context entry.
        :param model_endpoint_dict: Optional dictionary of model endpoints, looked up by
                                    endpoint ID. When omitted, or when the endpoint is not in
                                    it, the `model_endpoint` property fetches the record on
                                    first access.

        :returns: The populated monitoring application context.
        """
        if not context:
            # The parent's result must be used as-is: wrapping it in parentheses with a
            # trailing comma turns it into a 1-tuple, and the attribute assignments
            # below then fail with AttributeError.
            self = super().from_dict(
                attrs=attrs.get(mm_constants.ApplicationEvent.MLRUN_CONTEXT, {}),
                **kwargs,
            )
        else:
            self = context
        # Reset the monitoring attributes on both paths so that fields this method
        # never assigns (e.g. `output_stream_uri`, `_sample_df`) always exist.
        self.__post_init__()

        self.start_infer_time = pd.Timestamp(
            attrs.get(mm_constants.ApplicationEvent.START_INFER_TIME)
        )
        self.end_infer_time = pd.Timestamp(
            attrs.get(mm_constants.ApplicationEvent.END_INFER_TIME)
        )
        self.latest_request = pd.Timestamp(
            attrs.get(mm_constants.ApplicationEvent.LAST_REQUEST)
        )
        self.application_name = attrs.get(
            mm_constants.ApplicationEvent.APPLICATION_NAME
        )
        self._feature_stats = json.loads(
            attrs.get(mm_constants.ApplicationEvent.FEATURE_STATS, "{}")
        )
        self._sample_df_stats = json.loads(
            attrs.get(mm_constants.ApplicationEvent.CURRENT_STATS, "{}")
        )

        self.endpoint_id = attrs.get(mm_constants.ApplicationEvent.ENDPOINT_ID)
        # `model_endpoint_dict` defaults to None, so guard the lookup.
        self._model_endpoint = (model_endpoint_dict or {}).get(self.endpoint_id)

        return self

    @property
    def sample_df(self) -> pd.DataFrame:
        """
        The new sample DataFrame.

        Built on first access from the endpoint's monitoring feature set, restricted to
        the window between `start_infer_time` and `end_infer_time`, then cached.
        """
        if not hasattr(self, "_sample_df") or self._sample_df is None:
            feature_set = fstore.get_feature_set(
                self.model_endpoint.status.monitoring_feature_set_uri
            )
            features = [f"{feature_set.metadata.name}.*"]
            vector = fstore.FeatureVector(
                name=f"{self.endpoint_id}_vector",
                features=features,
                with_indexes=True,
            )
            vector.metadata.tag = self.application_name
            vector.feature_set_objects = {feature_set.metadata.name: feature_set}

            offline_response = vector.get_offline_features(
                start_time=self.start_infer_time,
                end_time=self.end_infer_time,
                timestamp_for_filtering=mm_constants.FeatureSetFeatures.time_stamp(),
            )
            self._sample_df = offline_response.to_dataframe().reset_index(drop=True)
        return self._sample_df

    @property
    def model_endpoint(self) -> ModelEndpoint:
        """
        The model endpoint object.

        When no endpoint is cached, the record is fetched with `get_endpoint_record`
        for this project and endpoint ID, then cached.
        """
        if not hasattr(self, "_model_endpoint") or not self._model_endpoint:
            self._model_endpoint = ModelEndpoint.from_flat_dict(
                get_endpoint_record(self.project, self.endpoint_id)
            )
        return self._model_endpoint

    @property
    def feature_stats(self) -> FeatureStats:
        """
        The train sample distribution dictionary.

        When missing or empty, it is parsed from the model endpoint's `feature_stats`
        JSON and passed through `pad_features_hist` before being cached.
        """
        if not hasattr(self, "_feature_stats") or not self._feature_stats:
            self._feature_stats = json.loads(self.model_endpoint.status.feature_stats)
            pad_features_hist(self._feature_stats)
        return self._feature_stats

    @property
    def sample_df_stats(self) -> FeatureStats:
        """
        Statistics of the sample dataframe.

        When missing or empty, they are computed with `calculate_inputs_statistics`
        from `feature_stats` and `sample_df`, then cached.
        """
        if not hasattr(self, "_sample_df_stats") or not self._sample_df_stats:
            self._sample_df_stats = calculate_inputs_statistics(
                self.feature_stats, self.sample_df
            )
        return self._sample_df_stats

    @property
    def feature_names(self) -> list[str]:
        """The feature names of the model (JSON-decoded when not already a list)."""
        feature_names = self.model_endpoint.spec.feature_names
        return (
            feature_names
            if isinstance(feature_names, list)
            else json.loads(feature_names)
        )

    @property
    def label_names(self) -> list[str]:
        """The label names of the model (JSON-decoded when not already a list)."""
        label_names = self.model_endpoint.spec.label_names
        return label_names if isinstance(label_names, list) else json.loads(label_names)

    @property
    def model(self) -> tuple[str, ModelArtifact, dict]:
        """Return the model file, model spec object, and list of extra data items."""
        return get_model(self.model_endpoint.spec.model_uri)

    @staticmethod
    def dict_to_histogram(
        histogram_dict: mlrun.common.model_monitoring.helpers.FeatureStats,
    ) -> pd.DataFrame:
        """
        Convert a histogram dictionary to a pandas DataFrame with feature histograms as columns.

        :param histogram_dict: Histogram dictionary, keyed by feature name.

        :returns: Histogram dataframe. Features whose stats have no "hist" entry are omitted.
        """
        # Create a dictionary with feature histograms as values
        histograms = {}
        for feature, stats in histogram_dict.items():
            if "hist" in stats:
                # Normalize to probability distribution of each feature
                histograms[feature] = np.array(stats["hist"][0]) / stats["count"]

        # Convert the dictionary to pandas DataFrame
        return pd.DataFrame(histograms)