mlrun 1.7.2rc4__py3-none-any.whl → 1.8.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of mlrun might be problematic. Click here for more details.

Files changed (275)
  1. mlrun/__init__.py +26 -22
  2. mlrun/__main__.py +15 -16
  3. mlrun/alerts/alert.py +150 -15
  4. mlrun/api/schemas/__init__.py +1 -9
  5. mlrun/artifacts/__init__.py +2 -3
  6. mlrun/artifacts/base.py +62 -19
  7. mlrun/artifacts/dataset.py +17 -17
  8. mlrun/artifacts/document.py +454 -0
  9. mlrun/artifacts/manager.py +28 -18
  10. mlrun/artifacts/model.py +91 -59
  11. mlrun/artifacts/plots.py +2 -2
  12. mlrun/common/constants.py +8 -0
  13. mlrun/common/formatters/__init__.py +1 -0
  14. mlrun/common/formatters/artifact.py +1 -1
  15. mlrun/common/formatters/feature_set.py +2 -0
  16. mlrun/common/formatters/function.py +1 -0
  17. mlrun/{model_monitoring/db/stores/v3io_kv/__init__.py → common/formatters/model_endpoint.py} +17 -0
  18. mlrun/common/formatters/pipeline.py +1 -2
  19. mlrun/common/formatters/project.py +9 -0
  20. mlrun/common/model_monitoring/__init__.py +0 -5
  21. mlrun/common/model_monitoring/helpers.py +12 -62
  22. mlrun/common/runtimes/constants.py +25 -4
  23. mlrun/common/schemas/__init__.py +9 -5
  24. mlrun/common/schemas/alert.py +114 -19
  25. mlrun/common/schemas/api_gateway.py +3 -3
  26. mlrun/common/schemas/artifact.py +22 -9
  27. mlrun/common/schemas/auth.py +8 -4
  28. mlrun/common/schemas/background_task.py +7 -7
  29. mlrun/common/schemas/client_spec.py +4 -4
  30. mlrun/common/schemas/clusterization_spec.py +2 -2
  31. mlrun/common/schemas/common.py +53 -3
  32. mlrun/common/schemas/constants.py +15 -0
  33. mlrun/common/schemas/datastore_profile.py +1 -1
  34. mlrun/common/schemas/feature_store.py +9 -9
  35. mlrun/common/schemas/frontend_spec.py +4 -4
  36. mlrun/common/schemas/function.py +10 -10
  37. mlrun/common/schemas/hub.py +1 -1
  38. mlrun/common/schemas/k8s.py +3 -3
  39. mlrun/common/schemas/memory_reports.py +3 -3
  40. mlrun/common/schemas/model_monitoring/__init__.py +4 -8
  41. mlrun/common/schemas/model_monitoring/constants.py +127 -46
  42. mlrun/common/schemas/model_monitoring/grafana.py +18 -12
  43. mlrun/common/schemas/model_monitoring/model_endpoints.py +154 -160
  44. mlrun/common/schemas/notification.py +24 -3
  45. mlrun/common/schemas/object.py +1 -1
  46. mlrun/common/schemas/pagination.py +4 -4
  47. mlrun/common/schemas/partition.py +142 -0
  48. mlrun/common/schemas/pipeline.py +3 -3
  49. mlrun/common/schemas/project.py +26 -18
  50. mlrun/common/schemas/runs.py +3 -3
  51. mlrun/common/schemas/runtime_resource.py +5 -5
  52. mlrun/common/schemas/schedule.py +1 -1
  53. mlrun/common/schemas/secret.py +1 -1
  54. mlrun/{model_monitoring/db/stores/sqldb/__init__.py → common/schemas/serving.py} +10 -1
  55. mlrun/common/schemas/tag.py +3 -3
  56. mlrun/common/schemas/workflow.py +6 -5
  57. mlrun/common/types.py +1 -0
  58. mlrun/config.py +157 -89
  59. mlrun/data_types/__init__.py +5 -3
  60. mlrun/data_types/infer.py +13 -3
  61. mlrun/data_types/spark.py +2 -1
  62. mlrun/datastore/__init__.py +59 -18
  63. mlrun/datastore/alibaba_oss.py +4 -1
  64. mlrun/datastore/azure_blob.py +4 -1
  65. mlrun/datastore/base.py +19 -24
  66. mlrun/datastore/datastore.py +10 -4
  67. mlrun/datastore/datastore_profile.py +178 -45
  68. mlrun/datastore/dbfs_store.py +4 -1
  69. mlrun/datastore/filestore.py +4 -1
  70. mlrun/datastore/google_cloud_storage.py +4 -1
  71. mlrun/datastore/hdfs.py +4 -1
  72. mlrun/datastore/inmem.py +4 -1
  73. mlrun/datastore/redis.py +4 -1
  74. mlrun/datastore/s3.py +14 -3
  75. mlrun/datastore/sources.py +89 -92
  76. mlrun/datastore/store_resources.py +7 -4
  77. mlrun/datastore/storeytargets.py +51 -16
  78. mlrun/datastore/targets.py +38 -31
  79. mlrun/datastore/utils.py +87 -4
  80. mlrun/datastore/v3io.py +4 -1
  81. mlrun/datastore/vectorstore.py +291 -0
  82. mlrun/datastore/wasbfs/fs.py +13 -12
  83. mlrun/db/base.py +286 -100
  84. mlrun/db/httpdb.py +1562 -490
  85. mlrun/db/nopdb.py +250 -83
  86. mlrun/errors.py +6 -2
  87. mlrun/execution.py +194 -50
  88. mlrun/feature_store/__init__.py +2 -10
  89. mlrun/feature_store/api.py +20 -458
  90. mlrun/feature_store/common.py +9 -9
  91. mlrun/feature_store/feature_set.py +20 -18
  92. mlrun/feature_store/feature_vector.py +105 -479
  93. mlrun/feature_store/feature_vector_utils.py +466 -0
  94. mlrun/feature_store/retrieval/base.py +15 -11
  95. mlrun/feature_store/retrieval/job.py +2 -1
  96. mlrun/feature_store/retrieval/storey_merger.py +1 -1
  97. mlrun/feature_store/steps.py +3 -3
  98. mlrun/features.py +30 -13
  99. mlrun/frameworks/__init__.py +1 -2
  100. mlrun/frameworks/_common/__init__.py +1 -2
  101. mlrun/frameworks/_common/artifacts_library.py +2 -2
  102. mlrun/frameworks/_common/mlrun_interface.py +10 -6
  103. mlrun/frameworks/_common/model_handler.py +31 -31
  104. mlrun/frameworks/_common/producer.py +3 -1
  105. mlrun/frameworks/_dl_common/__init__.py +1 -2
  106. mlrun/frameworks/_dl_common/loggers/__init__.py +1 -2
  107. mlrun/frameworks/_dl_common/loggers/mlrun_logger.py +4 -4
  108. mlrun/frameworks/_dl_common/loggers/tensorboard_logger.py +3 -3
  109. mlrun/frameworks/_ml_common/__init__.py +1 -2
  110. mlrun/frameworks/_ml_common/loggers/__init__.py +1 -2
  111. mlrun/frameworks/_ml_common/model_handler.py +21 -21
  112. mlrun/frameworks/_ml_common/plans/__init__.py +1 -2
  113. mlrun/frameworks/_ml_common/plans/confusion_matrix_plan.py +3 -1
  114. mlrun/frameworks/_ml_common/plans/dataset_plan.py +3 -3
  115. mlrun/frameworks/_ml_common/plans/roc_curve_plan.py +4 -4
  116. mlrun/frameworks/auto_mlrun/__init__.py +1 -2
  117. mlrun/frameworks/auto_mlrun/auto_mlrun.py +22 -15
  118. mlrun/frameworks/huggingface/__init__.py +1 -2
  119. mlrun/frameworks/huggingface/model_server.py +9 -9
  120. mlrun/frameworks/lgbm/__init__.py +47 -44
  121. mlrun/frameworks/lgbm/callbacks/__init__.py +1 -2
  122. mlrun/frameworks/lgbm/callbacks/logging_callback.py +4 -2
  123. mlrun/frameworks/lgbm/callbacks/mlrun_logging_callback.py +4 -2
  124. mlrun/frameworks/lgbm/mlrun_interfaces/__init__.py +1 -2
  125. mlrun/frameworks/lgbm/mlrun_interfaces/mlrun_interface.py +5 -5
  126. mlrun/frameworks/lgbm/model_handler.py +15 -11
  127. mlrun/frameworks/lgbm/model_server.py +11 -7
  128. mlrun/frameworks/lgbm/utils.py +2 -2
  129. mlrun/frameworks/onnx/__init__.py +1 -2
  130. mlrun/frameworks/onnx/dataset.py +3 -3
  131. mlrun/frameworks/onnx/mlrun_interface.py +2 -2
  132. mlrun/frameworks/onnx/model_handler.py +7 -5
  133. mlrun/frameworks/onnx/model_server.py +8 -6
  134. mlrun/frameworks/parallel_coordinates.py +11 -11
  135. mlrun/frameworks/pytorch/__init__.py +22 -23
  136. mlrun/frameworks/pytorch/callbacks/__init__.py +1 -2
  137. mlrun/frameworks/pytorch/callbacks/callback.py +2 -1
  138. mlrun/frameworks/pytorch/callbacks/logging_callback.py +15 -8
  139. mlrun/frameworks/pytorch/callbacks/mlrun_logging_callback.py +19 -12
  140. mlrun/frameworks/pytorch/callbacks/tensorboard_logging_callback.py +22 -15
  141. mlrun/frameworks/pytorch/callbacks_handler.py +36 -30
  142. mlrun/frameworks/pytorch/mlrun_interface.py +17 -17
  143. mlrun/frameworks/pytorch/model_handler.py +21 -17
  144. mlrun/frameworks/pytorch/model_server.py +13 -9
  145. mlrun/frameworks/sklearn/__init__.py +19 -18
  146. mlrun/frameworks/sklearn/estimator.py +2 -2
  147. mlrun/frameworks/sklearn/metric.py +3 -3
  148. mlrun/frameworks/sklearn/metrics_library.py +8 -6
  149. mlrun/frameworks/sklearn/mlrun_interface.py +3 -2
  150. mlrun/frameworks/sklearn/model_handler.py +4 -3
  151. mlrun/frameworks/tf_keras/__init__.py +11 -12
  152. mlrun/frameworks/tf_keras/callbacks/__init__.py +1 -2
  153. mlrun/frameworks/tf_keras/callbacks/logging_callback.py +17 -14
  154. mlrun/frameworks/tf_keras/callbacks/mlrun_logging_callback.py +15 -12
  155. mlrun/frameworks/tf_keras/callbacks/tensorboard_logging_callback.py +21 -18
  156. mlrun/frameworks/tf_keras/model_handler.py +17 -13
  157. mlrun/frameworks/tf_keras/model_server.py +12 -8
  158. mlrun/frameworks/xgboost/__init__.py +19 -18
  159. mlrun/frameworks/xgboost/model_handler.py +13 -9
  160. mlrun/k8s_utils.py +2 -5
  161. mlrun/launcher/base.py +3 -4
  162. mlrun/launcher/client.py +2 -2
  163. mlrun/launcher/local.py +6 -2
  164. mlrun/launcher/remote.py +1 -1
  165. mlrun/lists.py +8 -4
  166. mlrun/model.py +132 -46
  167. mlrun/model_monitoring/__init__.py +3 -5
  168. mlrun/model_monitoring/api.py +113 -98
  169. mlrun/model_monitoring/applications/__init__.py +0 -5
  170. mlrun/model_monitoring/applications/_application_steps.py +81 -50
  171. mlrun/model_monitoring/applications/base.py +467 -14
  172. mlrun/model_monitoring/applications/context.py +212 -134
  173. mlrun/model_monitoring/{db/stores/base → applications/evidently}/__init__.py +6 -2
  174. mlrun/model_monitoring/applications/evidently/base.py +146 -0
  175. mlrun/model_monitoring/applications/histogram_data_drift.py +89 -56
  176. mlrun/model_monitoring/applications/results.py +67 -15
  177. mlrun/model_monitoring/controller.py +701 -315
  178. mlrun/model_monitoring/db/__init__.py +0 -2
  179. mlrun/model_monitoring/db/_schedules.py +242 -0
  180. mlrun/model_monitoring/db/_stats.py +189 -0
  181. mlrun/model_monitoring/db/tsdb/__init__.py +33 -22
  182. mlrun/model_monitoring/db/tsdb/base.py +243 -49
  183. mlrun/model_monitoring/db/tsdb/tdengine/schemas.py +76 -36
  184. mlrun/model_monitoring/db/tsdb/tdengine/stream_graph_steps.py +33 -0
  185. mlrun/model_monitoring/db/tsdb/tdengine/tdengine_connection.py +213 -0
  186. mlrun/model_monitoring/db/tsdb/tdengine/tdengine_connector.py +534 -88
  187. mlrun/model_monitoring/db/tsdb/v3io/stream_graph_steps.py +1 -0
  188. mlrun/model_monitoring/db/tsdb/v3io/v3io_connector.py +436 -106
  189. mlrun/model_monitoring/helpers.py +356 -114
  190. mlrun/model_monitoring/stream_processing.py +190 -345
  191. mlrun/model_monitoring/tracking_policy.py +11 -4
  192. mlrun/model_monitoring/writer.py +49 -90
  193. mlrun/package/__init__.py +3 -6
  194. mlrun/package/context_handler.py +2 -2
  195. mlrun/package/packager.py +12 -9
  196. mlrun/package/packagers/__init__.py +0 -2
  197. mlrun/package/packagers/default_packager.py +14 -11
  198. mlrun/package/packagers/numpy_packagers.py +16 -7
  199. mlrun/package/packagers/pandas_packagers.py +18 -18
  200. mlrun/package/packagers/python_standard_library_packagers.py +25 -11
  201. mlrun/package/packagers_manager.py +35 -32
  202. mlrun/package/utils/__init__.py +0 -3
  203. mlrun/package/utils/_pickler.py +6 -6
  204. mlrun/platforms/__init__.py +47 -16
  205. mlrun/platforms/iguazio.py +4 -1
  206. mlrun/projects/operations.py +30 -30
  207. mlrun/projects/pipelines.py +116 -47
  208. mlrun/projects/project.py +1292 -329
  209. mlrun/render.py +5 -9
  210. mlrun/run.py +57 -14
  211. mlrun/runtimes/__init__.py +1 -3
  212. mlrun/runtimes/base.py +30 -22
  213. mlrun/runtimes/daskjob.py +9 -9
  214. mlrun/runtimes/databricks_job/databricks_runtime.py +6 -5
  215. mlrun/runtimes/function_reference.py +5 -2
  216. mlrun/runtimes/generators.py +3 -2
  217. mlrun/runtimes/kubejob.py +6 -7
  218. mlrun/runtimes/mounts.py +574 -0
  219. mlrun/runtimes/mpijob/__init__.py +0 -2
  220. mlrun/runtimes/mpijob/abstract.py +7 -6
  221. mlrun/runtimes/nuclio/api_gateway.py +7 -7
  222. mlrun/runtimes/nuclio/application/application.py +11 -13
  223. mlrun/runtimes/nuclio/application/reverse_proxy.go +66 -64
  224. mlrun/runtimes/nuclio/function.py +127 -70
  225. mlrun/runtimes/nuclio/serving.py +105 -37
  226. mlrun/runtimes/pod.py +159 -54
  227. mlrun/runtimes/remotesparkjob.py +3 -2
  228. mlrun/runtimes/sparkjob/__init__.py +0 -2
  229. mlrun/runtimes/sparkjob/spark3job.py +22 -12
  230. mlrun/runtimes/utils.py +7 -6
  231. mlrun/secrets.py +2 -2
  232. mlrun/serving/__init__.py +8 -0
  233. mlrun/serving/merger.py +7 -5
  234. mlrun/serving/remote.py +35 -22
  235. mlrun/serving/routers.py +186 -240
  236. mlrun/serving/server.py +41 -10
  237. mlrun/serving/states.py +432 -118
  238. mlrun/serving/utils.py +13 -2
  239. mlrun/serving/v1_serving.py +3 -2
  240. mlrun/serving/v2_serving.py +161 -203
  241. mlrun/track/__init__.py +1 -1
  242. mlrun/track/tracker.py +2 -2
  243. mlrun/track/trackers/mlflow_tracker.py +6 -5
  244. mlrun/utils/async_http.py +35 -22
  245. mlrun/utils/clones.py +7 -4
  246. mlrun/utils/helpers.py +511 -58
  247. mlrun/utils/logger.py +119 -13
  248. mlrun/utils/notifications/notification/__init__.py +22 -19
  249. mlrun/utils/notifications/notification/base.py +39 -15
  250. mlrun/utils/notifications/notification/console.py +6 -6
  251. mlrun/utils/notifications/notification/git.py +11 -11
  252. mlrun/utils/notifications/notification/ipython.py +10 -9
  253. mlrun/utils/notifications/notification/mail.py +176 -0
  254. mlrun/utils/notifications/notification/slack.py +16 -8
  255. mlrun/utils/notifications/notification/webhook.py +24 -8
  256. mlrun/utils/notifications/notification_pusher.py +191 -200
  257. mlrun/utils/regex.py +12 -2
  258. mlrun/utils/version/version.json +2 -2
  259. {mlrun-1.7.2rc4.dist-info → mlrun-1.8.0.dist-info}/METADATA +69 -54
  260. mlrun-1.8.0.dist-info/RECORD +351 -0
  261. {mlrun-1.7.2rc4.dist-info → mlrun-1.8.0.dist-info}/WHEEL +1 -1
  262. mlrun/model_monitoring/applications/evidently_base.py +0 -137
  263. mlrun/model_monitoring/db/stores/__init__.py +0 -136
  264. mlrun/model_monitoring/db/stores/base/store.py +0 -213
  265. mlrun/model_monitoring/db/stores/sqldb/models/__init__.py +0 -71
  266. mlrun/model_monitoring/db/stores/sqldb/models/base.py +0 -190
  267. mlrun/model_monitoring/db/stores/sqldb/models/mysql.py +0 -103
  268. mlrun/model_monitoring/db/stores/sqldb/models/sqlite.py +0 -40
  269. mlrun/model_monitoring/db/stores/sqldb/sql_store.py +0 -659
  270. mlrun/model_monitoring/db/stores/v3io_kv/kv_store.py +0 -726
  271. mlrun/model_monitoring/model_endpoint.py +0 -118
  272. mlrun-1.7.2rc4.dist-info/RECORD +0 -351
  273. {mlrun-1.7.2rc4.dist-info → mlrun-1.8.0.dist-info}/entry_points.txt +0 -0
  274. {mlrun-1.7.2rc4.dist-info → mlrun-1.8.0.dist-info/licenses}/LICENSE +0 -0
  275. {mlrun-1.7.2rc4.dist-info → mlrun-1.8.0.dist-info}/top_level.txt +0 -0
@@ -12,7 +12,6 @@
12
12
  # See the License for the specific language governing permissions and
13
13
  # limitations under the License.
14
14
 
15
- import json
16
15
  from dataclasses import dataclass
17
16
  from typing import Final, Optional, Protocol, Union, cast
18
17
 
@@ -25,10 +24,10 @@ import mlrun.model_monitoring.applications.context as mm_context
25
24
  import mlrun.model_monitoring.applications.results as mm_results
26
25
  import mlrun.model_monitoring.features_drift_table as mm_drift_table
27
26
  from mlrun.common.schemas.model_monitoring.constants import (
28
- EventFieldType,
29
27
  HistogramDataDriftApplicationConstants,
30
28
  ResultKindApp,
31
29
  ResultStatusApp,
30
+ StatsKind,
32
31
  )
33
32
  from mlrun.model_monitoring.applications import (
34
33
  ModelMonitoringApplicationBase,
@@ -103,23 +102,28 @@ class HistogramDataDriftApplication(ModelMonitoringApplicationBase):
103
102
  Each metric is calculated over all the features individually and the mean is taken as the metric value.
104
103
  The average of Hellinger and total variance distance is taken as the result.
105
104
 
106
- The application logs two artifacts:
105
+ The application can log two artifacts (disabled by default due to performance issues):
107
106
 
108
- * A JSON with the general drift per feature.
109
- * A plotly table different metrics per feature.
107
+ * JSON with the general drift value per feature.
108
+ * Plotly table with the various metrics and histograms per feature.
110
109
 
111
- This application is deployed by default when calling:
110
+ This application is deployed by default when calling
111
+ :py:func:`~mlrun.projects.MlrunProject.enable_model_monitoring`.
112
+ To avoid it, pass :code:`deploy_histogram_data_drift_app=False`.
112
113
 
113
- .. code-block:: python
114
-
115
- project.enable_model_monitoring()
116
-
117
- To avoid it, pass `deploy_histogram_data_drift_app=False`.
114
+ If you want to change the application defaults, such as the classifier or which artifacts to produce, you
115
+ need to inherit from this class and deploy it as any other model monitoring application.
116
+ Please make sure to keep the default application name. This ensures that the full functionality of the application,
117
+ including the statistics view in the UI, is available.
118
118
  """
119
119
 
120
120
  NAME: Final[str] = HistogramDataDriftApplicationConstants.NAME
121
121
 
122
122
  _REQUIRED_METRICS = {HellingerDistance, TotalVarianceDistance}
123
+ _STATS_TYPES: tuple[StatsKind, StatsKind] = (
124
+ StatsKind.CURRENT_STATS,
125
+ StatsKind.DRIFT_MEASURES,
126
+ )
123
127
 
124
128
  metrics: list[type[HistogramDistanceMetric]] = [
125
129
  HellingerDistance,
@@ -127,16 +131,26 @@ class HistogramDataDriftApplication(ModelMonitoringApplicationBase):
127
131
  TotalVarianceDistance,
128
132
  ]
129
133
 
130
- def __init__(self, value_classifier: Optional[ValueClassifier] = None) -> None:
134
+ def __init__(
135
+ self,
136
+ value_classifier: Optional[ValueClassifier] = None,
137
+ produce_json_artifact: bool = False,
138
+ produce_plotly_artifact: bool = False,
139
+ ) -> None:
131
140
  """
132
- :param value_classifier: Classifier object that adheres to the `ValueClassifier` protocol.
133
- If not provided, the default `DataDriftClassifier()` is used.
141
+ :param value_classifier: Classifier object that adheres to the :py:class:`~ValueClassifier` protocol.
142
+ If not provided, the default :py:class:`~DataDriftClassifier` is used.
143
+ :param produce_json_artifact: Whether to produce the JSON artifact or not, ``False`` by default.
144
+ :param produce_plotly_artifact: Whether to produce the Plotly artifact or not, ``False`` by default.
134
145
  """
135
146
  self._value_classifier = value_classifier or DataDriftClassifier()
136
147
  assert self._REQUIRED_METRICS <= set(
137
148
  self.metrics
138
149
  ), "TVD and Hellinger distance are required for the general data drift result"
139
150
 
151
+ self._produce_json_artifact = produce_json_artifact
152
+ self._produce_plotly_artifact = produce_plotly_artifact
153
+
140
154
  def _compute_metrics_per_feature(
141
155
  self, monitoring_context: mm_context.MonitoringApplicationContext
142
156
  ) -> DataFrame:
@@ -167,10 +181,7 @@ class HistogramDataDriftApplication(ModelMonitoringApplicationBase):
167
181
  return metrics_per_feature
168
182
 
169
183
  def _get_general_drift_result(
170
- self,
171
- metrics: list[mm_results.ModelMonitoringApplicationMetric],
172
- monitoring_context: mm_context.MonitoringApplicationContext,
173
- metrics_per_feature: DataFrame,
184
+ self, metrics: list[mm_results.ModelMonitoringApplicationMetric]
174
185
  ) -> mm_results.ModelMonitoringApplicationResult:
175
186
  """Get the general drift result from the metrics list"""
176
187
  value = cast(
@@ -189,21 +200,12 @@ class HistogramDataDriftApplication(ModelMonitoringApplicationBase):
189
200
  )
190
201
 
191
202
  status = self._value_classifier.value_to_status(value)
203
+
192
204
  return mm_results.ModelMonitoringApplicationResult(
193
205
  name=HistogramDataDriftApplicationConstants.GENERAL_RESULT_NAME,
194
206
  value=value,
195
207
  kind=ResultKindApp.data_drift,
196
208
  status=status,
197
- extra_data={
198
- EventFieldType.CURRENT_STATS: json.dumps(
199
- monitoring_context.sample_df_stats
200
- ),
201
- EventFieldType.DRIFT_MEASURES: json.dumps(
202
- metrics_per_feature.T.to_dict()
203
- | {metric.name: metric.value for metric in metrics}
204
- ),
205
- EventFieldType.DRIFT_STATUS: status.value,
206
- },
207
209
  )
208
210
 
209
211
  @staticmethod
@@ -225,6 +227,36 @@ class HistogramDataDriftApplication(ModelMonitoringApplicationBase):
225
227
 
226
228
  return metrics
227
229
 
230
+ @staticmethod
231
+ def _get_stats(
232
+ metrics: list[mm_results.ModelMonitoringApplicationMetric],
233
+ metrics_per_feature: DataFrame,
234
+ monitoring_context: mm_context.MonitoringApplicationContext,
235
+ ) -> list[mm_results._ModelMonitoringApplicationStats]:
236
+ """
237
+ Return a list of the statistics.
238
+
239
+ :param metrics: the calculated metrics
240
+ :param metrics_per_feature: metric calculated per feature
241
+ :param monitoring_context: context object for current monitoring application
242
+ :returns: list of mm_results._ModelMonitoringApplicationStats for histogram data drift application
243
+ """
244
+ stats = []
245
+ for stats_type in HistogramDataDriftApplication._STATS_TYPES:
246
+ stats.append(
247
+ mm_results._ModelMonitoringApplicationStats(
248
+ name=stats_type,
249
+ stats=metrics_per_feature.T.to_dict()
250
+ | {metric.name: metric.value for metric in metrics}
251
+ if stats_type == StatsKind.DRIFT_MEASURES
252
+ else monitoring_context.sample_df_stats,
253
+ timestamp=monitoring_context.end_infer_time.isoformat(
254
+ sep=" ", timespec="microseconds"
255
+ ),
256
+ )
257
+ )
258
+ return stats
259
+
228
260
  @staticmethod
229
261
  def _get_shared_features_sample_stats(
230
262
  monitoring_context: mm_context.MonitoringApplicationContext,
@@ -275,55 +307,55 @@ class HistogramDataDriftApplication(ModelMonitoringApplicationBase):
275
307
  cast(str, key): (self._value_classifier.value_to_status(value), value)
276
308
  for key, value in drift_per_feature_values.items()
277
309
  }
278
- monitoring_context.logger.debug("Logging plotly artifact")
279
- monitoring_context.log_artifact(
280
- mm_drift_table.FeaturesDriftTablePlot().produce(
281
- sample_set_statistics=sample_set_statistics,
282
- inputs_statistics=inputs_statistics,
283
- metrics=metrics_per_feature.T.to_dict(), # pyright: ignore[reportArgumentType]
284
- drift_results=drift_results,
285
- )
310
+ monitoring_context.logger.debug("Producing plotly artifact")
311
+ artifact = mm_drift_table.FeaturesDriftTablePlot().produce(
312
+ sample_set_statistics=sample_set_statistics,
313
+ inputs_statistics=inputs_statistics,
314
+ metrics=metrics_per_feature.T.to_dict(), # pyright: ignore[reportArgumentType]
315
+ drift_results=drift_results,
286
316
  )
317
+ monitoring_context.logger.debug("Logging plotly artifact")
318
+ monitoring_context.log_artifact(artifact)
287
319
  monitoring_context.logger.debug("Logged plotly artifact successfully")
288
320
 
289
321
  def _log_drift_artifacts(
290
322
  self,
291
323
  monitoring_context: mm_context.MonitoringApplicationContext,
292
324
  metrics_per_feature: DataFrame,
293
- log_json_artifact: bool = True,
294
325
  ) -> None:
295
326
  """Log JSON and Plotly drift data per feature artifacts"""
327
+ if not self._produce_json_artifact and not self._produce_plotly_artifact:
328
+ return
329
+
296
330
  drift_per_feature_values = metrics_per_feature[
297
331
  [HellingerDistance.NAME, TotalVarianceDistance.NAME]
298
332
  ].mean(axis=1)
299
333
 
300
- if log_json_artifact:
334
+ if self._produce_json_artifact:
301
335
  self._log_json_artifact(drift_per_feature_values, monitoring_context)
302
336
 
303
- self._log_plotly_table_artifact(
304
- sample_set_statistics=self._get_shared_features_sample_stats(
305
- monitoring_context
306
- ),
307
- inputs_statistics=monitoring_context.feature_stats,
308
- metrics_per_feature=metrics_per_feature,
309
- drift_per_feature_values=drift_per_feature_values,
310
- monitoring_context=monitoring_context,
311
- )
337
+ if self._produce_plotly_artifact:
338
+ self._log_plotly_table_artifact(
339
+ sample_set_statistics=self._get_shared_features_sample_stats(
340
+ monitoring_context
341
+ ),
342
+ inputs_statistics=monitoring_context.feature_stats,
343
+ metrics_per_feature=metrics_per_feature,
344
+ drift_per_feature_values=drift_per_feature_values,
345
+ monitoring_context=monitoring_context,
346
+ )
312
347
 
313
348
  def do_tracking(
314
- self,
315
- monitoring_context: mm_context.MonitoringApplicationContext,
349
+ self, monitoring_context: mm_context.MonitoringApplicationContext
316
350
  ) -> list[
317
351
  Union[
318
352
  mm_results.ModelMonitoringApplicationResult,
319
353
  mm_results.ModelMonitoringApplicationMetric,
354
+ mm_results._ModelMonitoringApplicationStats,
320
355
  ]
321
356
  ]:
322
357
  """
323
358
  Calculate and return the data drift metrics, averaged over the features.
324
-
325
- Refer to `ModelMonitoringApplicationBaseV2` for the meaning of the
326
- function arguments.
327
359
  """
328
360
  monitoring_context.logger.debug("Starting to run the application")
329
361
  if not monitoring_context.feature_stats:
@@ -342,13 +374,14 @@ class HistogramDataDriftApplication(ModelMonitoringApplicationBase):
342
374
  )
343
375
  monitoring_context.logger.debug("Computing average per metric")
344
376
  metrics = self._get_metrics(metrics_per_feature)
345
- result = self._get_general_drift_result(
377
+ result = self._get_general_drift_result(metrics=metrics)
378
+ stats = self._get_stats(
346
379
  metrics=metrics,
347
380
  monitoring_context=monitoring_context,
348
381
  metrics_per_feature=metrics_per_feature,
349
382
  )
350
- metrics_and_result = metrics + [result]
383
+ metrics_result_and_stats = metrics + [result] + stats
351
384
  monitoring_context.logger.debug(
352
- "Finished running the application", results=metrics_and_result
385
+ "Finished running the application", results=metrics_result_and_stats
353
386
  )
354
- return metrics_and_result
387
+ return metrics_result_and_stats
@@ -17,20 +17,26 @@ import json
17
17
  import re
18
18
  from abc import ABC, abstractmethod
19
19
 
20
+ from pydantic.v1 import validator
21
+ from pydantic.v1.dataclasses import dataclass
22
+
20
23
  import mlrun.common.helpers
21
24
  import mlrun.common.model_monitoring.helpers
22
- import mlrun.common.schemas.model_monitoring.constants as mm_constant
25
+ import mlrun.common.schemas.model_monitoring.constants as mm_constants
23
26
  import mlrun.utils.v3io_clients
27
+ from mlrun.utils import logger
28
+
29
+ _RESULT_EXTRA_DATA_MAX_SIZE = 998
24
30
 
25
31
 
26
32
  class _ModelMonitoringApplicationDataRes(ABC):
27
33
  name: str
28
34
 
29
35
  def __post_init__(self):
30
- pat = re.compile(r"[a-zA-Z_][a-zA-Z0-9_]*")
36
+ pat = re.compile(mm_constants.RESULT_NAME_PATTERN)
31
37
  if not re.fullmatch(pat, self.name):
32
38
  raise mlrun.errors.MLRunValueError(
33
- "Attribute name must comply with the regex `[a-zA-Z_][a-zA-Z0-9_]*`"
39
+ f"Attribute name must comply with the regex `{mm_constants.RESULT_NAME_PATTERN}`"
34
40
  )
35
41
 
36
42
  @abstractmethod
@@ -38,7 +44,7 @@ class _ModelMonitoringApplicationDataRes(ABC):
38
44
  raise NotImplementedError
39
45
 
40
46
 
41
- @dataclasses.dataclass
47
+ @dataclass
42
48
  class ModelMonitoringApplicationResult(_ModelMonitoringApplicationDataRes):
43
49
  """
44
50
  Class representing the result of a custom model monitoring application.
@@ -49,13 +55,16 @@ class ModelMonitoringApplicationResult(_ModelMonitoringApplicationDataRes):
49
55
  :param value: (float) Value of the application result.
50
56
  :param kind: (ResultKindApp) Kind of application result.
51
57
  :param status: (ResultStatusApp) Status of the application result.
52
- :param extra_data: (dict) Extra data associated with the application result.
58
+ :param extra_data: (dict) Extra data associated with the application result. Note that if the extra data is
59
+ exceeding the maximum size of 998 characters, it will be ignored and a message will
60
+ be logged. In this case, we recommend logging the extra data as a separate artifact or
61
+ shortening it.
53
62
  """
54
63
 
55
64
  name: str
56
65
  value: float
57
- kind: mm_constant.ResultKindApp
58
- status: mm_constant.ResultStatusApp
66
+ kind: mm_constants.ResultKindApp
67
+ status: mm_constants.ResultStatusApp
59
68
  extra_data: dict = dataclasses.field(default_factory=dict)
60
69
 
61
70
  def to_dict(self):
@@ -65,15 +74,30 @@ class ModelMonitoringApplicationResult(_ModelMonitoringApplicationDataRes):
65
74
  :returns: (dict) Dictionary representation of the result.
66
75
  """
67
76
  return {
68
- mm_constant.ResultData.RESULT_NAME: self.name,
69
- mm_constant.ResultData.RESULT_VALUE: self.value,
70
- mm_constant.ResultData.RESULT_KIND: self.kind.value,
71
- mm_constant.ResultData.RESULT_STATUS: self.status.value,
72
- mm_constant.ResultData.RESULT_EXTRA_DATA: json.dumps(self.extra_data),
77
+ mm_constants.ResultData.RESULT_NAME: self.name,
78
+ mm_constants.ResultData.RESULT_VALUE: self.value,
79
+ mm_constants.ResultData.RESULT_KIND: self.kind.value,
80
+ mm_constants.ResultData.RESULT_STATUS: self.status.value,
81
+ mm_constants.ResultData.RESULT_EXTRA_DATA: json.dumps(self.extra_data),
73
82
  }
74
83
 
84
+ @validator("extra_data")
85
+ @classmethod
86
+ def validate_extra_data_len(cls, result_extra_data: dict):
87
+ """Ensure that the extra data is not exceeding the maximum size which is important to avoid
88
+ possible storage issues."""
89
+ extra_data_len = len(json.dumps(result_extra_data))
90
+ if extra_data_len > _RESULT_EXTRA_DATA_MAX_SIZE:
91
+ logger.warning(
92
+ f"Extra data is too long and won't be stored: {extra_data_len} characters while the maximum "
93
+ f"is {_RESULT_EXTRA_DATA_MAX_SIZE} characters."
94
+ f"Please shorten the extra data or log it as a separate artifact."
95
+ )
96
+ return {}
97
+ return result_extra_data
75
98
 
76
- @dataclasses.dataclass
99
+
100
+ @dataclass
77
101
  class ModelMonitoringApplicationMetric(_ModelMonitoringApplicationDataRes):
78
102
  """
79
103
  Class representing a single metric of a custom model monitoring application.
@@ -94,6 +118,34 @@ class ModelMonitoringApplicationMetric(_ModelMonitoringApplicationDataRes):
94
118
  :returns: (dict) Dictionary representation of the result.
95
119
  """
96
120
  return {
97
- mm_constant.MetricData.METRIC_NAME: self.name,
98
- mm_constant.MetricData.METRIC_VALUE: self.value,
121
+ mm_constants.MetricData.METRIC_NAME: self.name,
122
+ mm_constants.MetricData.METRIC_VALUE: self.value,
123
+ }
124
+
125
+
126
+ @dataclasses.dataclass
127
+ class _ModelMonitoringApplicationStats(_ModelMonitoringApplicationDataRes):
128
+ """
129
+ Class representing the stats of histogram data drift application.
130
+
131
+ :param name (mm_constant.StatsKind) Enum mm_constant.StatsData of the stats data kind of the event
132
+ :param (str) iso format representation of the timestamp the event took place
133
+ :param stats (dict) Dictionary representation of the stats calculated for the event
134
+
135
+ """
136
+
137
+ name: mm_constants.StatsKind
138
+ timestamp: str
139
+ stats: dict = dataclasses.field(default_factory=dict)
140
+
141
+ def to_dict(self):
142
+ """
143
+ Convert the object to a dictionary format suitable for writing.
144
+
145
+ :returns: (dict) Dictionary representation of the result.
146
+ """
147
+ return {
148
+ mm_constants.StatsData.STATS_NAME: self.name,
149
+ mm_constants.StatsData.STATS: self.stats,
150
+ mm_constants.StatsData.TIMESTAMP: self.timestamp,
99
151
  }