mlrun 1.7.0rc38__py3-none-any.whl → 1.7.0rc41__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of mlrun might be problematic. Click here for more details.

Files changed (59)
  1. mlrun/alerts/alert.py +30 -27
  2. mlrun/common/constants.py +3 -0
  3. mlrun/common/helpers.py +0 -1
  4. mlrun/common/schemas/alert.py +3 -0
  5. mlrun/common/schemas/model_monitoring/model_endpoints.py +0 -1
  6. mlrun/common/schemas/notification.py +1 -0
  7. mlrun/config.py +1 -1
  8. mlrun/data_types/to_pandas.py +9 -9
  9. mlrun/datastore/alibaba_oss.py +3 -2
  10. mlrun/datastore/azure_blob.py +7 -9
  11. mlrun/datastore/base.py +13 -1
  12. mlrun/datastore/dbfs_store.py +3 -7
  13. mlrun/datastore/filestore.py +1 -3
  14. mlrun/datastore/google_cloud_storage.py +84 -29
  15. mlrun/datastore/redis.py +1 -0
  16. mlrun/datastore/s3.py +3 -2
  17. mlrun/datastore/sources.py +54 -0
  18. mlrun/datastore/storeytargets.py +147 -0
  19. mlrun/datastore/targets.py +76 -122
  20. mlrun/datastore/v3io.py +1 -0
  21. mlrun/db/httpdb.py +6 -1
  22. mlrun/errors.py +8 -0
  23. mlrun/execution.py +7 -0
  24. mlrun/feature_store/api.py +5 -0
  25. mlrun/feature_store/retrieval/job.py +1 -0
  26. mlrun/model.py +24 -3
  27. mlrun/model_monitoring/api.py +10 -2
  28. mlrun/model_monitoring/applications/_application_steps.py +52 -34
  29. mlrun/model_monitoring/applications/context.py +206 -70
  30. mlrun/model_monitoring/applications/histogram_data_drift.py +15 -13
  31. mlrun/model_monitoring/controller.py +15 -12
  32. mlrun/model_monitoring/db/stores/sqldb/sql_store.py +17 -8
  33. mlrun/model_monitoring/db/stores/v3io_kv/kv_store.py +19 -9
  34. mlrun/model_monitoring/db/tsdb/tdengine/schemas.py +85 -47
  35. mlrun/model_monitoring/db/tsdb/tdengine/tdengine_connector.py +46 -10
  36. mlrun/model_monitoring/db/tsdb/v3io/v3io_connector.py +38 -24
  37. mlrun/model_monitoring/helpers.py +54 -18
  38. mlrun/model_monitoring/stream_processing.py +10 -29
  39. mlrun/projects/pipelines.py +19 -30
  40. mlrun/projects/project.py +86 -67
  41. mlrun/run.py +8 -6
  42. mlrun/runtimes/__init__.py +4 -0
  43. mlrun/runtimes/nuclio/api_gateway.py +18 -0
  44. mlrun/runtimes/nuclio/application/application.py +150 -59
  45. mlrun/runtimes/nuclio/function.py +5 -11
  46. mlrun/runtimes/nuclio/serving.py +2 -2
  47. mlrun/runtimes/utils.py +16 -0
  48. mlrun/serving/routers.py +1 -1
  49. mlrun/serving/server.py +19 -5
  50. mlrun/serving/states.py +8 -0
  51. mlrun/serving/v2_serving.py +34 -26
  52. mlrun/utils/helpers.py +33 -2
  53. mlrun/utils/version/version.json +2 -2
  54. {mlrun-1.7.0rc38.dist-info → mlrun-1.7.0rc41.dist-info}/METADATA +9 -12
  55. {mlrun-1.7.0rc38.dist-info → mlrun-1.7.0rc41.dist-info}/RECORD +59 -58
  56. {mlrun-1.7.0rc38.dist-info → mlrun-1.7.0rc41.dist-info}/WHEEL +1 -1
  57. {mlrun-1.7.0rc38.dist-info → mlrun-1.7.0rc41.dist-info}/LICENSE +0 -0
  58. {mlrun-1.7.0rc38.dist-info → mlrun-1.7.0rc41.dist-info}/entry_points.txt +0 -0
  59. {mlrun-1.7.0rc38.dist-info → mlrun-1.7.0rc41.dist-info}/top_level.txt +0 -0
@@ -147,8 +147,7 @@ def record_results(
147
147
  on the provided `endpoint_id`.
148
148
  :param function_name: If a new model endpoint is created, use this function name for generating the
149
149
  function URI.
150
- :param context: MLRun context. Note that the context is required for logging the artifacts
151
- following the batch drift job.
150
+ :param context: MLRun context. Note that the context is required for generating the model endpoint.
152
151
  :param infer_results_df: DataFrame that will be stored under the model endpoint parquet target. Will be
153
152
  used for doing the drift analysis. Please make sure that the dataframe includes
154
153
  both feature names and label columns.
@@ -616,7 +615,16 @@ def _create_model_monitoring_function_base(
616
615
  app_step = prepare_step.to(class_name=application_class, **application_kwargs)
617
616
  else:
618
617
  app_step = prepare_step.to(class_name=application_class)
618
+
619
619
  app_step.__class__ = mlrun.serving.MonitoringApplicationStep
620
+
621
+ app_step.error_handler(
622
+ name="ApplicationErrorHandler",
623
+ class_name="mlrun.model_monitoring.applications._application_steps._ApplicationErrorHandler",
624
+ full_event=True,
625
+ project=project,
626
+ )
627
+
620
628
  app_step.to(
621
629
  class_name="mlrun.model_monitoring.applications._application_steps._PushToMonitoringWriter",
622
630
  name="PushToMonitoringWriter",
@@ -11,18 +11,16 @@
11
11
  # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
12
  # See the License for the specific language governing permissions and
13
13
  # limitations under the License.
14
+
14
15
  import json
15
- import typing
16
- from typing import Optional
16
+ from typing import Any, Optional, Union
17
17
 
18
- import mlrun.common.helpers
19
- import mlrun.common.model_monitoring.helpers
18
+ import mlrun.common.schemas.alert as alert_objects
20
19
  import mlrun.common.schemas.model_monitoring.constants as mm_constant
21
20
  import mlrun.datastore
22
- import mlrun.serving
23
- import mlrun.utils.helpers
24
- import mlrun.utils.v3io_clients
21
+ import mlrun.model_monitoring
25
22
  from mlrun.model_monitoring.helpers import get_stream_path
23
+ from mlrun.serving import GraphContext
26
24
  from mlrun.serving.utils import StepToDict
27
25
  from mlrun.utils import logger
28
26
 
@@ -61,7 +59,7 @@ class _PushToMonitoringWriter(StepToDict):
61
59
  self,
62
60
  event: tuple[
63
61
  list[
64
- typing.Union[
62
+ Union[
65
63
  ModelMonitoringApplicationResult, ModelMonitoringApplicationMetric
66
64
  ]
67
65
  ],
@@ -120,47 +118,67 @@ class _PushToMonitoringWriter(StepToDict):
120
118
 
121
119
 
122
120
  class _PrepareMonitoringEvent(StepToDict):
123
- def __init__(self, application_name: str):
121
+ def __init__(self, context: GraphContext, application_name: str) -> None:
124
122
  """
125
123
  Class for preparing the application event for the application step.
126
124
 
127
125
  :param application_name: Application name.
128
126
  """
127
+ self.graph_context = context
128
+ self.application_name = application_name
129
+ self.model_endpoints: dict[str, mlrun.model_monitoring.ModelEndpoint] = {}
129
130
 
130
- self.context = self._create_mlrun_context(application_name)
131
- self.model_endpoints = {}
132
-
133
- def do(self, event: dict[str, dict]) -> MonitoringApplicationContext:
131
+ def do(self, event: dict[str, Any]) -> MonitoringApplicationContext:
134
132
  """
135
133
  Prepare the application event for the application step.
136
134
 
137
135
  :param event: Application event.
138
- :return: Application event.
136
+ :return: Application context.
139
137
  """
140
- if not event.get("mlrun_context"):
141
- application_context = MonitoringApplicationContext().from_dict(
142
- event,
143
- context=self.context,
144
- model_endpoint_dict=self.model_endpoints,
145
- )
146
- else:
147
- application_context = MonitoringApplicationContext().from_dict(event)
138
+ application_context = MonitoringApplicationContext(
139
+ graph_context=self.graph_context,
140
+ application_name=self.application_name,
141
+ event=event,
142
+ model_endpoint_dict=self.model_endpoints,
143
+ )
144
+
148
145
  self.model_endpoints.setdefault(
149
146
  application_context.endpoint_id, application_context.model_endpoint
150
147
  )
148
+
151
149
  return application_context
152
150
 
153
- @staticmethod
154
- def _create_mlrun_context(app_name: str):
155
- artifact_path = mlrun.utils.helpers.template_artifact_path(
156
- mlrun.mlconf.artifact_path, mlrun.mlconf.default_project
157
- )
158
- context = mlrun.get_or_create_ctx(
159
- f"{app_name}-logger",
160
- spec={
161
- "metadata": {"labels": {"kind": mlrun.runtimes.RuntimeKinds.serving}},
162
- "spec": {mlrun.utils.helpers.RunKeys.output_path: artifact_path},
151
+
152
+ class _ApplicationErrorHandler(StepToDict):
153
+ def __init__(self, project: str, name: Optional[str] = None):
154
+ self.project = project
155
+ self.name = name or "ApplicationErrorHandler"
156
+
157
+ def do(self, event):
158
+ """
159
+ Handle model monitoring application error. This step will generate an event, describing the error.
160
+
161
+ :param event: Application event.
162
+ """
163
+
164
+ logger.error(f"Error in application step: {event}")
165
+
166
+ event_data = alert_objects.Event(
167
+ kind=alert_objects.EventKind.MM_APP_FAILED,
168
+ entity=alert_objects.EventEntities(
169
+ kind=alert_objects.EventEntityKind.MODEL_MONITORING_APPLICATION,
170
+ project=self.project,
171
+ ids=[f"{self.project}_{event.body.application_name}"],
172
+ ),
173
+ value_dict={
174
+ "Error": event.error,
175
+ "Timestamp": event.timestamp,
176
+ "Application Class": event.body.application_name,
177
+ "Endpoint ID": event.body.endpoint_id,
163
178
  },
164
179
  )
165
- context.__class__ = MonitoringApplicationContext
166
- return context
180
+
181
+ mlrun.get_run_db().generate_event(
182
+ name=alert_objects.EventKind.MM_APP_FAILED, event_data=event_data
183
+ )
184
+ logger.info("Event generated successfully")
@@ -11,19 +11,22 @@
11
11
  # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
12
  # See the License for the specific language governing permissions and
13
13
  # limitations under the License.
14
+
14
15
  import json
15
- import typing
16
+ import socket
17
+ from typing import Any, Optional, cast
16
18
 
17
19
  import numpy as np
18
20
  import pandas as pd
19
21
 
20
- import mlrun.common.helpers
21
- import mlrun.common.model_monitoring.helpers
22
+ import mlrun.common.constants as mlrun_constants
22
23
  import mlrun.common.schemas.model_monitoring.constants as mm_constants
23
24
  import mlrun.feature_store as fstore
24
- from mlrun.artifacts.model import ModelArtifact, get_model
25
+ import mlrun.features
26
+ import mlrun.serving
27
+ import mlrun.utils
28
+ from mlrun.artifacts import Artifact, DatasetArtifact, ModelArtifact, get_model
25
29
  from mlrun.common.model_monitoring.helpers import FeatureStats, pad_features_hist
26
- from mlrun.execution import MLClientCtx
27
30
  from mlrun.model_monitoring.helpers import (
28
31
  calculate_inputs_statistics,
29
32
  get_endpoint_record,
@@ -31,13 +34,17 @@ from mlrun.model_monitoring.helpers import (
31
34
  from mlrun.model_monitoring.model_endpoint import ModelEndpoint
32
35
 
33
36
 
34
- class MonitoringApplicationContext(MLClientCtx):
37
+ class MonitoringApplicationContext:
35
38
  """
36
39
  The monitoring context holds all the relevant information for the monitoring application,
37
40
  and also it can be used for logging artifacts and results.
38
41
  The monitoring context has the following attributes:
39
42
 
40
- :param application_name: (str) the app name
43
+ :param application_name: (str) The model monitoring application name.
44
+ :param project_name: (str) The project name.
45
+ :param project: (MlrunProject) The project object.
46
+ :param logger: (mlrun.utils.Logger) MLRun logger.
47
+ :param nuclio_logger: (nuclio.request.Logger) Nuclio logger.
41
48
  :param sample_df_stats: (FeatureStats) The new sample distribution dictionary.
42
49
  :param feature_stats: (FeatureStats) The train sample distribution dictionary.
43
50
  :param sample_df: (pd.DataFrame) The new sample DataFrame.
@@ -49,79 +56,89 @@ class MonitoringApplicationContext(MLClientCtx):
49
56
  :param model_endpoint: (ModelEndpoint) The model endpoint object.
50
57
  :param feature_names: (list[str]) List of models feature names.
51
58
  :param label_names: (list[str]) List of models label names.
52
- :param model: (tuple[str, ModelArtifact, dict]) The model file, model spec object, and list of
53
-
59
+ :param model: (tuple[str, ModelArtifact, dict]) The model file, model spec object,
60
+ and a list of extra data items.
54
61
  """
55
62
 
56
- def __init__(self, **kwargs):
57
- super().__init__(**kwargs)
58
-
59
- def _enrich_data(self):
60
- self.application_name: typing.Optional[str] = None
61
- self.start_infer_time: typing.Optional[pd.Timestamp] = None
62
- self.end_infer_time: typing.Optional[pd.Timestamp] = None
63
- self.latest_request: typing.Optional[pd.Timestamp] = None
64
- self.endpoint_id: typing.Optional[str] = None
65
- self.output_stream_uri: typing.Optional[str] = None
66
-
67
- self._sample_df: typing.Optional[pd.DataFrame] = None
68
- self._model_endpoint: typing.Optional[ModelEndpoint] = None
69
- self._feature_stats: typing.Optional[FeatureStats] = None
70
- self._sample_df_stats: typing.Optional[FeatureStats] = None
71
-
72
- @classmethod
73
- def from_dict(
74
- cls,
75
- attrs: dict,
76
- context=None,
77
- model_endpoint_dict=None,
78
- **kwargs,
79
- ) -> "MonitoringApplicationContext":
63
+ def __init__(
64
+ self,
65
+ *,
66
+ graph_context: mlrun.serving.GraphContext,
67
+ application_name: str,
68
+ event: dict[str, Any],
69
+ model_endpoint_dict: dict[str, ModelEndpoint],
70
+ ) -> None:
80
71
  """
81
- Create an instance of the MonitoringApplicationContext from a dictionary.
72
+ Initialize a `MonitoringApplicationContext` object.
73
+ Note: this object should not be instantiated manually.
82
74
 
83
- :param attrs: The instance data dictionary.
84
- :param context: The current application context.
75
+ :param application_name: The application name.
76
+ :param event: The instance data dictionary.
85
77
  :param model_endpoint_dict: Dictionary of model endpoints.
86
-
87
78
  """
79
+ self.application_name = application_name
88
80
 
89
- if not context:
90
- ctx = (
91
- super().from_dict(
92
- attrs=attrs.get(mm_constants.ApplicationEvent.MLRUN_CONTEXT, {}),
93
- **kwargs,
94
- ),
95
- )
96
- else:
97
- ctx = context
98
- cls._enrich_data(ctx)
81
+ self.project_name = graph_context.project
82
+ self.project = mlrun.load_project(url=self.project_name)
83
+
84
+ # MLRun Logger
85
+ self.logger = mlrun.utils.create_logger(
86
+ level=mlrun.mlconf.log_level,
87
+ formatter_kind=mlrun.mlconf.log_formatter,
88
+ name="monitoring-application",
89
+ )
90
+ # Nuclio logger - `nuclio.request.Logger`.
91
+ # Note: this logger does not accept keyword arguments.
92
+ self.nuclio_logger = graph_context.logger
99
93
 
100
- ctx.start_infer_time = pd.Timestamp(
101
- attrs.get(mm_constants.ApplicationEvent.START_INFER_TIME)
94
+ # event data
95
+ self.start_infer_time = pd.Timestamp(
96
+ cast(str, event.get(mm_constants.ApplicationEvent.START_INFER_TIME))
102
97
  )
103
- ctx.end_infer_time = pd.Timestamp(
104
- attrs.get(mm_constants.ApplicationEvent.END_INFER_TIME)
98
+ self.end_infer_time = pd.Timestamp(
99
+ cast(str, event.get(mm_constants.ApplicationEvent.END_INFER_TIME))
105
100
  )
106
- ctx.latest_request = pd.Timestamp(
107
- attrs.get(mm_constants.ApplicationEvent.LAST_REQUEST)
101
+ self.latest_request = pd.Timestamp(
102
+ cast(str, event.get(mm_constants.ApplicationEvent.LAST_REQUEST))
108
103
  )
109
- ctx.application_name = attrs.get(mm_constants.ApplicationEvent.APPLICATION_NAME)
110
- ctx._feature_stats = json.loads(
111
- attrs.get(mm_constants.ApplicationEvent.FEATURE_STATS, "{}")
104
+ self.endpoint_id = cast(
105
+ str, event.get(mm_constants.ApplicationEvent.ENDPOINT_ID)
112
106
  )
113
- ctx._sample_df_stats = json.loads(
114
- attrs.get(mm_constants.ApplicationEvent.CURRENT_STATS, "{}")
107
+ self.output_stream_uri = cast(
108
+ str, event.get(mm_constants.ApplicationEvent.OUTPUT_STREAM_URI)
115
109
  )
116
110
 
117
- ctx.endpoint_id = attrs.get(mm_constants.ApplicationEvent.ENDPOINT_ID)
118
- ctx._model_endpoint = model_endpoint_dict.get(ctx.endpoint_id)
111
+ self._feature_stats: Optional[FeatureStats] = json.loads(
112
+ event.get(mm_constants.ApplicationEvent.FEATURE_STATS, "{}")
113
+ )
114
+ self._sample_df_stats: Optional[FeatureStats] = json.loads(
115
+ event.get(mm_constants.ApplicationEvent.CURRENT_STATS, "{}")
116
+ )
119
117
 
120
- return ctx
118
+ # Default labels for the artifacts
119
+ self._default_labels = self._get_default_labels()
120
+
121
+ # Persistent data - fetched when needed
122
+ self._sample_df: Optional[pd.DataFrame] = None
123
+ self._model_endpoint: Optional[ModelEndpoint] = model_endpoint_dict.get(
124
+ self.endpoint_id
125
+ )
126
+
127
+ def _get_default_labels(self) -> dict[str, str]:
128
+ return {
129
+ mlrun_constants.MLRunInternalLabels.runner_pod: socket.gethostname(),
130
+ mlrun_constants.MLRunInternalLabels.producer_type: "model-monitoring-app",
131
+ mlrun_constants.MLRunInternalLabels.app_name: self.application_name,
132
+ mlrun_constants.MLRunInternalLabels.endpoint_id: self.endpoint_id,
133
+ }
134
+
135
+ def _add_default_labels(self, labels: Optional[dict[str, str]]) -> dict[str, str]:
136
+ """Add the default labels to logged artifacts labels"""
137
+ return (labels or {}) | self._default_labels
121
138
 
122
139
  @property
123
140
  def sample_df(self) -> pd.DataFrame:
124
- if not hasattr(self, "_sample_df") or self._sample_df is None:
141
+ if self._sample_df is None:
125
142
  feature_set = fstore.get_feature_set(
126
143
  self.model_endpoint.status.monitoring_feature_set_uri
127
144
  )
@@ -144,15 +161,15 @@ class MonitoringApplicationContext(MLClientCtx):
144
161
 
145
162
  @property
146
163
  def model_endpoint(self) -> ModelEndpoint:
147
- if not hasattr(self, "_model_endpoint") or not self._model_endpoint:
164
+ if not self._model_endpoint:
148
165
  self._model_endpoint = ModelEndpoint.from_flat_dict(
149
- get_endpoint_record(self.project, self.endpoint_id)
166
+ get_endpoint_record(self.project_name, self.endpoint_id)
150
167
  )
151
168
  return self._model_endpoint
152
169
 
153
170
  @property
154
171
  def feature_stats(self) -> FeatureStats:
155
- if not hasattr(self, "_feature_stats") or not self._feature_stats:
172
+ if not self._feature_stats:
156
173
  self._feature_stats = json.loads(self.model_endpoint.status.feature_stats)
157
174
  pad_features_hist(self._feature_stats)
158
175
  return self._feature_stats
@@ -160,7 +177,7 @@ class MonitoringApplicationContext(MLClientCtx):
160
177
  @property
161
178
  def sample_df_stats(self) -> FeatureStats:
162
179
  """statistics of the sample dataframe"""
163
- if not hasattr(self, "_sample_df_stats") or not self._sample_df_stats:
180
+ if not self._sample_df_stats:
164
181
  self._sample_df_stats = calculate_inputs_statistics(
165
182
  self.feature_stats, self.sample_df
166
183
  )
@@ -184,13 +201,11 @@ class MonitoringApplicationContext(MLClientCtx):
184
201
 
185
202
  @property
186
203
  def model(self) -> tuple[str, ModelArtifact, dict]:
187
- """return model file, model spec object, and list of extra data items"""
204
+ """The model file, model spec object, and a list of extra data items"""
188
205
  return get_model(self.model_endpoint.spec.model_uri)
189
206
 
190
207
  @staticmethod
191
- def dict_to_histogram(
192
- histogram_dict: mlrun.common.model_monitoring.helpers.FeatureStats,
193
- ) -> pd.DataFrame:
208
+ def dict_to_histogram(histogram_dict: FeatureStats) -> pd.DataFrame:
194
209
  """
195
210
  Convert histogram dictionary to pandas DataFrame with feature histograms as columns
196
211
 
@@ -210,3 +225,124 @@ class MonitoringApplicationContext(MLClientCtx):
210
225
  histograms = pd.DataFrame(histograms)
211
226
 
212
227
  return histograms
228
+
229
+ def log_artifact(
230
+ self,
231
+ item,
232
+ body=None,
233
+ tag: str = "",
234
+ local_path: str = "",
235
+ artifact_path: Optional[str] = None,
236
+ format: Optional[str] = None,
237
+ upload: Optional[bool] = None,
238
+ labels: Optional[dict[str, str]] = None,
239
+ target_path: Optional[str] = None,
240
+ **kwargs,
241
+ ) -> Artifact:
242
+ """
243
+ Log an artifact.
244
+ See :func:`~mlrun.projects.MlrunProject.log_artifact` for the documentation.
245
+ """
246
+ labels = self._add_default_labels(labels)
247
+ return self.project.log_artifact(
248
+ item,
249
+ body=body,
250
+ tag=tag,
251
+ local_path=local_path,
252
+ artifact_path=artifact_path,
253
+ format=format,
254
+ upload=upload,
255
+ labels=labels,
256
+ target_path=target_path,
257
+ **kwargs,
258
+ )
259
+
260
+ def log_dataset(
261
+ self,
262
+ key,
263
+ df,
264
+ tag="",
265
+ local_path=None,
266
+ artifact_path=None,
267
+ upload=None,
268
+ labels=None,
269
+ format="",
270
+ preview=None,
271
+ stats=None,
272
+ target_path="",
273
+ extra_data=None,
274
+ label_column: Optional[str] = None,
275
+ **kwargs,
276
+ ) -> DatasetArtifact:
277
+ """
278
+ Log a dataset artifact.
279
+ See :func:`~mlrun.projects.MlrunProject.log_dataset` for the documentation.
280
+ """
281
+ labels = self._add_default_labels(labels)
282
+ return self.project.log_dataset(
283
+ key,
284
+ df,
285
+ tag=tag,
286
+ local_path=local_path,
287
+ artifact_path=artifact_path,
288
+ upload=upload,
289
+ labels=labels,
290
+ format=format,
291
+ preview=preview,
292
+ stats=stats,
293
+ target_path=target_path,
294
+ extra_data=extra_data,
295
+ label_column=label_column,
296
+ **kwargs,
297
+ )
298
+
299
+ def log_model(
300
+ self,
301
+ key,
302
+ body=None,
303
+ framework="",
304
+ tag="",
305
+ model_dir=None,
306
+ model_file=None,
307
+ algorithm=None,
308
+ metrics=None,
309
+ parameters=None,
310
+ artifact_path=None,
311
+ upload=None,
312
+ labels=None,
313
+ inputs: Optional[list[mlrun.features.Feature]] = None,
314
+ outputs: Optional[list[mlrun.features.Feature]] = None,
315
+ feature_vector: Optional[str] = None,
316
+ feature_weights: Optional[list] = None,
317
+ training_set=None,
318
+ label_column=None,
319
+ extra_data=None,
320
+ **kwargs,
321
+ ) -> ModelArtifact:
322
+ """
323
+ Log a model artifact.
324
+ See :func:`~mlrun.projects.MlrunProject.log_model` for the documentation.
325
+ """
326
+ labels = self._add_default_labels(labels)
327
+ return self.project.log_model(
328
+ key,
329
+ body=body,
330
+ framework=framework,
331
+ tag=tag,
332
+ model_dir=model_dir,
333
+ model_file=model_file,
334
+ algorithm=algorithm,
335
+ metrics=metrics,
336
+ parameters=parameters,
337
+ artifact_path=artifact_path,
338
+ upload=upload,
339
+ labels=labels,
340
+ inputs=inputs,
341
+ outputs=outputs,
342
+ feature_vector=feature_vector,
343
+ feature_weights=feature_weights,
344
+ training_set=training_set,
345
+ label_column=label_column,
346
+ extra_data=extra_data,
347
+ **kwargs,
348
+ )
@@ -91,7 +91,9 @@ class HistogramDataDriftApplication(ModelMonitoringApplicationBaseV2):
91
91
  """
92
92
  MLRun's default data drift application for model monitoring.
93
93
 
94
- The application expects tabular numerical data, and calculates three metrics over the features' histograms.
94
+ The application expects tabular numerical data, and calculates three metrics over the shared features' histograms.
95
+ The metrics are calculated on features that have reference data from the training dataset. When there is no
96
+ reference data (`feature_stats`), this application sends a warning log and does nothing.
95
97
  The three metrics are:
96
98
 
97
99
  * Hellinger distance.
@@ -112,6 +114,7 @@ class HistogramDataDriftApplication(ModelMonitoringApplicationBaseV2):
112
114
 
113
115
  project.enable_model_monitoring()
114
116
 
117
+ To avoid it, pass `deploy_histogram_data_drift_app=False`.
115
118
  """
116
119
 
117
120
  NAME: Final[str] = HistogramDataDriftApplicationConstants.NAME
@@ -223,19 +226,18 @@ class HistogramDataDriftApplication(ModelMonitoringApplicationBaseV2):
223
226
  return metrics
224
227
 
225
228
  @staticmethod
226
- def _remove_timestamp_feature(
227
- sample_set_statistics: mlrun.common.model_monitoring.helpers.FeatureStats,
229
+ def _get_shared_features_sample_stats(
230
+ monitoring_context: mm_context.MonitoringApplicationContext,
228
231
  ) -> mlrun.common.model_monitoring.helpers.FeatureStats:
229
232
  """
230
- Drop the 'timestamp' feature if it exists, as it is irrelevant
231
- in the plotly artifact
233
+ Filter out features without reference data in `feature_stats`, e.g. `timestamp`.
232
234
  """
233
- sample_set_statistics = mlrun.common.model_monitoring.helpers.FeatureStats(
234
- sample_set_statistics.copy()
235
+ return mlrun.common.model_monitoring.helpers.FeatureStats(
236
+ {
237
+ key: monitoring_context.sample_df_stats[key]
238
+ for key in monitoring_context.feature_stats
239
+ }
235
240
  )
236
- if EventFieldType.TIMESTAMP in sample_set_statistics:
237
- del sample_set_statistics[EventFieldType.TIMESTAMP]
238
- return sample_set_statistics
239
241
 
240
242
  @staticmethod
241
243
  def _log_json_artifact(
@@ -299,8 +301,8 @@ class HistogramDataDriftApplication(ModelMonitoringApplicationBaseV2):
299
301
  self._log_json_artifact(drift_per_feature_values, monitoring_context)
300
302
 
301
303
  self._log_plotly_table_artifact(
302
- sample_set_statistics=self._remove_timestamp_feature(
303
- monitoring_context.sample_df_stats
304
+ sample_set_statistics=self._get_shared_features_sample_stats(
305
+ monitoring_context
304
306
  ),
305
307
  inputs_statistics=monitoring_context.feature_stats,
306
308
  metrics_per_feature=metrics_per_feature,
@@ -325,7 +327,7 @@ class HistogramDataDriftApplication(ModelMonitoringApplicationBaseV2):
325
327
  """
326
328
  monitoring_context.logger.debug("Starting to run the application")
327
329
  if not monitoring_context.feature_stats:
328
- monitoring_context.logger.info(
330
+ monitoring_context.logger.warning(
329
331
  "No feature statistics found, skipping the application. \n"
330
332
  "In order to run the application, training set must be provided when logging the model."
331
333
  )
@@ -335,19 +335,23 @@ class MonitoringApplicationController:
335
335
  return
336
336
  monitoring_functions = self.project_obj.list_model_monitoring_functions()
337
337
  if monitoring_functions:
338
- # Gets only application in ready state
339
338
  applications_names = list(
340
- {
341
- app.metadata.name
342
- for app in monitoring_functions
343
- if (
344
- app.status.state == "ready"
345
- # workaround for the default app, as its `status.state` is `None`
346
- or app.metadata.name
347
- == mm_constants.HistogramDataDriftApplicationConstants.NAME
348
- )
349
- }
339
+ {app.metadata.name for app in monitoring_functions}
350
340
  )
341
+ # if monitoring_functions: - TODO : ML-7700
342
+ # Gets only application in ready state
343
+ # applications_names = list(
344
+ # {
345
+ # app.metadata.name
346
+ # for app in monitoring_functions
347
+ # if (
348
+ # app.status.state == "ready"
349
+ # # workaround for the default app, as its `status.state` is `None`
350
+ # or app.metadata.name
351
+ # == mm_constants.HistogramDataDriftApplicationConstants.NAME
352
+ # )
353
+ # }
354
+ # )
351
355
  if not applications_names:
352
356
  logger.info("No monitoring functions found", project=self.project)
353
357
  return
@@ -592,7 +596,6 @@ class MonitoringApplicationController:
592
596
  project=project,
593
597
  function_name=mm_constants.MonitoringFunctionNames.WRITER,
594
598
  ),
595
- mm_constants.ApplicationEvent.MLRUN_CONTEXT: {}, # TODO : for future use by ad-hoc batch infer
596
599
  }
597
600
  for app_name in applications_names:
598
601
  data.update({mm_constants.ApplicationEvent.APPLICATION_NAME: app_name})