mlrun 1.7.0rc3__py3-none-any.whl → 1.7.0rc4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of mlrun might be problematic.

Files changed (69)
  1. mlrun/artifacts/manager.py +6 -1
  2. mlrun/common/constants.py +1 -0
  3. mlrun/common/model_monitoring/helpers.py +12 -6
  4. mlrun/common/schemas/__init__.py +1 -0
  5. mlrun/common/schemas/client_spec.py +1 -0
  6. mlrun/common/schemas/common.py +40 -0
  7. mlrun/common/schemas/model_monitoring/constants.py +4 -1
  8. mlrun/common/schemas/project.py +2 -0
  9. mlrun/config.py +20 -15
  10. mlrun/datastore/azure_blob.py +22 -9
  11. mlrun/datastore/base.py +15 -25
  12. mlrun/datastore/datastore.py +19 -8
  13. mlrun/datastore/datastore_profile.py +47 -5
  14. mlrun/datastore/google_cloud_storage.py +10 -6
  15. mlrun/datastore/hdfs.py +51 -0
  16. mlrun/datastore/redis.py +4 -0
  17. mlrun/datastore/s3.py +4 -0
  18. mlrun/datastore/sources.py +29 -43
  19. mlrun/datastore/targets.py +58 -48
  20. mlrun/datastore/utils.py +2 -49
  21. mlrun/datastore/v3io.py +4 -0
  22. mlrun/db/base.py +34 -0
  23. mlrun/db/httpdb.py +71 -42
  24. mlrun/execution.py +3 -3
  25. mlrun/feature_store/feature_vector.py +2 -2
  26. mlrun/frameworks/tf_keras/callbacks/logging_callback.py +3 -3
  27. mlrun/frameworks/tf_keras/model_handler.py +7 -7
  28. mlrun/k8s_utils.py +10 -5
  29. mlrun/kfpops.py +19 -10
  30. mlrun/model.py +5 -0
  31. mlrun/model_monitoring/api.py +3 -3
  32. mlrun/model_monitoring/application.py +1 -1
  33. mlrun/model_monitoring/applications/__init__.py +13 -0
  34. mlrun/model_monitoring/applications/histogram_data_drift.py +218 -0
  35. mlrun/model_monitoring/batch.py +9 -111
  36. mlrun/model_monitoring/controller.py +73 -55
  37. mlrun/model_monitoring/controller_handler.py +13 -5
  38. mlrun/model_monitoring/features_drift_table.py +62 -53
  39. mlrun/model_monitoring/helpers.py +30 -21
  40. mlrun/model_monitoring/metrics/__init__.py +13 -0
  41. mlrun/model_monitoring/metrics/histogram_distance.py +127 -0
  42. mlrun/model_monitoring/stores/kv_model_endpoint_store.py +14 -14
  43. mlrun/model_monitoring/stores/sql_model_endpoint_store.py +0 -1
  44. mlrun/package/packagers/pandas_packagers.py +3 -3
  45. mlrun/package/utils/_archiver.py +3 -1
  46. mlrun/platforms/iguazio.py +8 -65
  47. mlrun/projects/pipelines.py +21 -11
  48. mlrun/projects/project.py +121 -42
  49. mlrun/runtimes/base.py +21 -2
  50. mlrun/runtimes/kubejob.py +5 -3
  51. mlrun/runtimes/local.py +2 -2
  52. mlrun/runtimes/mpijob/abstract.py +6 -6
  53. mlrun/runtimes/nuclio/function.py +9 -9
  54. mlrun/runtimes/nuclio/serving.py +3 -3
  55. mlrun/runtimes/pod.py +3 -3
  56. mlrun/runtimes/sparkjob/spark3job.py +3 -3
  57. mlrun/serving/remote.py +4 -2
  58. mlrun/serving/server.py +2 -8
  59. mlrun/utils/async_http.py +3 -3
  60. mlrun/utils/helpers.py +27 -5
  61. mlrun/utils/http.py +3 -3
  62. mlrun/utils/notifications/notification_pusher.py +6 -6
  63. mlrun/utils/version/version.json +2 -2
  64. {mlrun-1.7.0rc3.dist-info → mlrun-1.7.0rc4.dist-info}/METADATA +13 -16
  65. {mlrun-1.7.0rc3.dist-info → mlrun-1.7.0rc4.dist-info}/RECORD +69 -63
  66. {mlrun-1.7.0rc3.dist-info → mlrun-1.7.0rc4.dist-info}/LICENSE +0 -0
  67. {mlrun-1.7.0rc3.dist-info → mlrun-1.7.0rc4.dist-info}/WHEEL +0 -0
  68. {mlrun-1.7.0rc3.dist-info → mlrun-1.7.0rc4.dist-info}/entry_points.txt +0 -0
  69. {mlrun-1.7.0rc3.dist-info → mlrun-1.7.0rc4.dist-info}/top_level.txt +0 -0
mlrun/model_monitoring/applications/histogram_data_drift.py (new file)

@@ -0,0 +1,218 @@
+ # Copyright 2024 Iguazio
+ #
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ #
+ #   http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+
+ from dataclasses import dataclass
+ from typing import Final, Optional, Protocol
+
+ import numpy as np
+ from pandas import DataFrame, Timestamp
+
+ from mlrun.common.schemas.model_monitoring.constants import (
+     MLRUN_HISTOGRAM_DATA_DRIFT_APP_NAME,
+     ResultKindApp,
+     ResultStatusApp,
+ )
+ from mlrun.model_monitoring.application import (
+     ModelMonitoringApplicationBase,
+     ModelMonitoringApplicationResult,
+ )
+ from mlrun.model_monitoring.batch import (
+     HellingerDistance,
+     HistogramDistanceMetric,
+     KullbackLeiblerDivergence,
+     TotalVarianceDistance,
+ )
+
+
+ class InvalidMetricValueError(ValueError):
+     pass
+
+
+ class InvalidThresholdValueError(ValueError):
+     pass
+
+
+ class ValueClassifier(Protocol):
+     def value_to_status(self, value: float) -> ResultStatusApp: ...
+
+
+ @dataclass
+ class DataDriftClassifier:
+     """
+     Classify data drift numeric values into categorical status.
+     """
+
+     potential: float = 0.5
+     detected: float = 0.7
+
+     def __post_init__(self) -> None:
+         """Catch erroneous threshold values"""
+         if not 0 < self.potential < self.detected < 1:
+             raise InvalidThresholdValueError(
+                 "The provided thresholds do not comply with the rules"
+             )
+
+     def value_to_status(self, value: float) -> ResultStatusApp:
+         """
+         Translate the numeric value into status category.
+
+         :param value: The numeric value of the data drift metric, between 0 and 1.
+         :returns: `ResultStatusApp` according to the classification.
+         """
+         if value > 1 or value < 0:
+             raise InvalidMetricValueError(
+                 f"{value = } is invalid, must be in the range [0, 1]."
+             )
+         if value >= self.detected:
+             return ResultStatusApp.detected
+         if value >= self.potential:
+             return ResultStatusApp.potential_detection
+         return ResultStatusApp.no_detection
+
+
+ class HistogramDataDriftApplication(ModelMonitoringApplicationBase):
+     """
+     MLRun's default data drift application for model monitoring.
+
+     The application calculates the metrics over the features' histograms.
+     Each metric is calculated over all the features, the mean is taken,
+     and the status is returned.
+     """
+
+     NAME: Final[str] = MLRUN_HISTOGRAM_DATA_DRIFT_APP_NAME
+     METRIC_KIND: Final[ResultKindApp] = ResultKindApp.data_drift
+
+     _REQUIRED_METRICS = {HellingerDistance, TotalVarianceDistance}
+
+     metrics: list[type[HistogramDistanceMetric]] = [
+         HellingerDistance,
+         KullbackLeiblerDivergence,
+         TotalVarianceDistance,
+     ]
+
+     def __init__(self, value_classifier: Optional[ValueClassifier] = None) -> None:
+         """
+         Initialize the data drift application.
+
+         :param value_classifier: Classifier object that adheres to the `ValueClassifier` protocol.
+                                  If not provided, the default `DataDriftClassifier()` is used.
+         """
+         self._value_classifier = value_classifier or DataDriftClassifier()
+         assert self._REQUIRED_METRICS <= set(
+             self.metrics
+         ), "TVD and Hellinger distance are required for the general data drift result"
+
+     def _compute_metrics_per_feature(
+         self, sample_df_stats: DataFrame, feature_stats: DataFrame
+     ) -> dict[type[HistogramDistanceMetric], list[float]]:
+         """Compute the metrics for the different features and labels"""
+         metrics_per_feature: dict[type[HistogramDistanceMetric], list[float]] = {
+             metric_class: [] for metric_class in self.metrics
+         }
+
+         for (sample_feat, sample_hist), (reference_feat, reference_hist) in zip(
+             sample_df_stats.items(), feature_stats.items()
+         ):
+             assert sample_feat == reference_feat, "The features do not match"
+             self.context.logger.info(
+                 "Computing metrics for feature", feature_name=sample_feat
+             )
+             sample_arr = np.asarray(sample_hist)
+             reference_arr = np.asarray(reference_hist)
+             for metric in self.metrics:
+                 metric_name = metric.NAME
+                 self.context.logger.debug(
+                     "Computing data drift metric",
+                     metric_name=metric_name,
+                     feature_name=sample_feat,
+                 )
+                 metrics_per_feature[metric].append(
+                     metric(distrib_t=sample_arr, distrib_u=reference_arr).compute()
+                 )
+         self.context.logger.info("Finished computing the metrics")
+
+         return metrics_per_feature
+
+     def _add_general_drift_result(
+         self, results: list[ModelMonitoringApplicationResult], value: float
+     ) -> None:
+         results.append(
+             ModelMonitoringApplicationResult(
+                 name="general_drift",
+                 value=value,
+                 kind=self.METRIC_KIND,
+                 status=self._value_classifier.value_to_status(value),
+             )
+         )
+
+     def _get_results(
+         self, metrics_per_feature: dict[type[HistogramDistanceMetric], list[float]]
+     ) -> list[ModelMonitoringApplicationResult]:
+         """Average the metrics over the features and add the status"""
+         results: list[ModelMonitoringApplicationResult] = []
+         hellinger_tvd_values: list[float] = []
+         for metric_class, metric_values in metrics_per_feature.items():
+             self.context.logger.debug(
+                 "Averaging metric over the features", metric_name=metric_class.NAME
+             )
+             value = np.mean(metric_values)
+             if metric_class == KullbackLeiblerDivergence:
+                 # This metric is not bounded from above [0, inf).
+                 # No status is currently reported for KL divergence
+                 status = ResultStatusApp.irrelevant
+             else:
+                 status = self._value_classifier.value_to_status(value)
+             if metric_class in self._REQUIRED_METRICS:
+                 hellinger_tvd_values.append(value)
+             results.append(
+                 ModelMonitoringApplicationResult(
+                     name=f"{metric_class.NAME}_mean",
+                     value=value,
+                     kind=self.METRIC_KIND,
+                     status=status,
+                 )
+             )
+
+         self._add_general_drift_result(
+             results=results, value=np.mean(hellinger_tvd_values)
+         )
+
+         return results
+
+     def do_tracking(
+         self,
+         application_name: str,
+         sample_df_stats: DataFrame,
+         feature_stats: DataFrame,
+         sample_df: DataFrame,
+         start_infer_time: Timestamp,
+         end_infer_time: Timestamp,
+         latest_request: Timestamp,
+         endpoint_id: str,
+         output_stream_uri: str,
+     ) -> list[ModelMonitoringApplicationResult]:
+         """
+         Calculate and return the data drift metrics, averaged over the features.
+
+         Refer to `ModelMonitoringApplicationBase` for the meaning of the
+         function arguments.
+         """
+         self.context.logger.debug("Starting to run the application")
+         metrics_per_feature = self._compute_metrics_per_feature(
+             sample_df_stats=sample_df_stats, feature_stats=feature_stats
+         )
+         self.context.logger.debug("Computing average per metric")
+         results = self._get_results(metrics_per_feature)
+         self.context.logger.debug("Finished running the application", results=results)
+         return results
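
The new application wires a pluggable classifier into the default drift logic: any object with a value_to_status method satisfies the ValueClassifier protocol and can replace the threshold-based DataDriftClassifier. A minimal usage sketch based only on the code above (the import path mirrors the new module location; running it assumes mlrun is installed):

from mlrun.model_monitoring.applications.histogram_data_drift import (
    DataDriftClassifier,
    HistogramDataDriftApplication,
)

# Stricter thresholds than the defaults (potential=0.5, detected=0.7)
classifier = DataDriftClassifier(potential=0.4, detected=0.6)

print(classifier.value_to_status(0.2))   # ResultStatusApp.no_detection
print(classifier.value_to_status(0.45))  # ResultStatusApp.potential_detection
print(classifier.value_to_status(0.9))   # ResultStatusApp.detected

# The classifier can be injected into the default drift application:
app = HistogramDataDriftApplication(value_classifier=classifier)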
mlrun/model_monitoring/batch.py

@@ -11,15 +11,13 @@
  # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  # See the License for the specific language governing permissions and
  # limitations under the License.
- #
- import abc
+
  import collections
- import dataclasses
  import datetime
  import json
  import os
  import re
- from typing import Any, ClassVar, Optional, Union
+ from typing import Any, Optional, Union

  import numpy as np
  import pandas as pd
@@ -35,118 +33,18 @@ import mlrun.common.schemas.model_monitoring
  import mlrun.data_types.infer
  import mlrun.feature_store as fstore
  import mlrun.utils.v3io_clients
+ from mlrun.model_monitoring.metrics.histogram_distance import (
+     HellingerDistance,
+     HistogramDistanceMetric,
+     KullbackLeiblerDivergence,
+     TotalVarianceDistance,
+ )
  from mlrun.utils import logger

  # A type for representing a drift result, a tuple of the status and the drift mean:
  DriftResultType = tuple[mlrun.common.schemas.model_monitoring.DriftStatus, float]


- @dataclasses.dataclass
- class HistogramDistanceMetric(abc.ABC):
-     """
-     An abstract base class for distance metrics between histograms.
-
-     :args distrib_t: array of distribution t (usually the latest dataset distribution)
-     :args distrib_u: array of distribution u (usually the sample dataset distribution)
-
-     Each distribution must contain nonnegative floats that sum up to 1.0.
-     """
-
-     distrib_t: np.ndarray
-     distrib_u: np.ndarray
-
-     NAME: ClassVar[str]
-
-     # noinspection PyMethodOverriding
-     def __init_subclass__(cls, *, metric_name: str, **kwargs) -> None:
-         super().__init_subclass__(**kwargs)
-         cls.NAME = metric_name
-
-     @abc.abstractmethod
-     def compute(self) -> float:
-         raise NotImplementedError
-
-
- class TotalVarianceDistance(HistogramDistanceMetric, metric_name="tvd"):
-     """
-     Provides a symmetric drift distance between two periods t and u
-     Z - vector of random variables
-     Pt - Probability distribution over time span t
-     """
-
-     def compute(self) -> float:
-         """
-         Calculate Total Variance distance.
-
-         :returns: Total Variance Distance.
-         """
-         return np.sum(np.abs(self.distrib_t - self.distrib_u)) / 2
-
-
- class HellingerDistance(HistogramDistanceMetric, metric_name="hellinger"):
-     """
-     Hellinger distance is an f divergence measure, similar to the Kullback-Leibler (KL) divergence.
-     It used to quantify the difference between two probability distributions.
-     However, unlike KL Divergence the Hellinger divergence is symmetric and bounded over a probability space.
-     The output range of Hellinger distance is [0,1]. The closer to 0, the more similar the two distributions.
-     """
-
-     def compute(self) -> float:
-         """
-         Calculate Hellinger Distance
-
-         :returns: Hellinger Distance
-         """
-         return np.sqrt(
-             max(
-                 1 - np.sum(np.sqrt(self.distrib_u * self.distrib_t)),
-                 0,  # numerical errors may produce small negative numbers, e.g. -1e-16.
-                 # However, Cauchy-Schwarz inequality assures this number is in the range [0, 1]
-             )
-         )
-
-
- class KullbackLeiblerDivergence(HistogramDistanceMetric, metric_name="kld"):
-     """
-     KL Divergence (or relative entropy) is a measure of how one probability distribution differs from another.
-     It is an asymmetric measure (thus it's not a metric) and it doesn't satisfy the triangle inequality.
-     KL Divergence of 0, indicates two identical distributions.
-     """
-
-     @staticmethod
-     def _calc_kl_div(
-         actual_dist: np.array, expected_dist: np.array, kld_scaling: float
-     ) -> float:
-         """Return the asymmetric KL divergence"""
-         # We take 0*log(0) == 0 for this calculation
-         mask = actual_dist != 0
-         actual_dist = actual_dist[mask]
-         expected_dist = expected_dist[mask]
-         return np.sum(
-             actual_dist
-             * np.log(
-                 actual_dist / np.where(expected_dist != 0, expected_dist, kld_scaling)
-             ),
-         )
-
-     def compute(
-         self, capping: Optional[float] = None, kld_scaling: float = 1e-4
-     ) -> float:
-         """
-         :param capping: A bounded value for the KL Divergence. For infinite distance, the result is replaced with
-                         the capping value which indicates a huge differences between the distributions.
-         :param kld_scaling: Will be used to replace 0 values for executing the logarithmic operation.
-
-         :returns: symmetric KL Divergence
-         """
-         t_u = self._calc_kl_div(self.distrib_t, self.distrib_u, kld_scaling)
-         u_t = self._calc_kl_div(self.distrib_u, self.distrib_t, kld_scaling)
-         result = t_u + u_t
-         if capping and result == float("inf"):
-             return capping
-         return result
-
-
  class VirtualDrift:
      """
      Virtual Drift object is used for handling the drift calculations.
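
The distance metric classes removed above are not dropped; they move to the new module mlrun/model_monitoring/metrics/histogram_distance.py (file 41 in the list) and are re-imported into batch.py, as the added import shows. A small sketch of computing the distances from the new location, using two normalized histograms over the same bins:

import numpy as np

from mlrun.model_monitoring.metrics.histogram_distance import (
    HellingerDistance,
    TotalVarianceDistance,
)

reference = np.array([0.25, 0.25, 0.25, 0.25])  # e.g. the training-set histogram
latest = np.array([0.10, 0.20, 0.30, 0.40])     # e.g. the latest window's histogram

tvd = TotalVarianceDistance(distrib_t=latest, distrib_u=reference).compute()
hellinger = HellingerDistance(distrib_t=latest, distrib_u=reference).compute()
print(tvd, hellinger)  # both in [0, 1]; 0 means the distributions are identical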
@@ -992,7 +890,7 @@ class BatchProcessor:
          """
          stream_http_path = (
              mlrun.mlconf.model_endpoint_monitoring.default_http_sink.format(
-                 project=self.project
+                 project=self.project, namespace=mlrun.mlconf.namespace
              )
          )

mlrun/model_monitoring/controller.py

@@ -20,6 +20,7 @@ import re
  from collections.abc import Iterator
  from typing import Any, NamedTuple, Optional, Union, cast

+ import nuclio
  from v3io.dataplane.response import HttpResponseError

  import mlrun
@@ -29,6 +30,7 @@ import mlrun.feature_store as fstore
  from mlrun.common.model_monitoring.helpers import FeatureStats, pad_features_hist
  from mlrun.datastore import get_stream_pusher
  from mlrun.datastore.targets import ParquetTarget
+ from mlrun.errors import err_to_str
  from mlrun.model_monitoring.batch import calculate_inputs_statistics
  from mlrun.model_monitoring.helpers import (
      _BatchDict,
@@ -282,33 +284,33 @@ class MonitoringApplicationController:

      def __init__(
          self,
-         context: mlrun.run.MLClientCtx,
+         mlrun_context: mlrun.run.MLClientCtx,
          project: str,
      ):
          """
          Initialize Monitoring Application Processor object.

-         :param context: An MLRun context.
+         :param mlrun_context: An MLRun context.
          :param project: Project name.
          """
-         self.context = context
+         self.context = mlrun_context
          self.project = project
          self.project_obj = mlrun.get_or_create_project(project)

-         context.logger.debug(f"Initializing {self.__class__.__name__}", project=project)
+         mlrun_context.logger.debug(
+             f"Initializing {self.__class__.__name__}", project=project
+         )

          self.db = mlrun.model_monitoring.get_model_endpoint_store(project=project)

          self._batch_window_generator = _BatchWindowGenerator(
-             batch_dict=context.parameters[
-                 mm_constants.EventFieldType.BATCH_INTERVALS_DICT
-             ]
+             batch_dict=json.loads(
+                 mlrun.get_secret_or_env(
+                     mm_constants.EventFieldType.BATCH_INTERVALS_DICT
+                 )
+             )
          )

-         # If provided, only model endpoints in that that list will be analyzed
-         self.model_endpoints = context.parameters.get(
-             mm_constants.EventFieldType.MODEL_ENDPOINTS, None
-         )
          self.model_monitoring_access_key = self._get_model_monitoring_access_key()
          self.parquet_directory = get_monitoring_parquet_path(
              self.project_obj,
@@ -335,66 +337,82 @@ class MonitoringApplicationController:
              v3io_access_key=self.model_monitoring_access_key, v3io_api=self.v3io_api
          )

-     def run(self):
+     def run(self, event: nuclio.Event):
          """
          Main method for run all the relevant monitoring applications on each endpoint
+
+         :param event: trigger event
          """
+         logger.info("Start running monitoring controller")
          try:
-             endpoints = self.db.list_model_endpoints(uids=self.model_endpoints)
+             applications_names = []
+             endpoints = self.db.list_model_endpoints()
+             if not endpoints:
+                 self.context.logger.info(
+                     "No model endpoints found", project=self.project
+                 )
+                 return
              monitoring_functions = self.project_obj.list_model_monitoring_functions()
              if monitoring_functions:
+                 # Gets only application in ready state
                  applications_names = list(
-                     {app.metadata.name for app in monitoring_functions}
+                     {
+                         app.metadata.name
+                         for app in monitoring_functions
+                         if app.status.state == "ready"
+                     }
                  )
-             else:
+             if not applications_names:
                  self.context.logger.info(
                      "No monitoring functions found", project=self.project
                  )
-                 applications_names = []
+                 return

          except Exception as e:
-             self.context.logger.error("Failed to list endpoints", exc=e)
-             return
-         if endpoints and applications_names:
-             # Initialize a process pool that will be used to run each endpoint applications on a dedicated process
-             pool = concurrent.futures.ProcessPoolExecutor(
-                 max_workers=min(len(endpoints), 10),
+             self.context.logger.error(
+                 "Failed to list endpoints and monitoring applications",
+                 exc=err_to_str(e),
              )
-             futures = []
-             for endpoint in endpoints:
+             return
+         # Initialize a process pool that will be used to run each endpoint applications on a dedicated process
+         pool = concurrent.futures.ProcessPoolExecutor(
+             max_workers=min(len(endpoints), 10),
+         )
+         futures = []
+         for endpoint in endpoints:
+             if (
+                 endpoint[mm_constants.EventFieldType.ACTIVE]
+                 and endpoint[mm_constants.EventFieldType.MONITORING_MODE]
+                 == mm_constants.ModelMonitoringMode.enabled.value
+             ):
+                 # Skip router endpoint:
                  if (
-                     endpoint[mm_constants.EventFieldType.ACTIVE]
-                     and endpoint[mm_constants.EventFieldType.MONITORING_MODE]
-                     == mm_constants.ModelMonitoringMode.enabled.value
+                     int(endpoint[mm_constants.EventFieldType.ENDPOINT_TYPE])
+                     == mm_constants.EndpointType.ROUTER
                  ):
-                     # Skip router endpoint:
-                     if (
-                         int(endpoint[mm_constants.EventFieldType.ENDPOINT_TYPE])
-                         == mm_constants.EndpointType.ROUTER
-                     ):
-                         # Router endpoint has no feature stats
-                         logger.info(
-                             f"{endpoint[mm_constants.EventFieldType.UID]} is router skipping"
-                         )
-                         continue
-                     future = pool.submit(
-                         MonitoringApplicationController.model_endpoint_process,
-                         endpoint=endpoint,
-                         applications_names=applications_names,
-                         batch_window_generator=self._batch_window_generator,
-                         project=self.project,
-                         parquet_directory=self.parquet_directory,
-                         storage_options=self.storage_options,
-                         model_monitoring_access_key=self.model_monitoring_access_key,
+                     # Router endpoint has no feature stats
+                     logger.info(
+                         f"{endpoint[mm_constants.EventFieldType.UID]} is router skipping"
                      )
-                     futures.append(future)
+                     continue
+                 future = pool.submit(
+                     MonitoringApplicationController.model_endpoint_process,
+                     endpoint=endpoint,
+                     applications_names=applications_names,
+                     batch_window_generator=self._batch_window_generator,
+                     project=self.project,
+                     parquet_directory=self.parquet_directory,
+                     storage_options=self.storage_options,
+                     model_monitoring_access_key=self.model_monitoring_access_key,
+                 )
+                 futures.append(future)

-             for future in concurrent.futures.as_completed(futures):
-                 result = future.result()
-                 if result:
-                     self.context.log_results(result)
+         for future in concurrent.futures.as_completed(futures):
+             result = future.result()
+             if result:
+                 self.context.log_results(result)

-             self._delete_old_parquet(endpoints=endpoints)
+         self._delete_old_parquet(endpoints=endpoints)

      @classmethod
      def model_endpoint_process(
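
The rewritten run() keeps the same fan-out shape: each qualifying endpoint is processed in its own worker process (capped at 10), and results are logged as the futures complete. A generic sketch of that pattern, with process_endpoint standing in for MonitoringApplicationController.model_endpoint_process (the stand-in name and sample data are illustrative only):

import concurrent.futures


def process_endpoint(endpoint_id: str) -> dict:
    # Placeholder for the real per-endpoint monitoring work
    return {f"{endpoint_id}_result": 1.0}


if __name__ == "__main__":
    endpoints = ["ep-1", "ep-2", "ep-3"]
    with concurrent.futures.ProcessPoolExecutor(
        max_workers=min(len(endpoints), 10)
    ) as pool:
        futures = [pool.submit(process_endpoint, ep) for ep in endpoints]
        for future in concurrent.futures.as_completed(futures):
            result = future.result()
            if result:
                print(result)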
@@ -525,7 +543,7 @@ class MonitoringApplicationController:
          """
          if self.parquet_directory.startswith("v3io:///"):
              # create fs with access to the user side (under projects)
-             store, _ = mlrun.store_manager.get_or_create_store(
+             store, _, _ = mlrun.store_manager.get_or_create_store(
                  self.parquet_directory,
                  {"V3IO_ACCESS_KEY": self.model_monitoring_access_key},
              )
@@ -601,12 +619,12 @@ class MonitoringApplicationController:
              mm_constants.ApplicationEvent.ENDPOINT_ID: endpoint_id,
              mm_constants.ApplicationEvent.OUTPUT_STREAM_URI: get_stream_path(
                  project=project,
-                 application_name=mm_constants.MonitoringFunctionNames.WRITER,
+                 function_name=mm_constants.MonitoringFunctionNames.WRITER,
              ),
          }
          for app_name in applications_names:
              data.update({mm_constants.ApplicationEvent.APPLICATION_NAME: app_name})
-             stream_uri = get_stream_path(project=project, application_name=app_name)
+             stream_uri = get_stream_path(project=project, function_name=app_name)

              logger.info(
                  f"push endpoint_id {endpoint_id} to {app_name} by stream :{stream_uri}"
mlrun/model_monitoring/controller_handler.py

@@ -11,19 +11,27 @@
  # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  # See the License for the specific language governing permissions and
  # limitations under the License.
+ import nuclio

  import mlrun
  from mlrun.model_monitoring.controller import MonitoringApplicationController


- def handler(context: mlrun.run.MLClientCtx) -> None:
+ def handler(context: nuclio.Context, event: nuclio.Event) -> None:
      """
      Run model monitoring application processor

-     :param context: the MLRun context
+     :param context: the Nuclio context
+     :param event: trigger event
      """
+     context.user_data.monitor_app_controller.run(event)
+
+
+ def init_context(context):
+     mlrun_context = mlrun.get_or_create_ctx("model_monitoring_controller")
+     mlrun_context.logger.info("Initialize monitoring app controller")
      monitor_app_controller = MonitoringApplicationController(
-         context=context,
-         project=context.project,
+         mlrun_context=mlrun_context,
+         project=mlrun_context.project,
      )
-     monitor_app_controller.run()
+     setattr(context.user_data, "monitor_app_controller", monitor_app_controller)
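
With this change the handler follows Nuclio's worker lifecycle: init_context() builds the MonitoringApplicationController once per worker and stores it on context.user_data, while handler() only delegates each trigger event to it. Since handler() relies solely on context.user_data.monitor_app_controller.run(event), its call flow can be traced with stand-in objects; a hedged sketch (FakeController and the literal event value are illustrative, and importing the module assumes mlrun and nuclio are installed):

from types import SimpleNamespace

from mlrun.model_monitoring.controller_handler import handler


class FakeController:
    def run(self, event):
        print("controller invoked with event:", event)


# handler(context, event) only touches context.user_data.monitor_app_controller
context = SimpleNamespace(
    user_data=SimpleNamespace(monitor_app_controller=FakeController())
)
handler(context, event="tick")  # prints: controller invoked with event: tick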