mlrun 1.7.0rc26__py3-none-any.whl → 1.7.0rc31__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of mlrun might be problematic. Click here for more details.

Files changed (78)
  1. mlrun/__main__.py +7 -7
  2. mlrun/alerts/alert.py +13 -1
  3. mlrun/artifacts/manager.py +5 -0
  4. mlrun/common/constants.py +3 -3
  5. mlrun/common/formatters/artifact.py +1 -0
  6. mlrun/common/formatters/base.py +9 -9
  7. mlrun/common/schemas/alert.py +4 -8
  8. mlrun/common/schemas/api_gateway.py +7 -0
  9. mlrun/common/schemas/constants.py +3 -0
  10. mlrun/common/schemas/model_monitoring/__init__.py +1 -0
  11. mlrun/common/schemas/model_monitoring/constants.py +32 -13
  12. mlrun/common/schemas/model_monitoring/model_endpoints.py +0 -12
  13. mlrun/common/schemas/project.py +10 -9
  14. mlrun/common/schemas/schedule.py +1 -1
  15. mlrun/config.py +37 -11
  16. mlrun/data_types/spark.py +2 -2
  17. mlrun/data_types/to_pandas.py +48 -16
  18. mlrun/datastore/__init__.py +1 -0
  19. mlrun/datastore/azure_blob.py +2 -1
  20. mlrun/datastore/base.py +21 -13
  21. mlrun/datastore/datastore.py +7 -5
  22. mlrun/datastore/datastore_profile.py +1 -1
  23. mlrun/datastore/google_cloud_storage.py +1 -0
  24. mlrun/datastore/inmem.py +4 -1
  25. mlrun/datastore/s3.py +2 -0
  26. mlrun/datastore/snowflake_utils.py +3 -1
  27. mlrun/datastore/sources.py +40 -11
  28. mlrun/datastore/store_resources.py +2 -0
  29. mlrun/datastore/targets.py +71 -26
  30. mlrun/db/base.py +11 -0
  31. mlrun/db/httpdb.py +50 -31
  32. mlrun/db/nopdb.py +11 -1
  33. mlrun/errors.py +4 -0
  34. mlrun/execution.py +18 -10
  35. mlrun/feature_store/retrieval/spark_merger.py +4 -32
  36. mlrun/launcher/local.py +2 -2
  37. mlrun/model.py +27 -1
  38. mlrun/model_monitoring/api.py +9 -55
  39. mlrun/model_monitoring/applications/histogram_data_drift.py +4 -1
  40. mlrun/model_monitoring/controller.py +57 -73
  41. mlrun/model_monitoring/db/stores/__init__.py +21 -9
  42. mlrun/model_monitoring/db/stores/base/store.py +39 -1
  43. mlrun/model_monitoring/db/stores/sqldb/models/base.py +9 -7
  44. mlrun/model_monitoring/db/stores/sqldb/models/mysql.py +4 -2
  45. mlrun/model_monitoring/db/stores/sqldb/sql_store.py +41 -80
  46. mlrun/model_monitoring/db/stores/v3io_kv/kv_store.py +22 -27
  47. mlrun/model_monitoring/db/tsdb/__init__.py +19 -14
  48. mlrun/model_monitoring/db/tsdb/v3io/v3io_connector.py +4 -2
  49. mlrun/model_monitoring/helpers.py +15 -17
  50. mlrun/model_monitoring/writer.py +2 -7
  51. mlrun/projects/operations.py +1 -0
  52. mlrun/projects/project.py +87 -75
  53. mlrun/render.py +10 -5
  54. mlrun/run.py +7 -7
  55. mlrun/runtimes/base.py +1 -1
  56. mlrun/runtimes/daskjob.py +7 -1
  57. mlrun/runtimes/local.py +24 -7
  58. mlrun/runtimes/nuclio/function.py +20 -0
  59. mlrun/runtimes/pod.py +5 -29
  60. mlrun/serving/routers.py +75 -59
  61. mlrun/serving/server.py +1 -0
  62. mlrun/serving/v2_serving.py +8 -1
  63. mlrun/utils/helpers.py +46 -2
  64. mlrun/utils/logger.py +36 -2
  65. mlrun/utils/notifications/notification/base.py +4 -0
  66. mlrun/utils/notifications/notification/git.py +21 -0
  67. mlrun/utils/notifications/notification/slack.py +8 -0
  68. mlrun/utils/notifications/notification/webhook.py +41 -1
  69. mlrun/utils/notifications/notification_pusher.py +2 -2
  70. mlrun/utils/version/version.json +2 -2
  71. {mlrun-1.7.0rc26.dist-info → mlrun-1.7.0rc31.dist-info}/METADATA +13 -8
  72. {mlrun-1.7.0rc26.dist-info → mlrun-1.7.0rc31.dist-info}/RECORD +76 -78
  73. {mlrun-1.7.0rc26.dist-info → mlrun-1.7.0rc31.dist-info}/WHEEL +1 -1
  74. mlrun/feature_store/retrieval/conversion.py +0 -271
  75. mlrun/model_monitoring/controller_handler.py +0 -37
  76. {mlrun-1.7.0rc26.dist-info → mlrun-1.7.0rc31.dist-info}/LICENSE +0 -0
  77. {mlrun-1.7.0rc26.dist-info → mlrun-1.7.0rc31.dist-info}/entry_points.txt +0 -0
  78. {mlrun-1.7.0rc26.dist-info → mlrun-1.7.0rc31.dist-info}/top_level.txt +0 -0
mlrun/db/nopdb.py CHANGED
@@ -162,6 +162,7 @@ class NopDB(RunDBInterface):
162
162
  mlrun.common.schemas.artifact.ArtifactsDeletionStrategies.metadata_only
163
163
  ),
164
164
  secrets: dict = None,
165
+ iter=None,
165
166
  ):
166
167
  pass
167
168
 
@@ -708,6 +709,7 @@ class NopDB(RunDBInterface):
708
709
  image: str = "mlrun/mlrun",
709
710
  deploy_histogram_data_drift_app: bool = True,
710
711
  rebuild_images: bool = False,
712
+ fetch_credentials_from_sys_config: bool = False,
711
713
  ) -> None:
712
714
  pass
713
715
 
@@ -730,7 +732,15 @@ class NopDB(RunDBInterface):
730
732
  def deploy_histogram_data_drift_app(
731
733
  self, project: str, image: str = "mlrun/mlrun"
732
734
  ) -> None:
733
- raise NotImplementedError
735
+ pass
736
+
737
+ def set_model_monitoring_credentials(
738
+ self,
739
+ project: str,
740
+ credentials: dict[str, str],
741
+ replace_creds: bool,
742
+ ) -> None:
743
+ pass
734
744
 
735
745
  def generate_event(
736
746
  self, name: str, event_data: Union[dict, mlrun.common.schemas.Event], project=""
mlrun/errors.py CHANGED
@@ -205,6 +205,10 @@ class MLRunTimeoutError(MLRunHTTPStatusError, TimeoutError):
205
205
  error_status_code = HTTPStatus.GATEWAY_TIMEOUT.value
206
206
 
207
207
 
208
+ class MLRunInvalidMMStoreType(MLRunHTTPStatusError, ValueError):
209
+ error_status_code = HTTPStatus.BAD_REQUEST.value
210
+
211
+
208
212
  class MLRunRetryExhaustedError(Exception):
209
213
  pass
210
214
 
mlrun/execution.py CHANGED
@@ -34,13 +34,13 @@ from .features import Feature
34
34
  from .model import HyperParamOptions
35
35
  from .secrets import SecretsStore
36
36
  from .utils import (
37
+ RunKeys,
37
38
  dict_to_json,
38
39
  dict_to_yaml,
39
40
  get_in,
40
41
  is_relative_path,
41
42
  logger,
42
43
  now_date,
43
- run_keys,
44
44
  to_date_str,
45
45
  update_in,
46
46
  )
@@ -85,6 +85,7 @@ class MLClientCtx:
85
85
 
86
86
  self._labels = {}
87
87
  self._annotations = {}
88
+ self._node_selector = {}
88
89
 
89
90
  self._function = ""
90
91
  self._parameters = {}
@@ -207,6 +208,11 @@ class MLClientCtx:
207
208
  """Dictionary with labels (read-only)"""
208
209
  return deepcopy(self._labels)
209
210
 
211
+ @property
212
+ def node_selector(self):
213
+ """Dictionary with node selectors (read-only)"""
214
+ return deepcopy(self._node_selector)
215
+
210
216
  @property
211
217
  def annotations(self):
212
218
  """Dictionary with annotations (read-only)"""
@@ -365,7 +371,7 @@ class MLClientCtx:
365
371
  self._labels = meta.get("labels", self._labels)
366
372
  spec = attrs.get("spec")
367
373
  if spec:
368
- self._secrets_manager = SecretsStore.from_list(spec.get(run_keys.secrets))
374
+ self._secrets_manager = SecretsStore.from_list(spec.get(RunKeys.secrets))
369
375
  self._log_level = spec.get("log_level", self._log_level)
370
376
  self._function = spec.get("function", self._function)
371
377
  self._parameters = spec.get("parameters", self._parameters)
@@ -383,13 +389,14 @@ class MLClientCtx:
383
389
  self._allow_empty_resources = spec.get(
384
390
  "allow_empty_resources", self._allow_empty_resources
385
391
  )
386
- self.artifact_path = spec.get(run_keys.output_path, self.artifact_path)
387
- self._in_path = spec.get(run_keys.input_path, self._in_path)
388
- inputs = spec.get(run_keys.inputs)
392
+ self.artifact_path = spec.get(RunKeys.output_path, self.artifact_path)
393
+ self._in_path = spec.get(RunKeys.input_path, self._in_path)
394
+ inputs = spec.get(RunKeys.inputs)
389
395
  self._notifications = spec.get("notifications", self._notifications)
390
396
  self._state_thresholds = spec.get(
391
397
  "state_thresholds", self._state_thresholds
392
398
  )
399
+ self._node_selector = spec.get("node_selector", self._node_selector)
393
400
  self._reset_on_run = spec.get("reset_on_run", self._reset_on_run)
394
401
 
395
402
  self._init_dbs(rundb)
@@ -567,7 +574,7 @@ class MLClientCtx:
567
574
  self._results["best_iteration"] = best
568
575
  for k, v in get_in(task, ["status", "results"], {}).items():
569
576
  self._results[k] = v
570
- for artifact in get_in(task, ["status", run_keys.artifacts], []):
577
+ for artifact in get_in(task, ["status", RunKeys.artifacts], []):
571
578
  self._artifacts_manager.artifacts[artifact["metadata"]["key"]] = (
572
579
  artifact
573
580
  )
@@ -939,10 +946,11 @@ class MLClientCtx:
939
946
  "parameters": self._parameters,
940
947
  "handler": self._handler,
941
948
  "outputs": self._outputs,
942
- run_keys.output_path: self.artifact_path,
943
- run_keys.inputs: self._inputs,
949
+ RunKeys.output_path: self.artifact_path,
950
+ RunKeys.inputs: self._inputs,
944
951
  "notifications": self._notifications,
945
952
  "state_thresholds": self._state_thresholds,
953
+ "node_selector": self._node_selector,
946
954
  },
947
955
  "status": {
948
956
  "results": self._results,
@@ -964,7 +972,7 @@ class MLClientCtx:
964
972
  set_if_not_none(struct["status"], "commit", self._commit)
965
973
  set_if_not_none(struct["status"], "iterations", self._iteration_results)
966
974
 
967
- struct["status"][run_keys.artifacts] = self._artifacts_manager.artifact_list()
975
+ struct["status"][RunKeys.artifacts] = self._artifacts_manager.artifact_list()
968
976
  self._data_stores.to_dict(struct["spec"])
969
977
  return struct
970
978
 
@@ -1058,7 +1066,7 @@ class MLClientCtx:
1058
1066
  set_if_not_none(struct, "status.commit", self._commit)
1059
1067
  set_if_not_none(struct, "status.iterations", self._iteration_results)
1060
1068
 
1061
- struct[f"status.{run_keys.artifacts}"] = self._artifacts_manager.artifact_list()
1069
+ struct[f"status.{RunKeys.artifacts}"] = self._artifacts_manager.artifact_list()
1062
1070
  return struct
1063
1071
 
1064
1072
  def _init_dbs(self, rundb):
mlrun/feature_store/retrieval/spark_merger.py CHANGED
@@ -13,44 +13,16 @@
13
13
  # limitations under the License.
14
14
  #
15
15
 
16
- import pandas as pd
17
- import semver
18
16
 
19
17
  import mlrun
18
+ from mlrun.data_types.to_pandas import spark_df_to_pandas
20
19
  from mlrun.datastore.sources import ParquetSource
21
20
  from mlrun.datastore.targets import get_offline_target
21
+ from mlrun.runtimes import RemoteSparkRuntime
22
+ from mlrun.runtimes.sparkjob import Spark3Runtime
22
23
  from mlrun.utils.helpers import additional_filters_warning
23
24
 
24
- from ...runtimes import RemoteSparkRuntime
25
- from ...runtimes.sparkjob import Spark3Runtime
26
25
  from .base import BaseMerger
27
- from .conversion import PandasConversionMixin
28
-
29
-
30
- def spark_df_to_pandas(spark_df):
31
- # as of pyspark 3.2.3, toPandas fails to convert timestamps unless we work around the issue
32
- # when we upgrade pyspark, we should check whether this workaround is still necessary
33
- # see https://stackoverflow.com/questions/76389694/transforming-pyspark-to-pandas-dataframe
34
- if semver.parse(pd.__version__)["major"] >= 2:
35
- import pyspark.sql.functions as pyspark_functions
36
-
37
- type_conversion_dict = {}
38
- for field in spark_df.schema.fields:
39
- if str(field.dataType) == "TimestampType":
40
- spark_df = spark_df.withColumn(
41
- field.name,
42
- pyspark_functions.date_format(
43
- pyspark_functions.to_timestamp(field.name),
44
- "yyyy-MM-dd'T'HH:mm:ss.SSSSSSSSS",
45
- ),
46
- )
47
- type_conversion_dict[field.name] = "datetime64[ns]"
48
- df = PandasConversionMixin.toPandas(spark_df)
49
- if type_conversion_dict:
50
- df = df.astype(type_conversion_dict)
51
- return df
52
- else:
53
- return PandasConversionMixin.toPandas(spark_df)
54
26
 
55
27
 
56
28
  class SparkFeatureMerger(BaseMerger):
@@ -252,7 +224,7 @@ class SparkFeatureMerger(BaseMerger):
252
224
  )
253
225
  source_kind = target.kind
254
226
  source_path = target.get_target_path()
255
-
227
+ source_kwargs = target.source_spark_attributes
256
228
  # handling case where there are multiple feature sets and user creates vector where
257
229
  # entity_timestamp_column is from a specific feature set (can't be entity timestamp)
258
230
  source_driver = mlrun.datastore.sources.source_kind_to_driver[source_kind]
mlrun/launcher/local.py CHANGED
@@ -72,9 +72,9 @@ class ClientLocalLauncher(launcher.ClientBaseLauncher):
72
72
  reset_on_run: Optional[bool] = None,
73
73
  ) -> "mlrun.run.RunObject":
74
74
  # do not allow local function to be scheduled
75
- if self._is_run_local and schedule is not None:
75
+ if schedule is not None:
76
76
  raise mlrun.errors.MLRunInvalidArgumentError(
77
- "local and schedule cannot be used together"
77
+ f"Unexpected {schedule=} parameter for local function execution"
78
78
  )
79
79
 
80
80
  self.enrich_runtime(runtime, project)
mlrun/model.py CHANGED
@@ -732,6 +732,30 @@ class Notification(ModelObj):
732
732
  "Notification params size exceeds max size of 1 MB"
733
733
  )
734
734
 
735
+ def validate_notification_params(self):
736
+ notification_class = mlrun.utils.notifications.NotificationTypes(
737
+ self.kind
738
+ ).get_notification()
739
+
740
+ secret_params = self.secret_params or {}
741
+ params = self.params or {}
742
+
743
+ # if the secret_params are already masked - no need to validate
744
+ params_secret = secret_params.get("secret", "")
745
+ if params_secret:
746
+ if len(secret_params) > 1:
747
+ raise mlrun.errors.MLRunInvalidArgumentError(
748
+ "When the 'secret' key is present, 'secret_params' should not contain any other keys."
749
+ )
750
+ return
751
+
752
+ if not secret_params and not params:
753
+ raise mlrun.errors.MLRunInvalidArgumentError(
754
+ "Both 'secret_params' and 'params' are empty, at least one must be defined."
755
+ )
756
+
757
+ notification_class.validate_params(secret_params or params)
758
+
735
759
  @staticmethod
736
760
  def validate_notification_uniqueness(notifications: list["Notification"]):
737
761
  """Validate that all notifications in the list are unique by name"""
@@ -873,6 +897,7 @@ class RunSpec(ModelObj):
873
897
  notifications=None,
874
898
  state_thresholds=None,
875
899
  reset_on_run=None,
900
+ node_selector=None,
876
901
  ):
877
902
  # A dictionary of parsing configurations that will be read from the inputs the user set. The keys are the inputs
878
903
  # keys (parameter names) and the values are the type hint given in the input keys after the colon.
@@ -910,6 +935,7 @@ class RunSpec(ModelObj):
910
935
  self._notifications = notifications or []
911
936
  self.state_thresholds = state_thresholds or {}
912
937
  self.reset_on_run = reset_on_run
938
+ self.node_selector = node_selector or {}
913
939
 
914
940
  def _serialize_field(
915
941
  self, struct: dict, field_name: str = None, strip: bool = False
@@ -1285,7 +1311,7 @@ class RunTemplate(ModelObj):
1285
1311
 
1286
1312
  task.with_input("data", "/file-dir/path/to/file")
1287
1313
  task.with_input("data", "s3://<bucket>/path/to/file")
1288
- task.with_input("data", "v3io://[<remote-host>]/<data-container>/path/to/file")
1314
+ task.with_input("data", "v3io://<data-container>/path/to/file")
1289
1315
  """
1290
1316
  if not self.spec.inputs:
1291
1317
  self.spec.inputs = {}
mlrun/model_monitoring/api.py CHANGED
@@ -47,8 +47,8 @@ def get_or_create_model_endpoint(
47
47
  function_name: str = "",
48
48
  context: mlrun.MLClientCtx = None,
49
49
  sample_set_statistics: dict[str, typing.Any] = None,
50
- drift_threshold: float = None,
51
- possible_drift_threshold: float = None,
50
+ drift_threshold: typing.Optional[float] = None,
51
+ possible_drift_threshold: typing.Optional[float] = None,
52
52
  monitoring_mode: mm_constants.ModelMonitoringMode = mm_constants.ModelMonitoringMode.disabled,
53
53
  db_session=None,
54
54
  ) -> ModelEndpoint:
@@ -69,14 +69,14 @@ def get_or_create_model_endpoint(
69
69
  full function hash.
70
70
  :param sample_set_statistics: Dictionary of sample set statistics that will be used as a reference data for
71
71
  the new model endpoint (applicable only to new endpoint_id).
72
- :param drift_threshold: The threshold of which to mark drifts (applicable only to new endpoint_id).
73
- :param possible_drift_threshold: The threshold of which to mark possible drifts (applicable only to new
72
+ :param drift_threshold: (deprecated) The threshold of which to mark drifts (applicable only to new
73
+ endpoint_id).
74
+ :param possible_drift_threshold: (deprecated) The threshold of which to mark possible drifts (applicable only to new
74
75
  endpoint_id).
75
76
  :param monitoring_mode: If enabled, apply model monitoring features on the provided endpoint id
76
77
  (applicable only to new endpoint_id).
77
78
  :param db_session: A runtime session that manages the current dialog with the database.
78
79
 
79
-
80
80
  :return: A ModelEndpoint object
81
81
  """
82
82
 
@@ -98,8 +98,6 @@ def get_or_create_model_endpoint(
98
98
  model_endpoint=model_endpoint,
99
99
  model_path=model_path,
100
100
  sample_set_statistics=sample_set_statistics,
101
- drift_threshold=drift_threshold,
102
- possible_drift_threshold=possible_drift_threshold,
103
101
  )
104
102
 
105
103
  except mlrun.errors.MLRunNotFoundError:
@@ -113,8 +111,6 @@ def get_or_create_model_endpoint(
113
111
  function_name=function_name,
114
112
  context=context,
115
113
  sample_set_statistics=sample_set_statistics,
116
- drift_threshold=drift_threshold,
117
- possible_drift_threshold=possible_drift_threshold,
118
114
  monitoring_mode=monitoring_mode,
119
115
  )
120
116
  return model_endpoint
@@ -241,9 +237,7 @@ def _model_endpoint_validations(
241
237
  model_endpoint: ModelEndpoint,
242
238
  model_path: str = "",
243
239
  sample_set_statistics: dict[str, typing.Any] = None,
244
- drift_threshold: float = None,
245
- possible_drift_threshold: float = None,
246
- ):
240
+ ) -> None:
247
241
  """
248
242
  Validate that provided model endpoint configurations match the stored fields of the provided `ModelEndpoint`
249
243
  object. Usually, this method is called by `get_or_create_model_endpoint()` in cases that the model endpoint
@@ -257,11 +251,6 @@ def _model_endpoint_validations(
257
251
  is forbidden to provide a different reference data to that model endpoint.
258
252
  In case of discrepancy between the provided `sample_set_statistics` and the
259
253
  `model_endpoints.spec.feature_stats`, a warning will be presented to the user.
260
- :param drift_threshold: The threshold of which to mark drifts. Should be similar to the drift threshold
261
- that has already assigned to the current model endpoint.
262
- :param possible_drift_threshold: The threshold of which to mark possible drifts. Should be similar to the possible
263
- drift threshold that has already assigned to the current model endpoint.
264
-
265
254
  """
266
255
  # Model path
267
256
  if model_path and model_endpoint.spec.model_uri != model_path:
@@ -280,28 +269,6 @@ def _model_endpoint_validations(
280
269
  "Provided sample set statistics is different from the registered statistics. "
281
270
  "If new sample set statistics is to be used, new model endpoint should be created"
282
271
  )
283
- # drift and possible drift thresholds
284
- if drift_threshold:
285
- current_drift_threshold = model_endpoint.spec.monitor_configuration.get(
286
- mm_constants.EventFieldType.DRIFT_DETECTED_THRESHOLD,
287
- mlrun.mlconf.model_endpoint_monitoring.drift_thresholds.default.drift_detected,
288
- )
289
- if current_drift_threshold != drift_threshold:
290
- raise mlrun.errors.MLRunInvalidArgumentError(
291
- f"Cannot change existing drift threshold. Expected {current_drift_threshold}, got {drift_threshold} "
292
- f"Please update drift threshold or generate a new model endpoint record"
293
- )
294
-
295
- if possible_drift_threshold:
296
- current_possible_drift_threshold = model_endpoint.spec.monitor_configuration.get(
297
- mm_constants.EventFieldType.POSSIBLE_DRIFT_THRESHOLD,
298
- mlrun.mlconf.model_endpoint_monitoring.drift_thresholds.default.possible_drift,
299
- )
300
- if current_possible_drift_threshold != possible_drift_threshold:
301
- raise mlrun.errors.MLRunInvalidArgumentError(
302
- f"Cannot change existing possible drift threshold. Expected {current_possible_drift_threshold}, "
303
- f"got {possible_drift_threshold}. Please update drift threshold or generate a new model endpoint record"
304
- )
305
272
 
306
273
 
307
274
  def write_monitoring_df(
@@ -354,8 +321,6 @@ def _generate_model_endpoint(
354
321
  function_name: str,
355
322
  context: mlrun.MLClientCtx,
356
323
  sample_set_statistics: dict[str, typing.Any],
357
- drift_threshold: float,
358
- possible_drift_threshold: float,
359
324
  monitoring_mode: mm_constants.ModelMonitoringMode = mm_constants.ModelMonitoringMode.disabled,
360
325
  ) -> ModelEndpoint:
361
326
  """
@@ -374,8 +339,6 @@ def _generate_model_endpoint(
374
339
  :param sample_set_statistics: Dictionary of sample set statistics that will be used as a reference data for
375
340
  the current model endpoint. Will be stored under
376
341
  `model_endpoint.status.feature_stats`.
377
- :param drift_threshold: The threshold of which to mark drifts.
378
- :param possible_drift_threshold: The threshold of which to mark possible drifts.
379
342
 
380
343
  :return `mlrun.model_monitoring.model_endpoint.ModelEndpoint` object.
381
344
  """
@@ -393,15 +356,6 @@ def _generate_model_endpoint(
393
356
  model_endpoint.spec.model_uri = model_path
394
357
  model_endpoint.spec.model = model_endpoint_name
395
358
  model_endpoint.spec.model_class = "drift-analysis"
396
- if drift_threshold:
397
- model_endpoint.spec.monitor_configuration[
398
- mm_constants.EventFieldType.DRIFT_DETECTED_THRESHOLD
399
- ] = drift_threshold
400
- if possible_drift_threshold:
401
- model_endpoint.spec.monitor_configuration[
402
- mm_constants.EventFieldType.POSSIBLE_DRIFT_THRESHOLD
403
- ] = possible_drift_threshold
404
-
405
359
  model_endpoint.spec.monitoring_mode = monitoring_mode
406
360
  model_endpoint.status.first_request = model_endpoint.status.last_request = (
407
361
  datetime_now().isoformat()
@@ -615,10 +569,10 @@ def _create_model_monitoring_function_base(
615
569
  "please use `ModelMonitoringApplicationBaseV2`. It will be removed in 1.9.0.",
616
570
  FutureWarning,
617
571
  )
618
- if name in mm_constants.MonitoringFunctionNames.list():
572
+ if name in mm_constants._RESERVED_FUNCTION_NAMES:
619
573
  raise mlrun.errors.MLRunInvalidArgumentError(
620
- f"An application cannot have the following names: "
621
- f"{mm_constants.MonitoringFunctionNames.list()}"
574
+ "An application cannot have the following names: "
575
+ f"{mm_constants._RESERVED_FUNCTION_NAMES}"
622
576
  )
623
577
  if func is None:
624
578
  func = ""
mlrun/model_monitoring/applications/histogram_data_drift.py CHANGED
@@ -195,7 +195,10 @@ class HistogramDataDriftApplication(ModelMonitoringApplicationBaseV2):
195
195
  EventFieldType.CURRENT_STATS: json.dumps(
196
196
  monitoring_context.sample_df_stats
197
197
  ),
198
- EventFieldType.DRIFT_MEASURES: metrics_per_feature.T.to_json(),
198
+ EventFieldType.DRIFT_MEASURES: json.dumps(
199
+ metrics_per_feature.T.to_dict()
200
+ | {metric.name: metric.value for metric in metrics}
201
+ ),
199
202
  EventFieldType.DRIFT_STATUS: status.value,
200
203
  },
201
204
  )
mlrun/model_monitoring/controller.py CHANGED
@@ -273,26 +273,14 @@ class MonitoringApplicationController:
273
273
  Note that the MonitoringApplicationController object requires access keys along with valid project configurations.
274
274
  """
275
275
 
276
- def __init__(
277
- self,
278
- mlrun_context: mlrun.run.MLClientCtx,
279
- project: str,
280
- ):
281
- """
282
- Initialize Monitoring Application Processor object.
276
+ def __init__(self) -> None:
277
+ """Initialize Monitoring Application Controller"""
278
+ self.project = cast(str, mlrun.mlconf.default_project)
279
+ self.project_obj = mlrun.load_project(name=self.project, url=self.project)
283
280
 
284
- :param mlrun_context: An MLRun context.
285
- :param project: Project name.
286
- """
287
- self.context = mlrun_context
288
- self.project = project
289
- self.project_obj = mlrun.get_or_create_project(project)
281
+ logger.debug(f"Initializing {self.__class__.__name__}", project=self.project)
290
282
 
291
- mlrun_context.logger.debug(
292
- f"Initializing {self.__class__.__name__}", project=project
293
- )
294
-
295
- self.db = mlrun.model_monitoring.get_store_object(project=project)
283
+ self.db = mlrun.model_monitoring.get_store_object(project=self.project)
296
284
 
297
285
  self._batch_window_generator = _BatchWindowGenerator(
298
286
  batch_dict=json.loads(
@@ -322,26 +310,27 @@ class MonitoringApplicationController:
322
310
  return access_key
323
311
 
324
312
  def _initialize_v3io_configurations(self) -> None:
325
- self.v3io_framesd = mlrun.mlconf.v3io_framesd
326
- self.v3io_api = mlrun.mlconf.v3io_api
327
313
  self.storage_options = dict(
328
- v3io_access_key=self.model_monitoring_access_key, v3io_api=self.v3io_api
314
+ v3io_access_key=self.model_monitoring_access_key,
315
+ v3io_api=mlrun.mlconf.v3io_api,
329
316
  )
330
317
 
331
- def run(self, event: nuclio.Event):
318
+ def run(self) -> None:
332
319
  """
333
- Main method for run all the relevant monitoring applications on each endpoint
334
-
335
- :param event: trigger event
320
+ Main method for run all the relevant monitoring applications on each endpoint.
321
+ This method handles the following:
322
+ 1. List model endpoints
323
+ 2. List applications
324
+ 3. Check model monitoring windows
325
+ 4. Send data to applications
326
+ 5. Delete old parquets
336
327
  """
337
328
  logger.info("Start running monitoring controller")
338
329
  try:
339
330
  applications_names = []
340
331
  endpoints = self.db.list_model_endpoints()
341
332
  if not endpoints:
342
- self.context.logger.info(
343
- "No model endpoints found", project=self.project
344
- )
333
+ logger.info("No model endpoints found", project=self.project)
345
334
  return
346
335
  monitoring_functions = self.project_obj.list_model_monitoring_functions()
347
336
  if monitoring_functions:
@@ -359,58 +348,49 @@ class MonitoringApplicationController:
359
348
  }
360
349
  )
361
350
  if not applications_names:
362
- self.context.logger.info(
363
- "No monitoring functions found", project=self.project
364
- )
351
+ logger.info("No monitoring functions found", project=self.project)
365
352
  return
366
- self.context.logger.info(
353
+ logger.info(
367
354
  "Starting to iterate over the applications",
368
355
  applications=applications_names,
369
356
  )
370
357
 
371
358
  except Exception as e:
372
- self.context.logger.error(
359
+ logger.error(
373
360
  "Failed to list endpoints and monitoring applications",
374
361
  exc=err_to_str(e),
375
362
  )
376
363
  return
377
364
  # Initialize a process pool that will be used to run each endpoint applications on a dedicated process
378
- pool = concurrent.futures.ProcessPoolExecutor(
379
- max_workers=min(len(endpoints), 10),
380
- )
381
- futures = []
382
- for endpoint in endpoints:
383
- if (
384
- endpoint[mm_constants.EventFieldType.ACTIVE]
385
- and endpoint[mm_constants.EventFieldType.MONITORING_MODE]
386
- == mm_constants.ModelMonitoringMode.enabled.value
387
- ):
388
- # Skip router endpoint:
365
+ with concurrent.futures.ProcessPoolExecutor(
366
+ max_workers=min(len(endpoints), 10)
367
+ ) as pool:
368
+ for endpoint in endpoints:
389
369
  if (
390
- int(endpoint[mm_constants.EventFieldType.ENDPOINT_TYPE])
391
- == mm_constants.EndpointType.ROUTER
370
+ endpoint[mm_constants.EventFieldType.ACTIVE]
371
+ and endpoint[mm_constants.EventFieldType.MONITORING_MODE]
372
+ == mm_constants.ModelMonitoringMode.enabled.value
392
373
  ):
393
- # Router endpoint has no feature stats
394
- logger.info(
395
- f"{endpoint[mm_constants.EventFieldType.UID]} is router skipping"
374
+ # Skip router endpoint:
375
+ if (
376
+ int(endpoint[mm_constants.EventFieldType.ENDPOINT_TYPE])
377
+ == mm_constants.EndpointType.ROUTER
378
+ ):
379
+ # Router endpoint has no feature stats
380
+ logger.info(
381
+ f"{endpoint[mm_constants.EventFieldType.UID]} is router, skipping"
382
+ )
383
+ continue
384
+ pool.submit(
385
+ MonitoringApplicationController.model_endpoint_process,
386
+ endpoint=endpoint,
387
+ applications_names=applications_names,
388
+ batch_window_generator=self._batch_window_generator,
389
+ project=self.project,
390
+ parquet_directory=self.parquet_directory,
391
+ storage_options=self.storage_options,
392
+ model_monitoring_access_key=self.model_monitoring_access_key,
396
393
  )
397
- continue
398
- future = pool.submit(
399
- MonitoringApplicationController.model_endpoint_process,
400
- endpoint=endpoint,
401
- applications_names=applications_names,
402
- batch_window_generator=self._batch_window_generator,
403
- project=self.project,
404
- parquet_directory=self.parquet_directory,
405
- storage_options=self.storage_options,
406
- model_monitoring_access_key=self.model_monitoring_access_key,
407
- )
408
- futures.append(future)
409
-
410
- for future in concurrent.futures.as_completed(futures):
411
- result = future.result()
412
- if result:
413
- self.context.log_results(result)
414
394
 
415
395
  self._delete_old_parquet(endpoints=endpoints)
416
396
 
@@ -424,7 +404,7 @@ class MonitoringApplicationController:
424
404
  parquet_directory: str,
425
405
  storage_options: dict,
426
406
  model_monitoring_access_key: str,
427
- ) -> Optional[dict[str, list[str]]]:
407
+ ) -> None:
428
408
  """
429
409
  Process a model endpoint and trigger the monitoring applications. This function running on different process
430
410
  for each endpoint. In addition, this function will generate a parquet file that includes the relevant data
@@ -437,10 +417,8 @@ class MonitoringApplicationController:
437
417
  :param parquet_directory: (str) Directory to store application parquet files
438
418
  :param storage_options: (dict) Storage options for writing ParquetTarget.
439
419
  :param model_monitoring_access_key: (str) Access key to apply the model monitoring process.
440
-
441
420
  """
442
421
  endpoint_id = endpoint[mm_constants.EventFieldType.UID]
443
- start_times: set[datetime.datetime] = set()
444
422
  try:
445
423
  m_fs = fstore.get_feature_set(
446
424
  endpoint[mm_constants.EventFieldType.FEATURE_SET_URI]
@@ -518,16 +496,12 @@ class MonitoringApplicationController:
518
496
  model_monitoring_access_key=model_monitoring_access_key,
519
497
  parquet_target_path=parquet_target_path,
520
498
  )
521
- start_times.add(start_infer_time)
522
499
  except Exception:
523
500
  logger.exception(
524
501
  "Encountered an exception",
525
502
  endpoint_id=endpoint[mm_constants.EventFieldType.UID],
526
503
  )
527
504
 
528
- if start_times:
529
- return {endpoint_id: [str(t) for t in sorted(list(start_times))]}
530
-
531
505
  def _delete_old_parquet(self, endpoints: list[dict[str, Any]], days: int = 1):
532
506
  """
533
507
  Delete application parquets older than the argument days.
@@ -673,3 +647,13 @@ class MonitoringApplicationController:
673
647
  ),
674
648
  )
675
649
  return offline_response
650
+
651
+
652
+ def handler(context: nuclio.Context, event: nuclio.Event) -> None:
653
+ """
654
+ Run model monitoring application processor
655
+
656
+ :param context: the Nuclio context
657
+ :param event: trigger event
658
+ """
659
+ MonitoringApplicationController().run()