mlrun 1.10.0rc18__py3-none-any.whl → 1.11.0rc16__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of mlrun might be problematic.
Files changed (167)
  1. mlrun/__init__.py +24 -3
  2. mlrun/__main__.py +0 -4
  3. mlrun/artifacts/dataset.py +2 -2
  4. mlrun/artifacts/document.py +6 -1
  5. mlrun/artifacts/llm_prompt.py +21 -15
  6. mlrun/artifacts/model.py +3 -3
  7. mlrun/artifacts/plots.py +1 -1
  8. mlrun/{model_monitoring/db/tsdb/tdengine → auth}/__init__.py +2 -3
  9. mlrun/auth/nuclio.py +89 -0
  10. mlrun/auth/providers.py +429 -0
  11. mlrun/auth/utils.py +415 -0
  12. mlrun/common/constants.py +14 -0
  13. mlrun/common/model_monitoring/helpers.py +123 -0
  14. mlrun/common/runtimes/constants.py +28 -0
  15. mlrun/common/schemas/__init__.py +14 -3
  16. mlrun/common/schemas/alert.py +2 -2
  17. mlrun/common/schemas/api_gateway.py +3 -0
  18. mlrun/common/schemas/auth.py +12 -10
  19. mlrun/common/schemas/client_spec.py +4 -0
  20. mlrun/common/schemas/constants.py +25 -0
  21. mlrun/common/schemas/frontend_spec.py +1 -8
  22. mlrun/common/schemas/function.py +34 -0
  23. mlrun/common/schemas/hub.py +33 -20
  24. mlrun/common/schemas/model_monitoring/__init__.py +2 -1
  25. mlrun/common/schemas/model_monitoring/constants.py +12 -15
  26. mlrun/common/schemas/model_monitoring/functions.py +13 -4
  27. mlrun/common/schemas/model_monitoring/model_endpoints.py +11 -0
  28. mlrun/common/schemas/pipeline.py +1 -1
  29. mlrun/common/schemas/secret.py +17 -2
  30. mlrun/common/secrets.py +95 -1
  31. mlrun/common/types.py +10 -10
  32. mlrun/config.py +69 -19
  33. mlrun/data_types/infer.py +2 -2
  34. mlrun/datastore/__init__.py +12 -5
  35. mlrun/datastore/azure_blob.py +162 -47
  36. mlrun/datastore/base.py +274 -10
  37. mlrun/datastore/datastore.py +7 -2
  38. mlrun/datastore/datastore_profile.py +84 -22
  39. mlrun/datastore/model_provider/huggingface_provider.py +225 -41
  40. mlrun/datastore/model_provider/mock_model_provider.py +87 -0
  41. mlrun/datastore/model_provider/model_provider.py +206 -74
  42. mlrun/datastore/model_provider/openai_provider.py +226 -66
  43. mlrun/datastore/s3.py +39 -18
  44. mlrun/datastore/sources.py +1 -1
  45. mlrun/datastore/store_resources.py +4 -4
  46. mlrun/datastore/storeytargets.py +17 -12
  47. mlrun/datastore/targets.py +1 -1
  48. mlrun/datastore/utils.py +25 -6
  49. mlrun/datastore/v3io.py +1 -1
  50. mlrun/db/base.py +63 -32
  51. mlrun/db/httpdb.py +373 -153
  52. mlrun/db/nopdb.py +54 -21
  53. mlrun/errors.py +4 -2
  54. mlrun/execution.py +66 -25
  55. mlrun/feature_store/api.py +1 -1
  56. mlrun/feature_store/common.py +1 -1
  57. mlrun/feature_store/feature_vector_utils.py +1 -1
  58. mlrun/feature_store/steps.py +8 -6
  59. mlrun/frameworks/_common/utils.py +3 -3
  60. mlrun/frameworks/_dl_common/loggers/logger.py +1 -1
  61. mlrun/frameworks/_dl_common/loggers/tensorboard_logger.py +2 -1
  62. mlrun/frameworks/_ml_common/loggers/mlrun_logger.py +1 -1
  63. mlrun/frameworks/_ml_common/utils.py +2 -1
  64. mlrun/frameworks/auto_mlrun/auto_mlrun.py +4 -3
  65. mlrun/frameworks/lgbm/mlrun_interfaces/mlrun_interface.py +2 -1
  66. mlrun/frameworks/onnx/dataset.py +2 -1
  67. mlrun/frameworks/onnx/mlrun_interface.py +2 -1
  68. mlrun/frameworks/pytorch/callbacks/logging_callback.py +5 -4
  69. mlrun/frameworks/pytorch/callbacks/mlrun_logging_callback.py +2 -1
  70. mlrun/frameworks/pytorch/callbacks/tensorboard_logging_callback.py +2 -1
  71. mlrun/frameworks/pytorch/utils.py +2 -1
  72. mlrun/frameworks/sklearn/metric.py +2 -1
  73. mlrun/frameworks/tf_keras/callbacks/logging_callback.py +5 -4
  74. mlrun/frameworks/tf_keras/callbacks/mlrun_logging_callback.py +2 -1
  75. mlrun/frameworks/tf_keras/callbacks/tensorboard_logging_callback.py +2 -1
  76. mlrun/hub/__init__.py +52 -0
  77. mlrun/hub/base.py +142 -0
  78. mlrun/hub/module.py +172 -0
  79. mlrun/hub/step.py +113 -0
  80. mlrun/k8s_utils.py +105 -16
  81. mlrun/launcher/base.py +15 -7
  82. mlrun/launcher/local.py +4 -1
  83. mlrun/model.py +14 -4
  84. mlrun/model_monitoring/__init__.py +0 -1
  85. mlrun/model_monitoring/api.py +65 -28
  86. mlrun/model_monitoring/applications/__init__.py +1 -1
  87. mlrun/model_monitoring/applications/base.py +299 -128
  88. mlrun/model_monitoring/applications/context.py +2 -4
  89. mlrun/model_monitoring/controller.py +132 -58
  90. mlrun/model_monitoring/db/_schedules.py +38 -29
  91. mlrun/model_monitoring/db/_stats.py +6 -16
  92. mlrun/model_monitoring/db/tsdb/__init__.py +9 -7
  93. mlrun/model_monitoring/db/tsdb/base.py +29 -9
  94. mlrun/model_monitoring/db/tsdb/preaggregate.py +234 -0
  95. mlrun/model_monitoring/db/tsdb/stream_graph_steps.py +63 -0
  96. mlrun/model_monitoring/db/tsdb/timescaledb/queries/timescaledb_metrics_queries.py +414 -0
  97. mlrun/model_monitoring/db/tsdb/timescaledb/queries/timescaledb_predictions_queries.py +376 -0
  98. mlrun/model_monitoring/db/tsdb/timescaledb/queries/timescaledb_results_queries.py +590 -0
  99. mlrun/model_monitoring/db/tsdb/timescaledb/timescaledb_connection.py +434 -0
  100. mlrun/model_monitoring/db/tsdb/timescaledb/timescaledb_connector.py +541 -0
  101. mlrun/model_monitoring/db/tsdb/timescaledb/timescaledb_operations.py +808 -0
  102. mlrun/model_monitoring/db/tsdb/timescaledb/timescaledb_schema.py +502 -0
  103. mlrun/model_monitoring/db/tsdb/timescaledb/timescaledb_stream.py +163 -0
  104. mlrun/model_monitoring/db/tsdb/timescaledb/timescaledb_stream_graph_steps.py +60 -0
  105. mlrun/model_monitoring/db/tsdb/timescaledb/utils/timescaledb_dataframe_processor.py +141 -0
  106. mlrun/model_monitoring/db/tsdb/timescaledb/utils/timescaledb_query_builder.py +585 -0
  107. mlrun/model_monitoring/db/tsdb/timescaledb/writer_graph_steps.py +73 -0
  108. mlrun/model_monitoring/db/tsdb/v3io/stream_graph_steps.py +20 -9
  109. mlrun/model_monitoring/db/tsdb/v3io/v3io_connector.py +235 -51
  110. mlrun/model_monitoring/features_drift_table.py +2 -1
  111. mlrun/model_monitoring/helpers.py +30 -6
  112. mlrun/model_monitoring/stream_processing.py +34 -28
  113. mlrun/model_monitoring/writer.py +224 -4
  114. mlrun/package/__init__.py +2 -1
  115. mlrun/platforms/__init__.py +0 -43
  116. mlrun/platforms/iguazio.py +8 -4
  117. mlrun/projects/operations.py +17 -11
  118. mlrun/projects/pipelines.py +2 -2
  119. mlrun/projects/project.py +187 -123
  120. mlrun/run.py +95 -21
  121. mlrun/runtimes/__init__.py +2 -186
  122. mlrun/runtimes/base.py +103 -25
  123. mlrun/runtimes/constants.py +225 -0
  124. mlrun/runtimes/daskjob.py +5 -2
  125. mlrun/runtimes/databricks_job/databricks_runtime.py +2 -1
  126. mlrun/runtimes/local.py +5 -2
  127. mlrun/runtimes/mounts.py +20 -2
  128. mlrun/runtimes/nuclio/__init__.py +12 -7
  129. mlrun/runtimes/nuclio/api_gateway.py +36 -6
  130. mlrun/runtimes/nuclio/application/application.py +339 -40
  131. mlrun/runtimes/nuclio/function.py +222 -72
  132. mlrun/runtimes/nuclio/serving.py +132 -42
  133. mlrun/runtimes/pod.py +213 -21
  134. mlrun/runtimes/utils.py +49 -9
  135. mlrun/secrets.py +99 -14
  136. mlrun/serving/__init__.py +2 -0
  137. mlrun/serving/remote.py +84 -11
  138. mlrun/serving/routers.py +26 -44
  139. mlrun/serving/server.py +138 -51
  140. mlrun/serving/serving_wrapper.py +6 -2
  141. mlrun/serving/states.py +997 -283
  142. mlrun/serving/steps.py +62 -0
  143. mlrun/serving/system_steps.py +149 -95
  144. mlrun/serving/v2_serving.py +9 -10
  145. mlrun/track/trackers/mlflow_tracker.py +29 -31
  146. mlrun/utils/helpers.py +292 -94
  147. mlrun/utils/http.py +9 -2
  148. mlrun/utils/notifications/notification/base.py +18 -0
  149. mlrun/utils/notifications/notification/git.py +3 -5
  150. mlrun/utils/notifications/notification/mail.py +39 -16
  151. mlrun/utils/notifications/notification/slack.py +2 -4
  152. mlrun/utils/notifications/notification/webhook.py +2 -5
  153. mlrun/utils/notifications/notification_pusher.py +3 -3
  154. mlrun/utils/version/version.json +2 -2
  155. mlrun/utils/version/version.py +3 -4
  156. {mlrun-1.10.0rc18.dist-info → mlrun-1.11.0rc16.dist-info}/METADATA +63 -74
  157. {mlrun-1.10.0rc18.dist-info → mlrun-1.11.0rc16.dist-info}/RECORD +161 -143
  158. mlrun/api/schemas/__init__.py +0 -259
  159. mlrun/db/auth_utils.py +0 -152
  160. mlrun/model_monitoring/db/tsdb/tdengine/schemas.py +0 -344
  161. mlrun/model_monitoring/db/tsdb/tdengine/stream_graph_steps.py +0 -75
  162. mlrun/model_monitoring/db/tsdb/tdengine/tdengine_connection.py +0 -281
  163. mlrun/model_monitoring/db/tsdb/tdengine/tdengine_connector.py +0 -1266
  164. {mlrun-1.10.0rc18.dist-info → mlrun-1.11.0rc16.dist-info}/WHEEL +0 -0
  165. {mlrun-1.10.0rc18.dist-info → mlrun-1.11.0rc16.dist-info}/entry_points.txt +0 -0
  166. {mlrun-1.10.0rc18.dist-info → mlrun-1.11.0rc16.dist-info}/licenses/LICENSE +0 -0
  167. {mlrun-1.10.0rc18.dist-info → mlrun-1.11.0rc16.dist-info}/top_level.txt +0 -0
@@ -18,7 +18,7 @@ from abc import ABC, abstractmethod
 from collections import defaultdict
 from collections.abc import Iterator
 from contextlib import contextmanager, nullcontext
-from datetime import datetime, timedelta
+from datetime import UTC, datetime, timedelta
 from typing import Any, Literal, Optional, Union, cast
 
 import pandas as pd
@@ -27,6 +27,7 @@ import mlrun
 import mlrun.common.constants as mlrun_constants
 import mlrun.common.helpers
 import mlrun.common.schemas.model_monitoring.constants as mm_constants
+import mlrun.common.types
 import mlrun.datastore.datastore_profile as ds_profile
 import mlrun.errors
 import mlrun.model_monitoring.api as mm_api
@@ -39,6 +40,12 @@ from mlrun.serving.utils import MonitoringApplicationToDict
 from mlrun.utils import logger
 
 
+class ExistingDataHandling(mlrun.common.types.StrEnum):
+    fail_on_overlap = "fail_on_overlap"
+    skip_overlap = "skip_overlap"
+    delete_all = "delete_all"
+
+
 def _serialize_context_and_result(
     *,
     context: mm_context.MonitoringApplicationContext,
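Note: the `ExistingDataHandling` enum added above replaces the boolean `fail_on_overlap` flag used throughout this file. A minimal sketch of how a caller might reference it, assuming `mlrun.common.types.StrEnum` is a string-valued enum so the plain strings are interchangeable with the members:

    from mlrun.model_monitoring.applications.base import ExistingDataHandling

    # String-valued enum: members compare equal to their literal values,
    # so "skip_overlap" and ExistingDataHandling.skip_overlap are equivalent.
    mode = ExistingDataHandling.skip_overlap
    assert mode == "skip_overlap"
    print(list(ExistingDataHandling))  # fail_on_overlap, skip_overlap, delete_all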
@@ -226,7 +233,7 @@ class ModelMonitoringApplicationBase(MonitoringApplicationToDict, ABC):
         try:
             yield endpoints_output, application_schedules.__enter__()
         finally:
-            if write_output:
+            if write_output and any(endpoints_output.values()):
                 logger.debug(
                     "Pushing model monitoring application job data to the writer stream",
                     passed_stream_profile=str(stream_profile),
@@ -288,7 +295,7 @@ class ModelMonitoringApplicationBase(MonitoringApplicationToDict, ABC):
         end: Optional[str] = None,
         base_period: Optional[int] = None,
         write_output: bool = False,
-        fail_on_overlap: bool = True,
+        existing_data_handling: ExistingDataHandling = ExistingDataHandling.fail_on_overlap,
         stream_profile: Optional[ds_profile.DatastoreProfile] = None,
     ):
         """
@@ -325,21 +332,11 @@ class ModelMonitoringApplicationBase(MonitoringApplicationToDict, ABC):
             project=project,
         ) as (endpoints_output, application_schedules):
 
-            def call_do_tracking(event: Optional[dict] = None):
+            def call_do_tracking(
+                monitoring_context: mm_context.MonitoringApplicationContext,
+            ):
                 nonlocal endpoints_output
 
-                if event is None:
-                    event = {}
-                monitoring_context = (
-                    mm_context.MonitoringApplicationContext._from_ml_ctx(
-                        event=event,
-                        application_name=application_name,
-                        context=context,
-                        project=project,
-                        sample_df=sample_data,
-                        feature_stats=feature_stats,
-                    )
-                )
                 result = self.do_tracking(monitoring_context)
                 endpoints_output[monitoring_context.endpoint_id].append(
                     (monitoring_context, result)
@@ -347,99 +344,184 @@ class ModelMonitoringApplicationBase(MonitoringApplicationToDict, ABC):
                 return result
 
             if endpoints is not None:
-                resolved_endpoints = self._handle_endpoints_type_evaluate(
+                resolved_endpoints = self._normalize_and_validate_endpoints(
                     project=project, endpoints=endpoints
                 )
+                if (
+                    write_output
+                    and existing_data_handling == ExistingDataHandling.delete_all
+                ):
+                    endpoint_ids = [
+                        endpoint_id for _, endpoint_id in resolved_endpoints
+                    ]
+                    context.logger.info(
+                        "Deleting all the application data before running the application",
+                        application_name=application_name,
+                        endpoint_ids=endpoint_ids,
+                    )
+                    self._delete_application_data(
+                        project_name=project.name,
+                        application_name=application_name,
+                        endpoint_ids=endpoint_ids,
+                        application_schedules=application_schedules,
+                    )
                 for endpoint_name, endpoint_id in resolved_endpoints:
-                    for window_start, window_end in self._window_generator(
+                    for monitoring_ctx in self._window_generator(
                         start=start,
                         end=end,
                         base_period=base_period,
                         application_schedules=application_schedules,
                         endpoint_id=endpoint_id,
+                        endpoint_name=endpoint_name,
                         application_name=application_name,
-                        fail_on_overlap=fail_on_overlap,
+                        existing_data_handling=existing_data_handling,
+                        sample_data=sample_data,
+                        context=context,
+                        project=project,
                     ):
-                        result = call_do_tracking(
-                            event={
-                                mm_constants.ApplicationEvent.ENDPOINT_NAME: endpoint_name,
-                                mm_constants.ApplicationEvent.ENDPOINT_ID: endpoint_id,
-                                mm_constants.ApplicationEvent.START_INFER_TIME: window_start,
-                                mm_constants.ApplicationEvent.END_INFER_TIME: window_end,
-                            }
-                        )
+                        result = call_do_tracking(monitoring_ctx)
                         result_key = (
-                            f"{endpoint_name}-{endpoint_id}_{window_start.isoformat()}_{window_end.isoformat()}"
-                            if window_start and window_end
+                            f"{endpoint_name}-{endpoint_id}_{monitoring_ctx.start_infer_time.isoformat()}_{monitoring_ctx.end_infer_time.isoformat()}"
+                            if monitoring_ctx.start_infer_time
+                            and monitoring_ctx.end_infer_time
                             else f"{endpoint_name}-{endpoint_id}"
                         )
 
                         context.log_result(
                             result_key, self._flatten_data_result(result)
                         )
+                # Check if no result was produced for any endpoint (e.g., due to no data in all windows)
+                if not any(endpoints_output.values()):
+                    context.logger.warning(
+                        "No data was found for any of the specified endpoints. "
+                        "No results were produced",
+                        application_name=application_name,
+                        endpoints=endpoints,
+                        start=start,
+                        end=end,
+                    )
             else:
-                return self._flatten_data_result(call_do_tracking())
+                result = call_do_tracking(
+                    mm_context.MonitoringApplicationContext._from_ml_ctx(
+                        context=context,
+                        project=project,
+                        application_name=application_name,
+                        event={},
+                        sample_df=sample_data,
+                        feature_stats=feature_stats,
+                    )
+                )
+                return self._flatten_data_result(result)
 
     @staticmethod
-    def _handle_endpoints_type_evaluate(
+    def _check_endpoints_first_request(
+        endpoints: list[mlrun.common.schemas.ModelEndpoint],
+    ) -> None:
+        """Make sure that all the endpoints have had at least one request"""
+        endpoints_no_requests = [
+            (endpoint.metadata.name, endpoint.metadata.uid)
+            for endpoint in endpoints
+            if not endpoint.status.first_request
+        ]
+        if endpoints_no_requests:
+            raise mlrun.errors.MLRunValueError(
+                "The following model endpoints have not had any requests yet and "
+                "have no data, cannot run the model monitoring application on them: "
+                f"{endpoints_no_requests}"
+            )
+
+    @classmethod
+    def _normalize_and_validate_endpoints(
+        cls,
        project: "mlrun.MlrunProject",
        endpoints: Union[
            list[tuple[str, str]], list[list[str]], list[str], Literal["all"]
        ],
-    ) -> Union[list[tuple[str, str]], list[list[str]]]:
-        if not endpoints:
-            raise mlrun.errors.MLRunValueError(
-                "The endpoints list cannot be empty. If you want to run on all the endpoints, "
-                'use `endpoints="all"`.'
-            )
-
-        if isinstance(endpoints, list) and isinstance(endpoints[0], (tuple, list)):
-            return endpoints
-
-        if not (isinstance(endpoints, list) and isinstance(endpoints[0], str)):
-            if isinstance(endpoints, str):
-                if endpoints != "all":
-                    raise mlrun.errors.MLRunValueError(
-                        'A string input for `endpoints` can only be "all" for all the model endpoints in '
-                        "the project. If you want to select a single model endpoint with the given name, "
-                        f'use a list: `endpoints=["{endpoints}"]`.'
+    ) -> list[tuple[str, str]]:
+        if isinstance(endpoints, list):
+            if all(
+                isinstance(endpoint, tuple | list) and len(endpoint) == 2
+                for endpoint in endpoints
+            ):
+                # A list of [(name, uid), ...] / [[name, uid], ...] tuples/lists
+                endpoint_uids_to_names = {
+                    endpoint[1]: endpoint[0] for endpoint in endpoints
+                }
+                endpoints_list = project.list_model_endpoints(
+                    uids=list(endpoint_uids_to_names.keys()), latest_only=True
+                ).endpoints
+
+                # Check for missing endpoint uids or name/uid mismatches
+                for endpoint in endpoints_list:
+                    if (
+                        endpoint_uids_to_names[cast(str, endpoint.metadata.uid)]
+                        != endpoint.metadata.name
+                    ):
+                        raise mlrun.errors.MLRunNotFoundError(
+                            "Could not find model endpoint with name "
+                            f"'{endpoint_uids_to_names[cast(str, endpoint.metadata.uid)]}' "
+                            f"and uid '{endpoint.metadata.uid}'"
+                        )
+                missing = set(endpoint_uids_to_names.keys()) - {
+                    cast(str, endpoint.metadata.uid) for endpoint in endpoints_list
+                }
+                if missing:
+                    raise mlrun.errors.MLRunNotFoundError(
+                        "Could not find model endpoints with the following uids: "
+                        f"{missing}"
                    )
-            else:
-                raise mlrun.errors.MLRunValueError(
-                    f"Could not resolve endpoints as list of [(name, uid)], {endpoints=}"
-                )
 
-        if endpoints == "all":
-            endpoint_names = None
-        else:
-            endpoint_names = endpoints
-
-        endpoints_list = project.list_model_endpoints(
-            names=endpoint_names, latest_only=True
-        ).endpoints
-        if endpoints_list:
-            list_endpoints_result = [
-                (endpoint.metadata.name, endpoint.metadata.uid)
-                for endpoint in endpoints_list
-            ]
-            if endpoints != "all":
+            elif all(isinstance(endpoint, str) for endpoint in endpoints):
+                # A list of [name, ...] strings
+                endpoint_names = cast(list[str], endpoints)
+                endpoints_list = project.list_model_endpoints(
+                    names=endpoint_names, latest_only=True
+                ).endpoints
+
+                # Check for missing endpoint names
                missing = set(endpoints) - {
-                    endpoint[0] for endpoint in list_endpoints_result
+                    endpoint.metadata.name for endpoint in endpoints_list
                }
                if missing:
                    logger.warning(
                        "Could not list all the required endpoints",
-                        missing_endpoint=missing,
-                        endpoints=list_endpoints_result,
+                        missing_endpoints=missing,
+                        endpoints_list=endpoints_list,
                    )
-            return list_endpoints_result
+            else:
+                raise mlrun.errors.MLRunValueError(
+                    "Could not resolve the following list as a list of endpoints:\n"
+                    f"{endpoints}\n"
+                    "The list must be either a list of (name, uid) tuples/lists or a list of names."
+                )
+        elif endpoints == "all":
+            endpoints_list = project.list_model_endpoints(latest_only=True).endpoints
+        elif isinstance(endpoints, str):
+            raise mlrun.errors.MLRunValueError(
+                'A string input for `endpoints` can only be "all" for all the model endpoints in '
+                "the project. If you want to select a single model endpoint with the given name, "
+                f'use a list: `endpoints=["{endpoints}"]`.'
+            )
        else:
-            if endpoints != "all":
-                err_msg_suffix = f" named '{endpoints}'"
+            raise mlrun.errors.MLRunValueError(
+                "Could not resolve the `endpoints` parameter. The parameter must be either:\n"
+                "- a list of (name, uid) tuples/lists\n"
+                "- a list of names\n"
+                '- the string "all" for all the model endpoints in the project.'
+            )
+
+        if not endpoints_list:
            raise mlrun.errors.MLRunNotFoundError(
-                f"Did not find any model endpoints {err_msg_suffix}"
+                f"Did not find any model endpoints {endpoints=}"
            )
 
+        cls._check_endpoints_first_request(endpoints_list)
+
+        return [
+            (endpoint.metadata.name, cast(str, endpoint.metadata.uid))
+            for endpoint in endpoints_list
+        ]
+
    @staticmethod
    def _validate_and_get_window_length(
        *, base_period: int, start_dt: datetime, end_dt: datetime
@@ -481,7 +563,7 @@ class ModelMonitoringApplicationBase(MonitoringApplicationToDict, ABC):
         end_dt: datetime,
         base_period: Optional[int],
         application_name: str,
-        fail_on_overlap: bool,
+        existing_data_handling: ExistingDataHandling,
     ) -> datetime:
         """Make sure that the (app, endpoint) pair doesn't write output before the last analyzed window"""
         if application_schedules:
@@ -490,7 +572,7 @@ class ModelMonitoringApplicationBase(MonitoringApplicationToDict, ABC):
             )
             if last_analyzed:
                 if start_dt < last_analyzed:
-                    if not fail_on_overlap:
+                    if existing_data_handling == ExistingDataHandling.skip_overlap:
                         if last_analyzed < end_dt and base_period is None:
                             logger.warn(
                                 "Setting the start time to last_analyzed since the original start time precedes "
@@ -504,15 +586,17 @@ class ModelMonitoringApplicationBase(MonitoringApplicationToDict, ABC):
                         else:
                             raise mlrun.errors.MLRunValueError(
                                 "The start time for the application and endpoint precedes the last analyzed time: "
-                                f"{start_dt=}, {last_analyzed=}, {application_name=}, {endpoint_id=}. "
+                                f"start_dt='{start_dt}', last_analyzed='{last_analyzed}', {application_name=}, "
+                                f"{endpoint_id=}. "
                                 "Writing data out of order is not supported, and the start time could not be "
                                 "dynamically reset, as last_analyzed is later than the given end time or that "
-                                f"base_period was specified ({end_dt=}, {base_period=})."
+                                f"base_period was specified (end_dt='{end_dt}', {base_period=})."
                             )
                     else:
                         raise mlrun.errors.MLRunValueError(
                             "The start time for the application and endpoint precedes the last analyzed time: "
-                            f"{start_dt=}, {last_analyzed=}, {application_name=}, {endpoint_id=}. "
+                            f"start_dt='{start_dt}', last_analyzed='{last_analyzed}', {application_name=}, "
+                            f"{endpoint_id=}. "
                             "Writing data out of order is not supported. You should change the start time to "
                             f"'{last_analyzed}' or later."
                         )
@@ -525,6 +609,25 @@ class ModelMonitoringApplicationBase(MonitoringApplicationToDict, ABC):
            )
        return start_dt
 
+    @staticmethod
+    def _delete_application_data(
+        project_name: str,
+        application_name: str,
+        endpoint_ids: list[str],
+        application_schedules: Optional[
+            mm_schedules.ModelMonitoringSchedulesFileApplication
+        ],
+    ) -> None:
+        mlrun.get_run_db().delete_model_monitoring_metrics(
+            project=project_name,
+            application_name=application_name,
+            endpoint_ids=endpoint_ids,
+        )
+        if application_schedules:
+            application_schedules.delete_endpoints_last_analyzed(
+                endpoint_uids=endpoint_ids
+            )
+
    @classmethod
    def _window_generator(
        cls,
@@ -535,34 +638,79 @@ class ModelMonitoringApplicationBase(MonitoringApplicationToDict, ABC):
         application_schedules: Optional[
             mm_schedules.ModelMonitoringSchedulesFileApplication
         ],
+        endpoint_name: str,
         endpoint_id: str,
         application_name: str,
-        fail_on_overlap: bool,
-    ) -> Iterator[tuple[Optional[datetime], Optional[datetime]]]:
+        existing_data_handling: ExistingDataHandling,
+        context: "mlrun.MLClientCtx",
+        project: "mlrun.MlrunProject",
+        sample_data: Optional[pd.DataFrame],
+    ) -> Iterator[mm_context.MonitoringApplicationContext]:
+        def yield_monitoring_ctx(
+            window_start: Optional[datetime], window_end: Optional[datetime]
+        ) -> Iterator[mm_context.MonitoringApplicationContext]:
+            ctx = mm_context.MonitoringApplicationContext._from_ml_ctx(
+                event={
+                    mm_constants.ApplicationEvent.ENDPOINT_NAME: endpoint_name,
+                    mm_constants.ApplicationEvent.ENDPOINT_ID: endpoint_id,
+                    mm_constants.ApplicationEvent.START_INFER_TIME: window_start,
+                    mm_constants.ApplicationEvent.END_INFER_TIME: window_end,
+                },
+                application_name=application_name,
+                context=context,
+                project=project,
+                sample_df=sample_data,
+            )
+
+            if ctx.sample_df.empty:
+                # The current sample is empty
+                context.logger.debug(
+                    "No sample data available for tracking",
+                    application_name=application_name,
+                    endpoint_id=ctx.endpoint_id,
+                    start_time=ctx.start_infer_time,
+                    end_time=ctx.end_infer_time,
+                )
+                return
+
+            yield ctx
+
+            if application_schedules and window_end:
+                application_schedules.update_endpoint_last_analyzed(
+                    endpoint_uid=endpoint_id, last_analyzed=window_end
+                )
+
         if start is None or end is None:
             # A single window based on the `sample_data` input - see `_handler`.
-            yield None, None
+            yield from yield_monitoring_ctx(None, None)
             return
 
         start_dt = datetime.fromisoformat(start)
         end_dt = datetime.fromisoformat(end)
 
-        start_dt = cls._validate_monotonically_increasing_data(
-            application_schedules=application_schedules,
-            endpoint_id=endpoint_id,
-            start_dt=start_dt,
-            end_dt=end_dt,
-            base_period=base_period,
-            application_name=application_name,
-            fail_on_overlap=fail_on_overlap,
-        )
+        # If `start_dt` and `end_dt` do not include time zone information - change them to UTC
+        if (start_dt.tzinfo is None) and (end_dt.tzinfo is None):
+            start_dt = start_dt.replace(tzinfo=UTC)
+            end_dt = end_dt.replace(tzinfo=UTC)
+        elif (start_dt.tzinfo is None) or (end_dt.tzinfo is None):
+            raise mlrun.errors.MLRunValueError(
+                "The start and end times must either both include time zone information or both be naive (no time "
+                f"zone). Asserting the above failed, aborting the evaluate request: start={start}, end={end}."
+            )
+
+        if existing_data_handling != ExistingDataHandling.delete_all:
+            start_dt = cls._validate_monotonically_increasing_data(
+                application_schedules=application_schedules,
+                endpoint_id=endpoint_id,
+                start_dt=start_dt,
+                end_dt=end_dt,
+                base_period=base_period,
+                application_name=application_name,
+                existing_data_handling=existing_data_handling,
+            )
 
         if base_period is None:
-            yield start_dt, end_dt
-            if application_schedules:
-                application_schedules.update_endpoint_last_analyzed(
-                    endpoint_uid=endpoint_id, last_analyzed=end_dt
-                )
+            yield from yield_monitoring_ctx(start_dt, end_dt)
             return
 
         window_length = cls._validate_and_get_window_length(
@@ -572,11 +720,7 @@ class ModelMonitoringApplicationBase(MonitoringApplicationToDict, ABC):
         current_start_time = start_dt
         while current_start_time < end_dt:
             current_end_time = min(current_start_time + window_length, end_dt)
-            yield current_start_time, current_end_time
-            if application_schedules:
-                application_schedules.update_endpoint_last_analyzed(
-                    endpoint_uid=endpoint_id, last_analyzed=current_end_time
-                )
+            yield from yield_monitoring_ctx(current_start_time, current_end_time)
             current_start_time = current_end_time
 
     @classmethod
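The hunks above rework `_window_generator`: naive `start`/`end` values are coerced to UTC, each window is materialized as a `MonitoringApplicationContext`, empty windows are skipped, and `last_analyzed` only advances after a non-empty window is yielded. A standalone sketch of the windowing arithmetic itself, with `base_period` assumed to be a window length in minutes (an assumption on our part; the unit is not stated in these context lines):

    from datetime import UTC, datetime, timedelta

    def split_into_windows(start: datetime, end: datetime, base_period: int):
        # Mirror of the loop above: consecutive windows of base_period minutes,
        # with the last window truncated at `end`.
        window_length = timedelta(minutes=base_period)
        current_start = start
        while current_start < end:
            current_end = min(current_start + window_length, end)
            yield current_start, current_end
            current_start = current_end

    start = datetime(2024, 1, 1, tzinfo=UTC)
    end = datetime(2024, 1, 1, 2, 30, tzinfo=UTC)
    print(list(split_into_windows(start, end, base_period=60)))
    # -> three windows: 00:00-01:00, 01:00-02:00, 02:00-02:30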
@@ -647,7 +791,7 @@ class ModelMonitoringApplicationBase(MonitoringApplicationToDict, ABC):
         else:
             class_name = handler_to_class.split(".")[-1].split("::")[0]
 
-        job_name = mlrun.utils.normalize_name(class_name, verbose=False)
+        job_name = mlrun.utils.normalize_name(class_name)
 
         if not mm_constants.APP_NAME_REGEX.fullmatch(job_name):
             raise mlrun.errors.MLRunValueError(
@@ -655,10 +799,13 @@ class ModelMonitoringApplicationBase(MonitoringApplicationToDict, ABC):
                f"`{mm_constants.APP_NAME_REGEX.pattern}`. "
                "Please choose another `func_name`."
            )
-        if not job_name.endswith(mm_constants._RESERVED_EVALUATE_FUNCTION_SUFFIX):
-            job_name += mm_constants._RESERVED_EVALUATE_FUNCTION_SUFFIX
+        job_name, was_renamed, suffix = mlrun.utils.helpers.ensure_batch_job_suffix(
+            job_name
+        )
+        if was_renamed:
             mlrun.utils.logger.info(
-                'Changing function name - adding `"-batch"` suffix', func_name=job_name
+                f'Changing function name - adding `"{suffix}"` suffix',
+                func_name=job_name,
             )
 
        return job_name
@@ -702,7 +849,12 @@ class ModelMonitoringApplicationBase(MonitoringApplicationToDict, ABC):
         * ``end``, ``datetime``
         * ``base_period``, ``int``
         * ``write_output``, ``bool``
-        * ``fail_on_overlap``, ``bool``
+        * ``existing_data_handling``, ``str``
+        * ``_init_args``, ``dict`` - the arguments for the application class constructor
+          (equivalent to ``class_arguments``)
+
+        See :py:meth:`~ModelMonitoringApplicationBase.evaluate` for more details
+        about these inputs and params.
 
         For Git sources, add the source archive to the returned job and change the handler:
 
@@ -781,6 +933,7 @@ class ModelMonitoringApplicationBase(MonitoringApplicationToDict, ABC):
         image: Optional[str] = None,
         with_repo: Optional[bool] = False,
         class_handler: Optional[str] = None,
+        class_arguments: Optional[dict[str, Any]] = None,
         requirements: Optional[Union[str, list[str]]] = None,
         requirements_file: str = "",
         endpoints: Union[list[tuple[str, str]], list[str], Literal["all"], None] = None,
@@ -788,7 +941,7 @@ class ModelMonitoringApplicationBase(MonitoringApplicationToDict, ABC):
         end: Optional[datetime] = None,
         base_period: Optional[int] = None,
         write_output: bool = False,
-        fail_on_overlap: bool = True,
+        existing_data_handling: ExistingDataHandling = ExistingDataHandling.fail_on_overlap,
         stream_profile: Optional[ds_profile.DatastoreProfile] = None,
     ) -> "mlrun.RunObject":
         """
@@ -796,7 +949,7 @@ class ModelMonitoringApplicationBase(MonitoringApplicationToDict, ABC):
         :py:meth:`~mlrun.model_monitoring.applications.ModelMonitoringApplicationBase.do_tracking`
         model monitoring logic as a :py:class:`~mlrun.runtimes.KubejobRuntime`, which is an MLRun function.
 
-        This function has default values for all of its arguments. You should be change them when you want to pass
+        This function has default values for all of its arguments. You should change them when you want to pass
         data to the application.
 
         :param func_path: The path to the function. If ``None``, the current notebook is used.
@@ -813,9 +966,13 @@ class ModelMonitoringApplicationBase(MonitoringApplicationToDict, ABC):
         :param reference_data: Pandas data-frame or :py:class:`~mlrun.artifacts.dataset.DatasetArtifact` URI as
             the reference dataset.
             When set, its statistics override the model endpoint's feature statistics.
+            You do not need to have a model endpoint to use this option.
         :param image: Docker image to run the job on (when running remotely).
         :param with_repo: Whether to clone the current repo to the build source.
-        :param class_handler: The relative path to the class, useful when using Git sources or code from images.
+        :param class_handler: The relative path to the application class, useful when using Git sources or code
+            from images.
+        :param class_arguments: The arguments for the application class constructor. These are passed to the
+            class ``__init__``. The values must be JSON-serializable.
         :param requirements: List of Python requirements to be installed in the image.
         :param requirements_file: Path to a Python requirements file to be installed in the image.
         :param endpoints: The model endpoints to get the data from. The options are:
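The new `class_arguments` parameter documented above is forwarded to the application class constructor (packaged as `_init_args` in the job params, per the earlier docstring hunk). A hedged sketch of how it might be used; `MyDriftApp` and its `threshold` argument are illustrative only, and the enclosing classmethod is assumed to be `evaluate` (its name falls outside the shown context lines):

    from mlrun.model_monitoring.applications import (
        ModelMonitoringApplicationBase,
        MonitoringApplicationContext,
    )

    class MyDriftApp(ModelMonitoringApplicationBase):
        # Hypothetical application; constructor arguments must be JSON-serializable.
        def __init__(self, threshold: float = 0.5):
            self.threshold = threshold

        def do_tracking(self, monitoring_context: MonitoringApplicationContext):
            # A real application computes and returns drift results/metrics here.
            ...

    # Requires a live MLRun project with model endpoints.
    run = MyDriftApp.evaluate(
        endpoints="all",
        class_arguments={"threshold": 0.8},
    )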
@@ -833,8 +990,9 @@ class ModelMonitoringApplicationBase(MonitoringApplicationToDict, ABC):
         :param start: The start time of the endpoint's data, not included.
             If you want the model endpoint's data at ``start`` included, you need to subtract a
             small ``datetime.timedelta`` from it.
-            Make sure to include the time zone when constructing `datetime.datetime` objects
-            manually.
+            Make sure to include the time zone when constructing ``datetime.datetime`` objects
+            manually. When both ``start`` and ``end`` times do not include a time zone, they will
+            be treated as UTC.
         :param end: The end time of the endpoint's data, included.
             Please note: when ``start`` and ``end`` are set, they create a left-open time interval
             ("window") :math:`(\\operatorname{start}, \\operatorname{end}]` that excludes the
@@ -856,11 +1014,18 @@ class ModelMonitoringApplicationBase(MonitoringApplicationToDict, ABC):
         :param write_output: Whether to write the results and metrics to the time-series DB. Can be ``True`` only
             if ``endpoints`` are passed.
             Note: the model monitoring infrastructure must be up for the writing to work.
-        :param fail_on_overlap: Relevant only when ``write_output=True``. When ``True``, and the
-            requested ``start`` time precedes the ``end`` time of a previous run that also
-            wrote to the database - an error is raised.
-            If ``False``, when the previously described situation occurs, the relevant time
-            window is cut so that it starts at the earliest possible time after ``start``.
+        :param existing_data_handling:
+            How to handle the existing application data for the model endpoints when writing
+            new data whose requested ``start`` time precedes the ``end`` time of a previous run
+            that also wrote to the database. Relevant only when ``write_output=True``.
+            The options are:
+
+            - ``"fail_on_overlap"``: Default. An error is raised.
+            - ``"skip_overlap"``: the overlapping data is ignored and the
+              time window is cut so that it starts at the earliest possible time after ``start``.
+            - ``"delete_all"``: delete all the data that was written by the application to the
+              model endpoints, regardless of the time window, and write the new data.
+
         :param stream_profile: The stream datastore profile. It should be provided only when running locally and
             writing the outputs to the database (i.e., when both ``run_local`` and
             ``write_output`` are set to ``True``).
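Putting the documented parameters together, a hedged sketch of an evaluation call over a fixed window. `MyDriftApp` is the illustrative class from the previous sketch, and treating `base_period` as minutes is an assumption (its unit is outside the shown context lines):

    from datetime import UTC, datetime, timedelta

    end = datetime.now(tz=UTC)          # timezone-aware, as the docstring recommends
    start = end - timedelta(days=1)     # naive values would be treated as UTC

    run = MyDriftApp.evaluate(
        endpoints="all",
        start=start,
        end=end,
        base_period=60,                             # assumed: window length in minutes
        write_output=True,
        existing_data_handling="skip_overlap",      # or "fail_on_overlap" / "delete_all"
    )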
@@ -885,7 +1050,9 @@ class ModelMonitoringApplicationBase(MonitoringApplicationToDict, ABC):
             project=project,
         )
 
-        params: dict[str, Union[list, str, int, None, ds_profile.DatastoreProfile]] = {}
+        params: dict[
+            str, Union[list, dict, str, int, None, ds_profile.DatastoreProfile]
+        ] = {}
         if endpoints:
             params["endpoints"] = endpoints
             if sample_data is None:
@@ -899,18 +1066,6 @@ class ModelMonitoringApplicationBase(MonitoringApplicationToDict, ABC):
                 )
                 params["end"] = end.isoformat() if isinstance(end, datetime) else end
                 params["base_period"] = base_period
-            params["write_output"] = write_output
-            params["fail_on_overlap"] = fail_on_overlap
-            if stream_profile:
-                if not run_local:
-                    raise mlrun.errors.MLRunValueError(
-                        "Passing a `stream_profile` is relevant only when running locally"
-                    )
-                if not write_output:
-                    raise mlrun.errors.MLRunValueError(
-                        "Passing a `stream_profile` is relevant only when writing the outputs"
-                    )
-                params["stream_profile"] = stream_profile
         elif start or end or base_period:
             raise mlrun.errors.MLRunValueError(
                 "Custom `start` and `end` times or base_period are supported only with endpoints data"
@@ -920,6 +1075,22 @@ class ModelMonitoringApplicationBase(MonitoringApplicationToDict, ABC):
                "Writing the application output or passing `stream_profile` are supported only with endpoints data"
            )
 
+        params["write_output"] = write_output
+        params["existing_data_handling"] = existing_data_handling
+        if stream_profile:
+            if not run_local:
+                raise mlrun.errors.MLRunValueError(
+                    "Passing a `stream_profile` is relevant only when running locally"
+                )
+            if not write_output:
+                raise mlrun.errors.MLRunValueError(
+                    "Passing a `stream_profile` is relevant only when writing the outputs"
+                )
+            params["stream_profile"] = stream_profile
+
+        if class_arguments:
+            params["_init_args"] = class_arguments
+
        inputs: dict[str, str] = {}
        for data, identifier in [
            (sample_data, "sample_data"),
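For reference, a sketch of the parameter dict the code above assembles for the generated job when endpoints, output writing, and class arguments are all passed. The key names come from this diff; the values are illustrative only:

    params = {
        "endpoints": "all",
        "start": "2024-01-01T00:00:00+00:00",
        "end": "2024-01-02T00:00:00+00:00",
        "base_period": 60,
        "write_output": True,
        "existing_data_handling": "skip_overlap",
        "_init_args": {"threshold": 0.8},
    }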