mlrun 1.10.0rc14__py3-none-any.whl → 1.10.0rc16__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of mlrun might be problematic.

Files changed (48)
  1. mlrun/artifacts/base.py +0 -31
  2. mlrun/artifacts/llm_prompt.py +6 -0
  3. mlrun/artifacts/manager.py +0 -5
  4. mlrun/common/constants.py +1 -0
  5. mlrun/common/schemas/__init__.py +1 -0
  6. mlrun/common/schemas/model_monitoring/__init__.py +1 -0
  7. mlrun/common/schemas/model_monitoring/functions.py +1 -1
  8. mlrun/common/schemas/model_monitoring/model_endpoints.py +10 -0
  9. mlrun/common/schemas/workflow.py +2 -0
  10. mlrun/config.py +1 -1
  11. mlrun/datastore/model_provider/model_provider.py +42 -14
  12. mlrun/datastore/model_provider/openai_provider.py +96 -15
  13. mlrun/db/base.py +20 -0
  14. mlrun/db/httpdb.py +64 -9
  15. mlrun/db/nopdb.py +13 -0
  16. mlrun/launcher/local.py +13 -0
  17. mlrun/model_monitoring/__init__.py +1 -0
  18. mlrun/model_monitoring/applications/base.py +176 -20
  19. mlrun/model_monitoring/db/_schedules.py +84 -24
  20. mlrun/model_monitoring/db/tsdb/base.py +72 -1
  21. mlrun/model_monitoring/db/tsdb/tdengine/schemas.py +7 -1
  22. mlrun/model_monitoring/db/tsdb/tdengine/tdengine_connector.py +37 -0
  23. mlrun/model_monitoring/db/tsdb/v3io/v3io_connector.py +25 -0
  24. mlrun/model_monitoring/helpers.py +26 -4
  25. mlrun/projects/pipelines.py +44 -24
  26. mlrun/projects/project.py +26 -7
  27. mlrun/runtimes/daskjob.py +6 -0
  28. mlrun/runtimes/mpijob/abstract.py +6 -0
  29. mlrun/runtimes/mpijob/v1.py +6 -0
  30. mlrun/runtimes/nuclio/application/application.py +2 -0
  31. mlrun/runtimes/nuclio/function.py +6 -0
  32. mlrun/runtimes/nuclio/serving.py +12 -11
  33. mlrun/runtimes/pod.py +21 -0
  34. mlrun/runtimes/remotesparkjob.py +6 -0
  35. mlrun/runtimes/sparkjob/spark3job.py +6 -0
  36. mlrun/runtimes/utils.py +0 -2
  37. mlrun/serving/server.py +122 -53
  38. mlrun/serving/states.py +128 -44
  39. mlrun/serving/system_steps.py +84 -58
  40. mlrun/utils/helpers.py +82 -12
  41. mlrun/utils/retryer.py +15 -2
  42. mlrun/utils/version/version.json +2 -2
  43. {mlrun-1.10.0rc14.dist-info → mlrun-1.10.0rc16.dist-info}/METADATA +2 -7
  44. {mlrun-1.10.0rc14.dist-info → mlrun-1.10.0rc16.dist-info}/RECORD +48 -48
  45. {mlrun-1.10.0rc14.dist-info → mlrun-1.10.0rc16.dist-info}/WHEEL +0 -0
  46. {mlrun-1.10.0rc14.dist-info → mlrun-1.10.0rc16.dist-info}/entry_points.txt +0 -0
  47. {mlrun-1.10.0rc14.dist-info → mlrun-1.10.0rc16.dist-info}/licenses/LICENSE +0 -0
  48. {mlrun-1.10.0rc14.dist-info → mlrun-1.10.0rc16.dist-info}/top_level.txt +0 -0
mlrun/model_monitoring/applications/base.py

@@ -17,7 +17,7 @@ import socket
 from abc import ABC, abstractmethod
 from collections import defaultdict
 from collections.abc import Iterator
-from contextlib import contextmanager
+from contextlib import contextmanager, nullcontext
 from datetime import datetime, timedelta
 from typing import Any, Literal, Optional, Union, cast
 
@@ -31,6 +31,7 @@ import mlrun.errors
 import mlrun.model_monitoring.api as mm_api
 import mlrun.model_monitoring.applications.context as mm_context
 import mlrun.model_monitoring.applications.results as mm_results
+import mlrun.model_monitoring.db._schedules as mm_schedules
 import mlrun.model_monitoring.helpers as mm_helpers
 from mlrun.serving.utils import MonitoringApplicationToDict
 from mlrun.utils import logger
@@ -183,14 +184,27 @@ class ModelMonitoringApplicationBase(MonitoringApplicationToDict, ABC):
         cls,
         *,
         write_output: bool,
+        application_name: str,
+        artifact_path: str,
         stream_profile: Optional[ds_profile.DatastoreProfile],
         project: "mlrun.MlrunProject",
-    ) -> Iterator[dict[str, list[tuple]]]:
+    ) -> Iterator[
+        tuple[
+            dict[str, list[tuple]],
+            Optional[mm_schedules.ModelMonitoringSchedulesFileApplication],
+        ]
+    ]:
         endpoints_output: dict[str, list[tuple]] = defaultdict(list)
+        application_schedules = nullcontext()
         if write_output:
             cls._check_writer_is_up(project)
+            application_schedules = (
+                mm_schedules.ModelMonitoringSchedulesFileApplication(
+                    artifact_path, application=application_name
+                )
+            )
         try:
-            yield endpoints_output
+            yield endpoints_output, application_schedules.__enter__()
         finally:
             if write_output:
                 logger.debug(
@@ -218,6 +232,12 @@ class ModelMonitoringApplicationBase(MonitoringApplicationToDict, ABC):
                     endpoints_output=endpoints_output,
                 )
 
+                logger.debug(
+                    "Saving the application schedules",
+                    application_name=application_name,
+                )
+                application_schedules.__exit__(None, None, None)
+
     def _handler(
         self,
         context: "mlrun.MLClientCtx",
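Note on the pattern above: when `write_output` is `False`, `application_schedules` stays a `contextlib.nullcontext()`, whose `__enter__()` returns `None`, so downstream code can guard it with a simple truth test. A minimal sketch of the same pattern, using a hypothetical stand-in class rather than the real schedules file:

from contextlib import contextmanager, nullcontext


class FakeSchedulesFile:
    # Hypothetical stand-in for ModelMonitoringSchedulesFileApplication.
    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        print("schedules saved")  # the real class persists the file here


@contextmanager
def push_to_writer(write_output: bool):
    ctx = FakeSchedulesFile() if write_output else nullcontext()
    try:
        # nullcontext().__enter__() returns None, so callers can guard
        # with `if application_schedules:` exactly as the real code does.
        yield [], ctx.__enter__()
    finally:
        ctx.__exit__(None, None, None)


with push_to_writer(write_output=False) as (output, schedules):
    assert schedules is None  # no schedules file when not writing output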
@@ -230,6 +250,7 @@ class ModelMonitoringApplicationBase(MonitoringApplicationToDict, ABC):
         end: Optional[str] = None,
         base_period: Optional[int] = None,
         write_output: bool = False,
+        allow_unordered_data: bool = False,
         stream_profile: Optional[ds_profile.DatastoreProfile] = None,
     ):
         """
@@ -250,6 +271,8 @@ class ModelMonitoringApplicationBase(MonitoringApplicationToDict, ABC):
             "working with endpoints, without any custom data-frame input"
         )
 
+        application_name = self.__class__.__name__
+
         feature_stats = (
             mm_api.get_sample_set_statistics(reference_data)
             if reference_data is not None
@@ -257,8 +280,12 @@ class ModelMonitoringApplicationBase(MonitoringApplicationToDict, ABC):
         )
 
         with self._push_to_writer(
-            write_output=write_output, stream_profile=stream_profile, project=project
-        ) as endpoints_output:
+            write_output=write_output,
+            stream_profile=stream_profile,
+            application_name=application_name,
+            artifact_path=context.artifact_path,
+            project=project,
+        ) as (endpoints_output, application_schedules):
 
             def call_do_tracking(event: Optional[dict] = None):
                 nonlocal endpoints_output
@@ -268,7 +295,7 @@ class ModelMonitoringApplicationBase(MonitoringApplicationToDict, ABC):
                 monitoring_context = (
                     mm_context.MonitoringApplicationContext._from_ml_ctx(
                         event=event,
-                        application_name=self.__class__.__name__,
+                        application_name=application_name,
                         context=context,
                         project=project,
                         sample_df=sample_data,
@@ -285,10 +312,16 @@ class ModelMonitoringApplicationBase(MonitoringApplicationToDict, ABC):
             resolved_endpoints = self._handle_endpoints_type_evaluate(
                 project=project, endpoints=endpoints
            )
-            for window_start, window_end in self._window_generator(
-                start, end, base_period
-            ):
-                for endpoint_name, endpoint_id in resolved_endpoints:
+            for endpoint_name, endpoint_id in resolved_endpoints:
+                for window_start, window_end in self._window_generator(
+                    start=start,
+                    end=end,
+                    base_period=base_period,
+                    application_schedules=application_schedules,
+                    endpoint_id=endpoint_id,
+                    application_name=application_name,
+                    allow_unordered_data=allow_unordered_data,
+                ):
                     result = call_do_tracking(
                         event={
                             mm_constants.ApplicationEvent.ENDPOINT_NAME: endpoint_name,
@@ -370,8 +403,103 @@ class ModelMonitoringApplicationBase(MonitoringApplicationToDict, ABC):
         )
 
     @staticmethod
+    def _validate_and_get_window_length(
+        *, base_period: int, start_dt: datetime, end_dt: datetime
+    ) -> timedelta:
+        if not isinstance(base_period, int) or base_period <= 0:
+            raise mlrun.errors.MLRunValueError(
+                "`base_period` must be a nonnegative integer - the number of minutes in a monitoring window"
+            )
+
+        window_length = timedelta(minutes=base_period)
+
+        full_interval_length = end_dt - start_dt
+        remainder = full_interval_length % window_length
+        if remainder:
+            if full_interval_length < window_length:
+                extra_msg = (
+                    "The `base_period` is longer than the difference between `end` and `start`: "
+                    f"{full_interval_length}. Consider not specifying `base_period`."
+                )
+            else:
+                extra_msg = (
+                    f"Consider changing the `end` time to `end`={end_dt - remainder}"
+                )
+            raise mlrun.errors.MLRunValueError(
+                "The difference between `end` and `start` must be a multiple of `base_period`: "
+                f"`base_period`={window_length}, `start`={start_dt}, `end`={end_dt}. "
+                f"{extra_msg}"
+            )
+        return window_length
+
+    @staticmethod
+    def _validate_monotonically_increasing_data(
+        *,
+        application_schedules: Optional[
+            mm_schedules.ModelMonitoringSchedulesFileApplication
+        ],
+        endpoint_id: str,
+        start_dt: datetime,
+        end_dt: datetime,
+        base_period: Optional[int],
+        application_name: str,
+        allow_unordered_data: bool,
+    ) -> datetime:
+        """Make sure that the (app, endpoint) pair doesn't write output before the last analyzed window"""
+        if application_schedules:
+            last_analyzed = application_schedules.get_endpoint_last_analyzed(
+                endpoint_id
+            )
+            if last_analyzed:
+                if start_dt < last_analyzed:
+                    if allow_unordered_data:
+                        if last_analyzed < end_dt and base_period is None:
+                            logger.warn(
+                                "Setting the start time to last_analyzed since the original start time precedes "
+                                "last_analyzed",
+                                original_start=start_dt,
+                                new_start=last_analyzed,
+                                application_name=application_name,
+                                endpoint_id=endpoint_id,
+                            )
+                            start_dt = last_analyzed
+                        else:
+                            raise mlrun.errors.MLRunValueError(
+                                "The start time for the application and endpoint precedes the last analyzed time: "
+                                f"{start_dt=}, {last_analyzed=}, {application_name=}, {endpoint_id=}. "
+                                "Writing data out of order is not supported, and the start time could not be "
+                                "dynamically reset, as last_analyzed is later than the given end time or that "
+                                f"base_period was specified ({end_dt=}, {base_period=})."
+                            )
+                    else:
+                        raise mlrun.errors.MLRunValueError(
+                            "The start time for the application and endpoint precedes the last analyzed time: "
+                            f"{start_dt=}, {last_analyzed=}, {application_name=}, {endpoint_id=}. "
+                            "Writing data out of order is not supported. You should change the start time to "
+                            f"'{last_analyzed}' or later."
+                        )
+            else:
+                logger.debug(
+                    "The application is running on the endpoint for the first time",
+                    endpoint_id=endpoint_id,
+                    start_dt=start_dt,
+                    application_name=application_name,
+                )
+        return start_dt
+
+    @classmethod
     def _window_generator(
-        start: Optional[str], end: Optional[str], base_period: Optional[int]
+        cls,
+        *,
+        start: Optional[str],
+        end: Optional[str],
+        base_period: Optional[int],
+        application_schedules: Optional[
+            mm_schedules.ModelMonitoringSchedulesFileApplication
+        ],
+        endpoint_id: str,
+        application_name: str,
+        allow_unordered_data: bool,
     ) -> Iterator[tuple[Optional[datetime], Optional[datetime]]]:
         if start is None or end is None:
             # A single window based on the `sample_data` input - see `_handler`.
@@ -381,20 +509,36 @@ class ModelMonitoringApplicationBase(MonitoringApplicationToDict, ABC):
         start_dt = datetime.fromisoformat(start)
         end_dt = datetime.fromisoformat(end)
 
+        start_dt = cls._validate_monotonically_increasing_data(
+            application_schedules=application_schedules,
+            endpoint_id=endpoint_id,
+            start_dt=start_dt,
+            end_dt=end_dt,
+            base_period=base_period,
+            application_name=application_name,
+            allow_unordered_data=allow_unordered_data,
+        )
+
         if base_period is None:
             yield start_dt, end_dt
+            if application_schedules:
+                application_schedules.update_endpoint_last_analyzed(
+                    endpoint_uid=endpoint_id, last_analyzed=end_dt
+                )
             return
 
-        if not isinstance(base_period, int) or base_period <= 0:
-            raise mlrun.errors.MLRunValueError(
-                "`base_period` must be a nonnegative integer - the number of minutes in a monitoring window"
-            )
+        window_length = cls._validate_and_get_window_length(
+            base_period=base_period, start_dt=start_dt, end_dt=end_dt
+        )
 
-        window_length = timedelta(minutes=base_period)
         current_start_time = start_dt
         while current_start_time < end_dt:
             current_end_time = min(current_start_time + window_length, end_dt)
             yield current_start_time, current_end_time
+            if application_schedules:
+                application_schedules.update_endpoint_last_analyzed(
+                    endpoint_uid=endpoint_id, last_analyzed=current_end_time
+                )
             current_start_time = current_end_time
 
     @classmethod
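Together with the `_push_to_writer` changes, the generator now checkpoints `last_analyzed` after every yielded window, so a later run resumes after the last completed window instead of re-reading it. A rough worked example of the windowing arithmetic alone (times are illustrative):

from datetime import datetime, timedelta, timezone

start = datetime(2025, 1, 1, 10, 0, tzinfo=timezone.utc)
end = datetime(2025, 1, 1, 11, 30, tzinfo=timezone.utc)
window_length = timedelta(minutes=30)  # base_period=30; 90 minutes is a multiple

current = start
while current < end:
    window_end = min(current + window_length, end)
    print(f"({current:%H:%M}, {window_end:%H:%M}]")  # left-open windows
    current = window_end
# Prints (10:00, 10:30], (10:30, 11:00], (11:00, 11:30]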
@@ -484,6 +628,7 @@ class ModelMonitoringApplicationBase(MonitoringApplicationToDict, ABC):
         * ``end``, ``datetime``
         * ``base_period``, ``int``
         * ``write_output``, ``bool``
+        * ``allow_unordered_data``, ``bool``
 
         For Git sources, add the source archive to the returned job and change the handler:
 
@@ -567,6 +712,7 @@ class ModelMonitoringApplicationBase(MonitoringApplicationToDict, ABC):
         end: Optional[datetime] = None,
         base_period: Optional[int] = None,
         write_output: bool = False,
+        allow_unordered_data: bool = False,
         stream_profile: Optional[ds_profile.DatastoreProfile] = None,
     ) -> "mlrun.RunObject":
         """
@@ -608,6 +754,8 @@ class ModelMonitoringApplicationBase(MonitoringApplicationToDict, ABC):
         :param start: The start time of the endpoint's data, not included.
                       If you want the model endpoint's data at ``start`` included, you need to subtract a
                       small ``datetime.timedelta`` from it.
+                      Make sure to include the time zone when constructing `datetime.datetime` objects
+                      manually.
         :param end: The end time of the endpoint's data, included.
                     Please note: when ``start`` and ``end`` are set, they create a left-open time interval
                     ("window") :math:`(\\operatorname{start}, \\operatorname{end}]` that excludes the
@@ -616,17 +764,24 @@ class ModelMonitoringApplicationBase(MonitoringApplicationToDict, ABC):
                     taken in the window's data.
         :param base_period: The window length in minutes. If ``None``, the whole window from ``start`` to ``end``
                             is taken. If an integer is specified, the application is run from ``start`` to ``end``
-                            in ``base_period`` length windows, except for the last window that ends at ``end`` and
-                            therefore may be shorter:
+                            in ``base_period`` length windows:
                             :math:`(\\operatorname{start}, \\operatorname{start} + \\operatorname{base\\_period}],
                             (\\operatorname{start} + \\operatorname{base\\_period},
                             \\operatorname{start} + 2\\cdot\\operatorname{base\\_period}],
                             ..., (\\operatorname{start} +
-                            m\\cdot\\operatorname{base\\_period}, \\operatorname{end}]`,
-                            where :math:`m` is some positive integer.
+                            (m - 1)\\cdot\\operatorname{base\\_period}, \\operatorname{end}]`,
+                            where :math:`m` is a positive integer and :math:`\\operatorname{end} =
+                            \\operatorname{start} + m\\cdot\\operatorname{base\\_period}`.
+                            Please note that the difference between ``end`` and ``start`` must be a multiple of
+                            ``base_period``.
         :param write_output: Whether to write the results and metrics to the time-series DB. Can be ``True`` only
                              if ``endpoints`` are passed.
                             Note: the model monitoring infrastructure must be up for the writing to work.
+        :param allow_unordered_data: Relevant only when writing outputs to the database. When ``False``, and the
+                                     requested ``start`` time precedes the ``end`` time of a previous run that also
+                                     wrote to the database - an error is raised.
+                                     If ``True``, when the previously described situation occurs, the relevant time
+                                     window is cut so that it starts at the earliest possible time after ``start``.
         :param stream_profile: The stream datastore profile. It should be provided only when running locally and
                                writing the outputs to the database (i.e., when both ``run_local`` and
                                ``write_output`` are set to ``True``).
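Given the tightened docstring above, a call that satisfies the new constraints might look like this sketch; `MyMonitoringApp` and the endpoint name are hypothetical, and only parameters visible in this diff are shown:

from datetime import datetime, timezone

# MyMonitoringApp is a hypothetical ModelMonitoringApplicationBase subclass.
run = MyMonitoringApp.evaluate(
    endpoints=["my-endpoint"],
    # Time-zone-aware datetimes; end - start is 2h, a multiple of base_period.
    start=datetime(2025, 1, 1, 10, 0, tzinfo=timezone.utc),
    end=datetime(2025, 1, 1, 12, 0, tzinfo=timezone.utc),
    base_period=30,              # four 30-minute windows
    write_output=True,           # requires the monitoring infrastructure
    allow_unordered_data=False,  # raise if start precedes last_analyzed
)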
@@ -666,6 +821,7 @@ class ModelMonitoringApplicationBase(MonitoringApplicationToDict, ABC):
         params["end"] = end.isoformat() if isinstance(end, datetime) else end
         params["base_period"] = base_period
         params["write_output"] = write_output
+        params["allow_unordered_data"] = allow_unordered_data
         if stream_profile:
             if not run_local:
                 raise mlrun.errors.MLRunValueError(
mlrun/model_monitoring/db/_schedules.py

@@ -13,25 +13,36 @@
 # limitations under the License.
 
 import json
+import sys
 from abc import ABC, abstractmethod
 from contextlib import AbstractContextManager
+from datetime import datetime, timezone
 from types import TracebackType
-from typing import Final, Optional
+from typing import TYPE_CHECKING, Final, Optional
 
 import botocore.exceptions
 
+import mlrun
 import mlrun.common.schemas as schemas
 import mlrun.errors
 import mlrun.model_monitoring.helpers
+import mlrun.utils.helpers
 from mlrun.utils import logger
 
+if TYPE_CHECKING:
+    if sys.version_info >= (3, 11):
+        from typing import Self
+    else:
+        from typing_extensions import Self
+
 
 class ModelMonitoringSchedulesFileBase(AbstractContextManager, ABC):
     DEFAULT_SCHEDULES: Final = {}
     INITIAL_CONTENT = json.dumps(DEFAULT_SCHEDULES)
     ENCODING = "utf-8"
 
-    def __init__(self):
+    def __init__(self) -> None:
+        # `self._item` is the persistent version of the monitoring schedules.
         self._item = self.get_data_item_object()
         if self._item:
             self._path = self._item.url
@@ -43,9 +54,16 @@ class ModelMonitoringSchedulesFileBase(AbstractContextManager, ABC):
         self._open_schedules = False
 
     @abstractmethod
-    def get_data_item_object(self) -> mlrun.DataItem:
+    def get_data_item_object(self) -> "mlrun.DataItem":
         pass
 
+    def _exists(self) -> bool:
+        """Return whether the file exists or not"""
+        return (
+            self._fs is None  # In-memory store
+            or self._fs.exists(self._path)
+        )
+
     def create(self) -> None:
         """Create a schedules file with initial content - an empty dictionary"""
         logger.debug("Creating model monitoring schedules file", path=self._item.url)
@@ -53,10 +71,7 @@ class ModelMonitoringSchedulesFileBase(AbstractContextManager, ABC):
 
     def delete(self) -> None:
         """Delete schedules file if it exists"""
-        if (
-            self._fs is None  # In-memory store
-            or self._fs.exists(self._path)
-        ):
+        if self._exists():
             logger.debug(
                 "Deleting model monitoring schedules file", path=self._item.url
             )
@@ -100,7 +115,7 @@ class ModelMonitoringSchedulesFileBase(AbstractContextManager, ABC):
         self._schedules = self.DEFAULT_SCHEDULES
         self._open_schedules = False
 
-    def __enter__(self) -> "ModelMonitoringSchedulesFileBase":
+    def __enter__(self) -> "Self":
         self._open()
         return super().__enter__()
 
@@ -129,12 +144,11 @@ class ModelMonitoringSchedulesFileEndpoint(ModelMonitoringSchedulesFileBase):
         :param project: The project name.
         :param endpoint_id: The endpoint ID.
         """
-        # `self._item` is the persistent version of the monitoring schedules.
         self._project = project
         self._endpoint_id = endpoint_id
         super().__init__()
 
-    def get_data_item_object(self) -> mlrun.DataItem:
+    def get_data_item_object(self) -> "mlrun.DataItem":
         return mlrun.model_monitoring.helpers.get_monitoring_schedules_endpoint_data(
             project=self._project, endpoint_id=self._endpoint_id
         )
@@ -179,7 +193,7 @@ class ModelMonitoringSchedulesFileChief(ModelMonitoringSchedulesFileBase):
         self._project = project
         super().__init__()
 
-    def get_data_item_object(self) -> mlrun.DataItem:
+    def get_data_item_object(self) -> "mlrun.DataItem":
         return mlrun.model_monitoring.helpers.get_monitoring_schedules_chief_data(
             project=self._project
         )
@@ -216,22 +230,49 @@ class ModelMonitoringSchedulesFileChief(ModelMonitoringSchedulesFileBase):
         return set(self._schedules.keys())
 
     def get_or_create(self) -> None:
-        try:
-            self._open()
-        except (
-            mlrun.errors.MLRunNotFoundError,
-            # Different errors are raised for S3 or local storage, see ML-8042
-            botocore.exceptions.ClientError,
-            FileNotFoundError,
-        ):
+        if not self._exists():
             self.create()
 
 
-def delete_model_monitoring_schedules_folder(project: str) -> None:
-    """Delete the model monitoring schedules folder of the project"""
-    folder = mlrun.model_monitoring.helpers._get_monitoring_schedules_folder_path(
-        project
-    )
+class ModelMonitoringSchedulesFileApplication(ModelMonitoringSchedulesFileBase):
+    def __init__(self, out_path: str, application: str) -> None:
+        self._out_path = out_path
+        self._application = application
+        super().__init__()
+
+    def get_data_item_object(self) -> "mlrun.DataItem":
+        return mlrun.model_monitoring.helpers.get_monitoring_schedules_user_application_data(
+            out_path=self._out_path, application=self._application
+        )
+
+    def _open(self) -> None:
+        if not self._exists():
+            # Create the file when it is needed the first time
+            logger.info(
+                "Creating the application schedules file",
+                application=self._application,
+                path=self._path,
+            )
+            self.create()
+        super()._open()
+
+    def get_endpoint_last_analyzed(self, endpoint_uid: str) -> Optional[datetime]:
+        self._check_open_schedules()
+        if endpoint_uid in self._schedules:
+            return datetime.fromisoformat(self._schedules[endpoint_uid])
+        else:
+            return None
+
+    def update_endpoint_last_analyzed(
+        self, endpoint_uid: str, last_analyzed: datetime
+    ) -> None:
+        self._check_open_schedules()
+        self._schedules[endpoint_uid] = last_analyzed.astimezone(
+            timezone.utc
+        ).isoformat()
+
+
+def _delete_folder(folder: str) -> None:
     fs = mlrun.datastore.store_manager.object(folder).store.filesystem
     if fs and fs.exists(folder):
         logger.debug("Deleting model monitoring schedules folder", folder=folder)
@@ -240,3 +281,22 @@ def delete_model_monitoring_schedules_folder(project: str) -> None:
         raise mlrun.errors.MLRunValueError(
             "Cannot delete a folder without a file-system"
         )
+
+
+def delete_model_monitoring_schedules_folder(project: str) -> None:
+    """Delete the model monitoring schedules folder of the project"""
+    folder = mlrun.model_monitoring.helpers._get_monitoring_schedules_folder_path(
+        project
+    )
+    _delete_folder(folder)
+
+
+def delete_model_monitoring_schedules_user_folder(project: str) -> None:
+    """Delete the user created schedules folder (created through `app.evaluate`)"""
+    out_path = mlrun.utils.helpers.template_artifact_path(
+        mlrun.mlconf.artifact_path, project=project
+    )
+    folder = mlrun.model_monitoring.helpers._get_monitoring_schedules_user_folder_path(
+        out_path
+    )
+    _delete_folder(folder)
mlrun/model_monitoring/db/tsdb/base.py

@@ -13,7 +13,7 @@
 # limitations under the License.
 
 from abc import ABC, abstractmethod
-from datetime import datetime
+from datetime import datetime, timedelta
 from typing import Callable, ClassVar, Literal, Optional, Union
 
 import pandas as pd
@@ -80,6 +80,21 @@ class TSDBConnector(ABC):
         :raise mlrun.errors.MLRunRuntimeError: If an error occurred while writing the event.
         """
 
+    @abstractmethod
+    def get_drift_data(
+        self,
+        start: datetime,
+        end: datetime,
+    ) -> mm_schemas.ModelEndpointDriftValues:
+        """
+        Fetches drift counts per interval in the specified time range.
+
+        :param start: The start time of the query.
+        :param end: The end time of the query.
+
+        :return: A ModelEndpointDriftValues object containing drift data.
+        """
+
     @abstractmethod
     def delete_tsdb_records(
         self,
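The shape of `ModelEndpointDriftValues` is not spelled out in this hunk, but judging from `_df_to_drift_data` further down, `values` appears to hold one tuple of (interval start, suspected count, detected count) per aligned interval. A hypothetical construction:

# Hypothetical construction; the exact field types are not shown in this diff.
values = [
    ("2025-01-01T10:00:00", 2, 0),  # (interval start, suspected, detected)
    ("2025-01-01T10:10:00", 1, 3),
]
drift = mm_schemas.ModelEndpointDriftValues(values=values)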
@@ -703,3 +718,59 @@ class TSDBConnector(ABC):
             )
         )
         return {dict_key: metrics}
+
+    @staticmethod
+    def _prepare_aligned_start_end(
+        start: datetime, end: datetime
+    ) -> tuple[datetime, datetime, str]:
+        delta = end - start
+        if delta <= timedelta(hours=6):
+            interval = "10m"
+            start = start.replace(
+                minute=start.minute // 10 * 10, second=0, microsecond=0
+            )
+        elif delta <= timedelta(hours=72):
+            interval = "1h"
+            start = start.replace(minute=0, second=0, microsecond=0)
+        else:
+            interval = "1d"
+            start = start.replace(hour=0, minute=0, second=0, microsecond=0)
+
+        interval_map = {
+            "10m": timedelta(minutes=10),
+            "1h": timedelta(hours=1),
+            "1d": timedelta(days=1),
+        }
+        delta = end - start
+        interval_td = interval_map[interval]
+        end = start + (delta // interval_td) * interval_td
+        return start, end, interval
+
+    @staticmethod
+    def _df_to_drift_data(df: pd.DataFrame) -> mm_schemas.ModelEndpointDriftValues:
+        suspected_val = mm_schemas.constants.ResultStatusApp.potential_detection.value
+        detected_val = mm_schemas.constants.ResultStatusApp.detected.value
+        aggregated_df = (
+            df.groupby(["_wstart", f"max({mm_schemas.ResultData.RESULT_STATUS})"])
+            .size()  # add size column for each interval x result-status combination
+            .unstack()  # create a size column for each result-status
+            .reindex(
+                columns=[suspected_val, detected_val], fill_value=0
+            )  # ensure both columns exists
+            .fillna(0)
+            .astype(int)
+            .rename(
+                columns={
+                    suspected_val: "count_suspected",
+                    detected_val: "count_detected",
+                }
+            )
+        )
+        values = list(
+            zip(
+                aggregated_df.index,
+                aggregated_df["count_suspected"],
+                aggregated_df["count_detected"],
+            )
+        )
+        return mm_schemas.ModelEndpointDriftValues(values=values)
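As a quick sanity check of `_prepare_aligned_start_end`: for a range of at most six hours it picks a 10-minute interval, floors `start` to the interval boundary, and snaps `end` down so the range contains a whole number of intervals. A worked example with illustrative times:

from datetime import datetime, timedelta

start = datetime(2025, 1, 1, 10, 7, 23)
end = datetime(2025, 1, 1, 12, 34)

# delta = 2h26m37s <= 6h, so interval = "10m" and start floors to 10:00:00.
aligned_start = start.replace(minute=start.minute // 10 * 10, second=0, microsecond=0)
interval_td = timedelta(minutes=10)
# end - aligned_start = 2h34m holds 15 whole 10-minute intervals,
# so end snaps down to 10:00 + 15 * 10m = 12:30:00.
aligned_end = aligned_start + ((end - aligned_start) // interval_td) * interval_td

assert aligned_start == datetime(2025, 1, 1, 10, 0)
assert aligned_end == datetime(2025, 1, 1, 12, 30)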
mlrun/model_monitoring/db/tsdb/tdengine/schemas.py

@@ -165,6 +165,7 @@ class TDEngineSchema:
         preform_agg_funcs_columns: Optional[list[str]] = None,
         order_by: Optional[str] = None,
         desc: Optional[bool] = None,
+        partition_by: Optional[str] = None,
     ) -> str:
         if agg_funcs and not columns_to_filter:
             raise mlrun.errors.MLRunInvalidArgumentError(
@@ -176,7 +177,10 @@ class TDEngineSchema:
             raise mlrun.errors.MLRunInvalidArgumentError(
                 "`agg_funcs` must be provided when using interval"
             )
-
+        if partition_by and not agg_funcs:
+            raise mlrun.errors.MLRunInvalidArgumentError(
+                "`agg_funcs` must be provided when using partition by"
+            )
         if sliding_window_step and not interval:
             raise mlrun.errors.MLRunInvalidArgumentError(
                 "`interval` must be provided when using sliding window"
@@ -232,6 +236,8 @@ class TDEngineSchema:
             if isinstance(group_by, list):
                 group_by = ", ".join(group_by)
             query.write(f" GROUP BY {group_by}")
+        if partition_by:
+            query.write(f" PARTITION BY {partition_by}")
         if order_by:
             desc = " DESC" if desc else ""
             query.write(f" ORDER BY {order_by}{desc}")