mlrun 1.10.0rc13__py3-none-any.whl → 1.10.0rc15__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of mlrun might be problematic.

Files changed (47)
  1. mlrun/artifacts/base.py +0 -31
  2. mlrun/artifacts/llm_prompt.py +106 -20
  3. mlrun/artifacts/manager.py +0 -5
  4. mlrun/common/constants.py +0 -1
  5. mlrun/common/schemas/__init__.py +1 -0
  6. mlrun/common/schemas/model_monitoring/__init__.py +1 -0
  7. mlrun/common/schemas/model_monitoring/functions.py +1 -1
  8. mlrun/common/schemas/model_monitoring/model_endpoints.py +10 -0
  9. mlrun/common/schemas/workflow.py +0 -1
  10. mlrun/config.py +1 -1
  11. mlrun/datastore/model_provider/model_provider.py +42 -14
  12. mlrun/datastore/model_provider/openai_provider.py +96 -15
  13. mlrun/db/base.py +14 -0
  14. mlrun/db/httpdb.py +42 -9
  15. mlrun/db/nopdb.py +8 -0
  16. mlrun/execution.py +16 -7
  17. mlrun/model.py +15 -0
  18. mlrun/model_monitoring/__init__.py +1 -0
  19. mlrun/model_monitoring/applications/base.py +176 -20
  20. mlrun/model_monitoring/db/_schedules.py +84 -24
  21. mlrun/model_monitoring/db/tsdb/base.py +72 -1
  22. mlrun/model_monitoring/db/tsdb/tdengine/schemas.py +7 -1
  23. mlrun/model_monitoring/db/tsdb/tdengine/tdengine_connector.py +37 -0
  24. mlrun/model_monitoring/db/tsdb/v3io/v3io_connector.py +25 -0
  25. mlrun/model_monitoring/helpers.py +26 -4
  26. mlrun/projects/project.py +38 -12
  27. mlrun/runtimes/daskjob.py +6 -0
  28. mlrun/runtimes/mpijob/abstract.py +6 -0
  29. mlrun/runtimes/mpijob/v1.py +6 -0
  30. mlrun/runtimes/nuclio/application/application.py +2 -0
  31. mlrun/runtimes/nuclio/function.py +6 -0
  32. mlrun/runtimes/nuclio/serving.py +12 -11
  33. mlrun/runtimes/pod.py +21 -0
  34. mlrun/runtimes/remotesparkjob.py +6 -0
  35. mlrun/runtimes/sparkjob/spark3job.py +6 -0
  36. mlrun/serving/__init__.py +2 -0
  37. mlrun/serving/server.py +95 -26
  38. mlrun/serving/states.py +130 -10
  39. mlrun/utils/helpers.py +36 -12
  40. mlrun/utils/retryer.py +15 -2
  41. mlrun/utils/version/version.json +2 -2
  42. {mlrun-1.10.0rc13.dist-info → mlrun-1.10.0rc15.dist-info}/METADATA +3 -8
  43. {mlrun-1.10.0rc13.dist-info → mlrun-1.10.0rc15.dist-info}/RECORD +47 -47
  44. {mlrun-1.10.0rc13.dist-info → mlrun-1.10.0rc15.dist-info}/WHEEL +0 -0
  45. {mlrun-1.10.0rc13.dist-info → mlrun-1.10.0rc15.dist-info}/entry_points.txt +0 -0
  46. {mlrun-1.10.0rc13.dist-info → mlrun-1.10.0rc15.dist-info}/licenses/LICENSE +0 -0
  47. {mlrun-1.10.0rc13.dist-info → mlrun-1.10.0rc15.dist-info}/top_level.txt +0 -0
mlrun/db/httpdb.py CHANGED
@@ -757,7 +757,7 @@ class HTTPRunDB(RunDBInterface):
         )
         if response.status_code == http.HTTPStatus.ACCEPTED:
             background_task = mlrun.common.schemas.BackgroundTask(**response.json())
-            return self._wait_for_background_task_to_reach_terminal_state(
+            return self.wait_for_background_task_to_reach_terminal_state(
                 background_task.metadata.name, project=project
             )
         return None
@@ -784,7 +784,7 @@ class HTTPRunDB(RunDBInterface):
         )
         if response.status_code == http.HTTPStatus.ACCEPTED:
             background_task = mlrun.common.schemas.BackgroundTask(**response.json())
-            background_task = self._wait_for_background_task_to_reach_terminal_state(
+            background_task = self.wait_for_background_task_to_reach_terminal_state(
                 background_task.metadata.name, project=project
             )
             if (
@@ -839,7 +839,7 @@ class HTTPRunDB(RunDBInterface):
         )
         if response.status_code == http.HTTPStatus.ACCEPTED:
             background_task = mlrun.common.schemas.BackgroundTask(**response.json())
-            background_task = self._wait_for_background_task_to_reach_terminal_state(
+            background_task = self.wait_for_background_task_to_reach_terminal_state(
                 background_task.metadata.name, project=project
             )
             if (
@@ -1485,7 +1485,7 @@ class HTTPRunDB(RunDBInterface):
             "Function is being deleted", project_name=project, function_name=name
         )
         background_task = mlrun.common.schemas.BackgroundTask(**response.json())
-        background_task = self._wait_for_background_task_to_reach_terminal_state(
+        background_task = self.wait_for_background_task_to_reach_terminal_state(
             background_task.metadata.name, project=project
         )
         if (
@@ -3274,7 +3274,7 @@ class HTTPRunDB(RunDBInterface):
         if response.status_code == http.HTTPStatus.ACCEPTED:
             logger.info("Waiting for project to be deleted", project_name=name)
             background_task = mlrun.common.schemas.BackgroundTask(**response.json())
-            background_task = self._wait_for_background_task_to_reach_terminal_state(
+            background_task = self.wait_for_background_task_to_reach_terminal_state(
                 background_task.metadata.name
             )
             if (
@@ -3387,7 +3387,7 @@ class HTTPRunDB(RunDBInterface):
             _verify_project_in_terminal_state,
         )

-    def _wait_for_background_task_to_reach_terminal_state(
+    def wait_for_background_task_to_reach_terminal_state(
         self, name: str, project: str = ""
     ) -> mlrun.common.schemas.BackgroundTask:
         def _verify_background_task_in_terminal_state():
@@ -3408,6 +3408,7 @@ class HTTPRunDB(RunDBInterface):
             logger,
             False,
             _verify_background_task_in_terminal_state,
+            fatal_exceptions=(mlrun.errors.MLRunAccessDeniedError,),
         )

     def create_project_secrets(
@@ -4082,7 +4083,7 @@ class HTTPRunDB(RunDBInterface):
             **response.json()
         ).background_tasks
         for task in background_tasks:
-            task = self._wait_for_background_task_to_reach_terminal_state(
+            task = self.wait_for_background_task_to_reach_terminal_state(
                 task.metadata.name, project=project
             )
             if (
@@ -4119,7 +4120,7 @@ class HTTPRunDB(RunDBInterface):
             **response.json()
         ).background_tasks
         for task in background_tasks:
-            task = self._wait_for_background_task_to_reach_terminal_state(
+            task = self.wait_for_background_task_to_reach_terminal_state(
                 task.metadata.name, project=project
             )
             if (
@@ -5158,6 +5159,38 @@ class HTTPRunDB(RunDBInterface):
         response = self.api_call("GET", endpoint_path, error_message)
         return mlrun.common.schemas.ProjectSummary(**response.json())

+    def get_drift_over_time(
+        self,
+        project: str,
+        start: Optional[datetime] = None,
+        end: Optional[datetime] = None,
+    ) -> mlrun.common.schemas.model_monitoring.ModelEndpointDriftValues:
+        """
+        Get drift counts over time for the project.
+
+        This method returns a list of tuples, each representing a time-interval (in a granularity set by the
+        duration of the given time range) and the number of suspected drifts and detected drifts in that interval.
+        For a range of 6 hours or less, the granularity is 10 minute, for a range of 2 hours to 72 hours, the
+        granularity is 1 hour, and for a range of more than 72 hours, the granularity is 24 hours.
+
+        :param project: The name of the project for which to retrieve drift counts.
+        :param start: Start time of the range to retrieve drift counts from.
+        :param end: End time of the range to retrieve drift counts from.
+
+        :return: A ModelEndpointDriftValues object containing the drift counts over time.
+        """
+        endpoint_path = f"projects/{project}/model-endpoints/drift-over-time"
+        error_message = f"Failed retrieving drift data for {project}"
+        response = self.api_call(
+            method="GET",
+            path=endpoint_path,
+            error=error_message,
+            params={"start": start, "end": end},
+        )
+        return mlrun.common.schemas.model_monitoring.ModelEndpointDriftValues(
+            **response.json()
+        )
+
     @staticmethod
     def _parse_labels(
         labels: Optional[Union[str, dict[str, Optional[str]], list[str]]],
@@ -5478,7 +5511,7 @@ class HTTPRunDB(RunDBInterface):
     def _wait_for_background_task_from_response(self, response):
         if response.status_code == http.HTTPStatus.ACCEPTED:
             background_task = mlrun.common.schemas.BackgroundTask(**response.json())
-            return self._wait_for_background_task_to_reach_terminal_state(
+            return self.wait_for_background_task_to_reach_terminal_state(
                 background_task.metadata.name
             )
         return None
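
The new `get_drift_over_time` client method can be exercised as in the following sketch. This example is not part of the diff: the project name is hypothetical, and it assumes a reachable MLRun API server behind the public `mlrun.get_run_db()` helper.

import datetime

import mlrun

db = mlrun.get_run_db()
now = datetime.datetime.now(tz=datetime.timezone.utc)
# A 6-hour range, which the docstring above maps to 10-minute buckets
drift_values = db.get_drift_over_time(
    project="my-project",  # hypothetical project name
    start=now - datetime.timedelta(hours=6),
    end=now,
)
print(drift_values)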
mlrun/db/nopdb.py CHANGED
@@ -980,3 +980,11 @@ class NopDB(RunDBInterface):

     def get_project_summary(self, project: str):
         pass
+
+    def get_drift_over_time(
+        self,
+        project: str,
+        start: Optional[datetime.datetime] = None,
+        end: Optional[datetime.datetime] = None,
+    ) -> mlrun.common.schemas.model_monitoring.ModelEndpointDriftValues:
+        pass
mlrun/execution.py CHANGED
@@ -94,6 +94,7 @@ class MLClientCtx:
         self._state_thresholds = {}
         self._retry_spec = {}
         self._retry_count = None
+        self._retries = []

         self._labels = {}
         self._annotations = {}
@@ -468,6 +469,7 @@ class MLClientCtx:
         for key, uri in status.get("artifact_uris", {}).items():
             self._artifacts_manager.artifact_uris[key] = uri
         self._retry_count = status.get("retry_count", self._retry_count)
+        self._retries = status.get("retries", self._retries)
         # if run is a retry, the state needs to move to running
         if include_status:
             self._state = status.get("state", self._state)
@@ -911,7 +913,7 @@ class MLClientCtx:
     def log_llm_prompt(
         self,
         key,
-        prompt_string: Optional[str] = None,
+        prompt_template: Optional[list[dict]] = None,
         prompt_path: Optional[str] = None,
         prompt_legend: Optional[dict] = None,
         model_artifact: Union[ModelArtifact, str] = None,
@@ -935,7 +937,7 @@ class MLClientCtx:
             # Log an inline prompt
             context.log_llm_prompt(
                 key="qa-prompt",
-                prompt_string="Q: {question}",
+                prompt_template=[{"role: "user", "content": "question with {place_holder}"}],
                 model_artifact=model,
                 prompt_legend={"question": "user_input"},
                 model_configuration={"temperature": 0.7, "max_tokens": 128},
@@ -943,10 +945,16 @@ class MLClientCtx:
             )

         :param key: Unique name of the artifact.
-        :param prompt_string: Raw prompt text as a string. Cannot be used with `prompt_path`.
+        :param prompt_template: Raw prompt list of dicts -
+            [{"role": "system", "content": "You are a {profession} advisor"},
+            "role": "user", "content": "I need your help with {profession}"]. only "role" and "content" keys allow in any
+            str format (upper/lower case), keys will be modified to lower case.
+            Cannot be used with `prompt_path`.
         :param prompt_path: Path to a file containing the prompt content. Cannot be used with `prompt_string`.
         :param prompt_legend: A dictionary where each key is a placeholder in the prompt (e.g., ``{user_name}``)
-            and the value is a description or explanation of what that placeholder represents.
+            and the value is a dictionary holding two keys, "field", "description". "field" points to the field in
+            the event where the value of the place-holder inside the event, if None or not exist will be replaced
+            with the place-holder name. "description" will point to explanation of what that placeholder represents.
             Useful for documenting and clarifying dynamic parts of the prompt.
         :param model_artifact: Reference to the parent model (either `ModelArtifact` or model URI string).
         :param model_configuration: Dictionary of generation parameters (e.g., temperature, max_tokens).
@@ -961,15 +969,15 @@ class MLClientCtx:
         :returns: The logged `LLMPromptArtifact` object.
         """

-        if not prompt_string and not prompt_path:
+        if not prompt_template and not prompt_path:
             raise mlrun.errors.MLRunInvalidArgumentError(
-                "Either 'prompt_string' or 'prompt_path' must be provided"
+                "Either 'prompt_template' or 'prompt_path' must be provided"
             )

         llm_prompt = LLMPromptArtifact(
             key=key,
             project=self.project or "",
-            prompt_string=prompt_string,
+            prompt_template=prompt_template,
             prompt_path=prompt_path,
             prompt_legend=prompt_legend,
             model_artifact=model_artifact,
@@ -1267,6 +1275,7 @@ class MLClientCtx:
                 "start_time": to_date_str(self._start_time),
                 "last_update": to_date_str(self._last_update),
                 "retry_count": self._retry_count,
+                "retries": self._retries,
             },
         }
mlrun/model.py CHANGED
@@ -1375,6 +1375,7 @@ class RunStatus(ModelObj):
         notifications: Optional[dict[str, Notification]] = None,
         artifact_uris: Optional[dict[str, str]] = None,
         retry_count: Optional[int] = None,
+        retries: Optional[list[dict]] = None,
     ):
         self.state = state or "created"
         self.status_text = status_text
@@ -1393,6 +1394,7 @@ class RunStatus(ModelObj):
         # Artifact key -> URI mapping, since the full artifacts are not stored in the runs DB table
         self._artifact_uris = artifact_uris or {}
         self._retry_count = retry_count or None
+        self._retries = retries or []

     @classmethod
     def from_dict(
@@ -1461,6 +1463,19 @@ class RunStatus(ModelObj):
         """
         self._retry_count = retry_count

+    @property
+    def retries(self) -> list[dict]:
+        """List of metadata for each retry attempt."""
+        return self._retries
+
+    @retries.setter
+    def retries(self, retries: list[dict]):
+        """
+        Set the list of retry attempt metadata.
+        :param retries: A list of dictionaries, each representing a retry attempt.
+        """
+        self._retries = retries
+
     def is_failed(self) -> Optional[bool]:
         """
         This method returns whether a run has failed.
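
A hedged sketch of how the new field could surface on a finished run; it assumes the run was configured with a retry spec and that the server reports per-attempt metadata (the project and function names are hypothetical):

import mlrun

project = mlrun.get_or_create_project("my-project")
run = project.run_function("my-func")  # hypothetical function with retries configured
print("retry count:", run.status.retry_count)
for attempt in run.status.retries:  # one metadata dict per retry attempt
    print(attempt)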
mlrun/model_monitoring/__init__.py CHANGED
@@ -15,4 +15,5 @@
 from mlrun.common.schemas import ModelEndpoint, ModelEndpointList

 from .db import get_tsdb_connector
+from .db._schedules import delete_model_monitoring_schedules_user_folder
 from .helpers import get_stream_path
mlrun/model_monitoring/applications/base.py CHANGED
@@ -17,7 +17,7 @@ import socket
 from abc import ABC, abstractmethod
 from collections import defaultdict
 from collections.abc import Iterator
-from contextlib import contextmanager
+from contextlib import contextmanager, nullcontext
 from datetime import datetime, timedelta
 from typing import Any, Literal, Optional, Union, cast

@@ -31,6 +31,7 @@ import mlrun.errors
 import mlrun.model_monitoring.api as mm_api
 import mlrun.model_monitoring.applications.context as mm_context
 import mlrun.model_monitoring.applications.results as mm_results
+import mlrun.model_monitoring.db._schedules as mm_schedules
 import mlrun.model_monitoring.helpers as mm_helpers
 from mlrun.serving.utils import MonitoringApplicationToDict
 from mlrun.utils import logger
@@ -183,14 +184,27 @@ class ModelMonitoringApplicationBase(MonitoringApplicationToDict, ABC):
         cls,
         *,
         write_output: bool,
+        application_name: str,
+        artifact_path: str,
         stream_profile: Optional[ds_profile.DatastoreProfile],
         project: "mlrun.MlrunProject",
-    ) -> Iterator[dict[str, list[tuple]]]:
+    ) -> Iterator[
+        tuple[
+            dict[str, list[tuple]],
+            Optional[mm_schedules.ModelMonitoringSchedulesFileApplication],
+        ]
+    ]:
         endpoints_output: dict[str, list[tuple]] = defaultdict(list)
+        application_schedules = nullcontext()
         if write_output:
             cls._check_writer_is_up(project)
+            application_schedules = (
+                mm_schedules.ModelMonitoringSchedulesFileApplication(
+                    artifact_path, application=application_name
+                )
+            )
         try:
-            yield endpoints_output
+            yield endpoints_output, application_schedules.__enter__()
         finally:
             if write_output:
                 logger.debug(
@@ -218,6 +232,12 @@ class ModelMonitoringApplicationBase(MonitoringApplicationToDict, ABC):
                     endpoints_output=endpoints_output,
                 )

+            logger.debug(
+                "Saving the application schedules",
+                application_name=application_name,
+            )
+            application_schedules.__exit__(None, None, None)
+
     def _handler(
         self,
         context: "mlrun.MLClientCtx",
@@ -230,6 +250,7 @@
         end: Optional[str] = None,
         base_period: Optional[int] = None,
         write_output: bool = False,
+        allow_unordered_data: bool = False,
         stream_profile: Optional[ds_profile.DatastoreProfile] = None,
     ):
         """
@@ -250,6 +271,8 @@
                 "working with endpoints, without any custom data-frame input"
             )

+        application_name = self.__class__.__name__
+
         feature_stats = (
             mm_api.get_sample_set_statistics(reference_data)
             if reference_data is not None
@@ -257,8 +280,12 @@
         )

         with self._push_to_writer(
-            write_output=write_output, stream_profile=stream_profile, project=project
-        ) as endpoints_output:
+            write_output=write_output,
+            stream_profile=stream_profile,
+            application_name=application_name,
+            artifact_path=context.artifact_path,
+            project=project,
+        ) as (endpoints_output, application_schedules):

             def call_do_tracking(event: Optional[dict] = None):
                 nonlocal endpoints_output
@@ -268,7 +295,7 @@
                 monitoring_context = (
                     mm_context.MonitoringApplicationContext._from_ml_ctx(
                         event=event,
-                        application_name=self.__class__.__name__,
+                        application_name=application_name,
                         context=context,
                         project=project,
                         sample_df=sample_data,
@@ -285,10 +312,16 @@
             resolved_endpoints = self._handle_endpoints_type_evaluate(
                 project=project, endpoints=endpoints
             )
-            for window_start, window_end in self._window_generator(
-                start, end, base_period
-            ):
-                for endpoint_name, endpoint_id in resolved_endpoints:
+            for endpoint_name, endpoint_id in resolved_endpoints:
+                for window_start, window_end in self._window_generator(
+                    start=start,
+                    end=end,
+                    base_period=base_period,
+                    application_schedules=application_schedules,
+                    endpoint_id=endpoint_id,
+                    application_name=application_name,
+                    allow_unordered_data=allow_unordered_data,
+                ):
                     result = call_do_tracking(
                         event={
                             mm_constants.ApplicationEvent.ENDPOINT_NAME: endpoint_name,
@@ -370,8 +403,103 @@
         )

     @staticmethod
+    def _validate_and_get_window_length(
+        *, base_period: int, start_dt: datetime, end_dt: datetime
+    ) -> timedelta:
+        if not isinstance(base_period, int) or base_period <= 0:
+            raise mlrun.errors.MLRunValueError(
+                "`base_period` must be a nonnegative integer - the number of minutes in a monitoring window"
+            )
+
+        window_length = timedelta(minutes=base_period)
+
+        full_interval_length = end_dt - start_dt
+        remainder = full_interval_length % window_length
+        if remainder:
+            if full_interval_length < window_length:
+                extra_msg = (
+                    "The `base_period` is longer than the difference between `end` and `start`: "
+                    f"{full_interval_length}. Consider not specifying `base_period`."
+                )
+            else:
+                extra_msg = (
+                    f"Consider changing the `end` time to `end`={end_dt - remainder}"
+                )
+            raise mlrun.errors.MLRunValueError(
+                "The difference between `end` and `start` must be a multiple of `base_period`: "
+                f"`base_period`={window_length}, `start`={start_dt}, `end`={end_dt}. "
+                f"{extra_msg}"
+            )
+        return window_length
+
+    @staticmethod
+    def _validate_monotonically_increasing_data(
+        *,
+        application_schedules: Optional[
+            mm_schedules.ModelMonitoringSchedulesFileApplication
+        ],
+        endpoint_id: str,
+        start_dt: datetime,
+        end_dt: datetime,
+        base_period: Optional[int],
+        application_name: str,
+        allow_unordered_data: bool,
+    ) -> datetime:
+        """Make sure that the (app, endpoint) pair doesn't write output before the last analyzed window"""
+        if application_schedules:
+            last_analyzed = application_schedules.get_endpoint_last_analyzed(
+                endpoint_id
+            )
+            if last_analyzed:
+                if start_dt < last_analyzed:
+                    if allow_unordered_data:
+                        if last_analyzed < end_dt and base_period is None:
+                            logger.warn(
+                                "Setting the start time to last_analyzed since the original start time precedes "
+                                "last_analyzed",
+                                original_start=start_dt,
+                                new_start=last_analyzed,
+                                application_name=application_name,
+                                endpoint_id=endpoint_id,
+                            )
+                            start_dt = last_analyzed
+                        else:
+                            raise mlrun.errors.MLRunValueError(
+                                "The start time for the application and endpoint precedes the last analyzed time: "
+                                f"{start_dt=}, {last_analyzed=}, {application_name=}, {endpoint_id=}. "
+                                "Writing data out of order is not supported, and the start time could not be "
+                                "dynamically reset, as last_analyzed is later than the given end time or that "
+                                f"base_period was specified ({end_dt=}, {base_period=})."
+                            )
+                    else:
+                        raise mlrun.errors.MLRunValueError(
+                            "The start time for the application and endpoint precedes the last analyzed time: "
+                            f"{start_dt=}, {last_analyzed=}, {application_name=}, {endpoint_id=}. "
+                            "Writing data out of order is not supported. You should change the start time to "
+                            f"'{last_analyzed}' or later."
+                        )
+            else:
+                logger.debug(
+                    "The application is running on the endpoint for the first time",
+                    endpoint_id=endpoint_id,
+                    start_dt=start_dt,
+                    application_name=application_name,
+                )
+        return start_dt
+
+    @classmethod
     def _window_generator(
-        start: Optional[str], end: Optional[str], base_period: Optional[int]
+        cls,
+        *,
+        start: Optional[str],
+        end: Optional[str],
+        base_period: Optional[int],
+        application_schedules: Optional[
+            mm_schedules.ModelMonitoringSchedulesFileApplication
+        ],
+        endpoint_id: str,
+        application_name: str,
+        allow_unordered_data: bool,
     ) -> Iterator[tuple[Optional[datetime], Optional[datetime]]]:
         if start is None or end is None:
             # A single window based on the `sample_data` input - see `_handler`.
@@ -381,20 +509,36 @@ class ModelMonitoringApplicationBase(MonitoringApplicationToDict, ABC):
         start_dt = datetime.fromisoformat(start)
         end_dt = datetime.fromisoformat(end)

+        start_dt = cls._validate_monotonically_increasing_data(
+            application_schedules=application_schedules,
+            endpoint_id=endpoint_id,
+            start_dt=start_dt,
+            end_dt=end_dt,
+            base_period=base_period,
+            application_name=application_name,
+            allow_unordered_data=allow_unordered_data,
+        )
+
         if base_period is None:
             yield start_dt, end_dt
+            if application_schedules:
+                application_schedules.update_endpoint_last_analyzed(
+                    endpoint_uid=endpoint_id, last_analyzed=end_dt
+                )
             return

-        if not isinstance(base_period, int) or base_period <= 0:
-            raise mlrun.errors.MLRunValueError(
-                "`base_period` must be a nonnegative integer - the number of minutes in a monitoring window"
-            )
+        window_length = cls._validate_and_get_window_length(
+            base_period=base_period, start_dt=start_dt, end_dt=end_dt
+        )

-        window_length = timedelta(minutes=base_period)
         current_start_time = start_dt
         while current_start_time < end_dt:
             current_end_time = min(current_start_time + window_length, end_dt)
             yield current_start_time, current_end_time
+            if application_schedules:
+                application_schedules.update_endpoint_last_analyzed(
+                    endpoint_uid=endpoint_id, last_analyzed=current_end_time
+                )
             current_start_time = current_end_time

     @classmethod
@@ -484,6 +628,7 @@ class ModelMonitoringApplicationBase(MonitoringApplicationToDict, ABC):
         * ``end``, ``datetime``
         * ``base_period``, ``int``
         * ``write_output``, ``bool``
+        * ``allow_unordered_data``, ``bool``

         For Git sources, add the source archive to the returned job and change the handler:

@@ -567,6 +712,7 @@ class ModelMonitoringApplicationBase(MonitoringApplicationToDict, ABC):
         end: Optional[datetime] = None,
         base_period: Optional[int] = None,
         write_output: bool = False,
+        allow_unordered_data: bool = False,
         stream_profile: Optional[ds_profile.DatastoreProfile] = None,
     ) -> "mlrun.RunObject":
         """
@@ -608,6 +754,8 @@ class ModelMonitoringApplicationBase(MonitoringApplicationToDict, ABC):
         :param start: The start time of the endpoint's data, not included.
                       If you want the model endpoint's data at ``start`` included, you need to subtract a
                       small ``datetime.timedelta`` from it.
+                      Make sure to include the time zone when constructing `datetime.datetime` objects
+                      manually.
         :param end: The end time of the endpoint's data, included.
                     Please note: when ``start`` and ``end`` are set, they create a left-open time interval
                     ("window") :math:`(\\operatorname{start}, \\operatorname{end}]` that excludes the
@@ -616,17 +764,24 @@ class ModelMonitoringApplicationBase(MonitoringApplicationToDict, ABC):
                     taken in the window's data.
         :param base_period: The window length in minutes. If ``None``, the whole window from ``start`` to ``end``
                             is taken. If an integer is specified, the application is run from ``start`` to ``end``
-                            in ``base_period`` length windows, except for the last window that ends at ``end`` and
-                            therefore may be shorter:
+                            in ``base_period`` length windows:
                             :math:`(\\operatorname{start}, \\operatorname{start} + \\operatorname{base\\_period}],
                             (\\operatorname{start} + \\operatorname{base\\_period},
                             \\operatorname{start} + 2\\cdot\\operatorname{base\\_period}],
                             ..., (\\operatorname{start} +
-                            m\\cdot\\operatorname{base\\_period}, \\operatorname{end}]`,
-                            where :math:`m` is some positive integer.
+                            (m - 1)\\cdot\\operatorname{base\\_period}, \\operatorname{end}]`,
+                            where :math:`m` is a positive integer and :math:`\\operatorname{end} =
+                            \\operatorname{start} + m\\cdot\\operatorname{base\\_period}`.
+                            Please note that the difference between ``end`` and ``start`` must be a multiple of
+                            ``base_period``.
         :param write_output: Whether to write the results and metrics to the time-series DB. Can be ``True`` only
                              if ``endpoints`` are passed.
                              Note: the model monitoring infrastructure must be up for the writing to work.
+        :param allow_unordered_data: Relevant only when writing outputs to the database. When ``False``, and the
+                                     requested ``start`` time precedes the ``end`` time of a previous run that also
+                                     wrote to the database - an error is raised.
+                                     If ``True``, when the previously described situation occurs, the relevant time
+                                     window is cut so that it starts at the earliest possible time after ``start``.
         :param stream_profile: The stream datastore profile. It should be provided only when running locally and
                                writing the outputs to the database (i.e., when both ``run_local`` and
                                ``write_output`` are set to ``True``).
@@ -666,6 +821,7 @@ class ModelMonitoringApplicationBase(MonitoringApplicationToDict, ABC):
         params["end"] = end.isoformat() if isinstance(end, datetime) else end
         params["base_period"] = base_period
         params["write_output"] = write_output
+        params["allow_unordered_data"] = allow_unordered_data
         if stream_profile:
             if not run_local:
                 raise mlrun.errors.MLRunValueError(
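
Taken together, the windowing changes tighten the contract of the evaluation entry point: the span from `start` to `end` must divide evenly into `base_period` windows, and out-of-order writes per (application, endpoint) pair are rejected unless `allow_unordered_data` is set. A hedged usage sketch follows; the class, file, and endpoint names are hypothetical, and `evaluate` is assumed to be the public classmethod backed by the `_handler` shown above.

from datetime import datetime, timedelta, timezone

end = datetime(2025, 1, 2, 12, 0, tzinfo=timezone.utc)  # time zone included, as the docstring advises
start = end - timedelta(hours=2)  # 120 minutes == 12 windows of 10 minutes, satisfying the multiple rule

run = MyMonitoringApp.evaluate(  # hypothetical ModelMonitoringApplicationBase subclass
    func_path="my_app.py",  # illustrative source path
    endpoints=["my-endpoint"],
    start=start,
    end=end,
    base_period=10,  # minutes; (end - start) % base_period must be 0
    write_output=True,  # records last-analyzed per (application, endpoint)
    allow_unordered_data=True,  # clip the window forward instead of raising on out-of-order starts
)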