mlrun 1.10.0rc13__py3-none-any.whl → 1.10.0rc42__py3-none-any.whl

This diff shows the changes between two publicly released versions of the package, as they appear in their public registry. It is provided for informational purposes only.

Potentially problematic release: this version of mlrun might be problematic.

Files changed (107)
  1. mlrun/__init__.py +22 -2
  2. mlrun/artifacts/base.py +0 -31
  3. mlrun/artifacts/document.py +6 -1
  4. mlrun/artifacts/llm_prompt.py +123 -25
  5. mlrun/artifacts/manager.py +0 -5
  6. mlrun/artifacts/model.py +3 -3
  7. mlrun/common/constants.py +10 -1
  8. mlrun/common/formatters/artifact.py +1 -0
  9. mlrun/common/model_monitoring/helpers.py +86 -0
  10. mlrun/common/schemas/__init__.py +3 -0
  11. mlrun/common/schemas/auth.py +2 -0
  12. mlrun/common/schemas/function.py +10 -0
  13. mlrun/common/schemas/hub.py +30 -18
  14. mlrun/common/schemas/model_monitoring/__init__.py +3 -0
  15. mlrun/common/schemas/model_monitoring/constants.py +30 -6
  16. mlrun/common/schemas/model_monitoring/functions.py +14 -5
  17. mlrun/common/schemas/model_monitoring/model_endpoints.py +21 -0
  18. mlrun/common/schemas/pipeline.py +1 -1
  19. mlrun/common/schemas/serving.py +3 -0
  20. mlrun/common/schemas/workflow.py +3 -1
  21. mlrun/common/secrets.py +22 -1
  22. mlrun/config.py +33 -11
  23. mlrun/datastore/__init__.py +11 -3
  24. mlrun/datastore/azure_blob.py +162 -47
  25. mlrun/datastore/datastore.py +9 -4
  26. mlrun/datastore/datastore_profile.py +61 -5
  27. mlrun/datastore/model_provider/huggingface_provider.py +363 -0
  28. mlrun/datastore/model_provider/mock_model_provider.py +87 -0
  29. mlrun/datastore/model_provider/model_provider.py +230 -65
  30. mlrun/datastore/model_provider/openai_provider.py +295 -42
  31. mlrun/datastore/s3.py +24 -2
  32. mlrun/datastore/storeytargets.py +2 -3
  33. mlrun/datastore/utils.py +15 -3
  34. mlrun/db/base.py +47 -19
  35. mlrun/db/httpdb.py +120 -56
  36. mlrun/db/nopdb.py +38 -10
  37. mlrun/execution.py +70 -19
  38. mlrun/hub/__init__.py +15 -0
  39. mlrun/hub/module.py +181 -0
  40. mlrun/k8s_utils.py +105 -16
  41. mlrun/launcher/base.py +13 -6
  42. mlrun/launcher/local.py +15 -0
  43. mlrun/model.py +24 -3
  44. mlrun/model_monitoring/__init__.py +1 -0
  45. mlrun/model_monitoring/api.py +66 -27
  46. mlrun/model_monitoring/applications/__init__.py +1 -1
  47. mlrun/model_monitoring/applications/base.py +509 -117
  48. mlrun/model_monitoring/applications/context.py +2 -4
  49. mlrun/model_monitoring/applications/results.py +4 -7
  50. mlrun/model_monitoring/controller.py +239 -101
  51. mlrun/model_monitoring/db/_schedules.py +116 -33
  52. mlrun/model_monitoring/db/_stats.py +4 -3
  53. mlrun/model_monitoring/db/tsdb/base.py +100 -9
  54. mlrun/model_monitoring/db/tsdb/tdengine/schemas.py +11 -6
  55. mlrun/model_monitoring/db/tsdb/tdengine/tdengine_connector.py +191 -50
  56. mlrun/model_monitoring/db/tsdb/tdengine/writer_graph_steps.py +51 -0
  57. mlrun/model_monitoring/db/tsdb/v3io/stream_graph_steps.py +17 -4
  58. mlrun/model_monitoring/db/tsdb/v3io/v3io_connector.py +259 -40
  59. mlrun/model_monitoring/helpers.py +54 -9
  60. mlrun/model_monitoring/stream_processing.py +45 -14
  61. mlrun/model_monitoring/writer.py +220 -1
  62. mlrun/platforms/__init__.py +3 -2
  63. mlrun/platforms/iguazio.py +7 -3
  64. mlrun/projects/operations.py +6 -1
  65. mlrun/projects/pipelines.py +46 -26
  66. mlrun/projects/project.py +166 -58
  67. mlrun/run.py +94 -17
  68. mlrun/runtimes/__init__.py +18 -0
  69. mlrun/runtimes/base.py +14 -6
  70. mlrun/runtimes/daskjob.py +7 -0
  71. mlrun/runtimes/local.py +5 -2
  72. mlrun/runtimes/mounts.py +20 -2
  73. mlrun/runtimes/mpijob/abstract.py +6 -0
  74. mlrun/runtimes/mpijob/v1.py +6 -0
  75. mlrun/runtimes/nuclio/__init__.py +1 -0
  76. mlrun/runtimes/nuclio/application/application.py +149 -17
  77. mlrun/runtimes/nuclio/function.py +76 -27
  78. mlrun/runtimes/nuclio/serving.py +97 -15
  79. mlrun/runtimes/pod.py +234 -21
  80. mlrun/runtimes/remotesparkjob.py +6 -0
  81. mlrun/runtimes/sparkjob/spark3job.py +6 -0
  82. mlrun/runtimes/utils.py +49 -11
  83. mlrun/secrets.py +54 -13
  84. mlrun/serving/__init__.py +2 -0
  85. mlrun/serving/remote.py +79 -6
  86. mlrun/serving/routers.py +23 -41
  87. mlrun/serving/server.py +320 -80
  88. mlrun/serving/states.py +725 -157
  89. mlrun/serving/steps.py +62 -0
  90. mlrun/serving/system_steps.py +200 -119
  91. mlrun/serving/v2_serving.py +9 -10
  92. mlrun/utils/helpers.py +288 -88
  93. mlrun/utils/logger.py +3 -1
  94. mlrun/utils/notifications/notification/base.py +18 -0
  95. mlrun/utils/notifications/notification/git.py +2 -4
  96. mlrun/utils/notifications/notification/slack.py +2 -4
  97. mlrun/utils/notifications/notification/webhook.py +2 -5
  98. mlrun/utils/notifications/notification_pusher.py +1 -1
  99. mlrun/utils/retryer.py +15 -2
  100. mlrun/utils/version/version.json +2 -2
  101. {mlrun-1.10.0rc13.dist-info → mlrun-1.10.0rc42.dist-info}/METADATA +45 -51
  102. {mlrun-1.10.0rc13.dist-info → mlrun-1.10.0rc42.dist-info}/RECORD +106 -101
  103. mlrun/api/schemas/__init__.py +0 -259
  104. {mlrun-1.10.0rc13.dist-info → mlrun-1.10.0rc42.dist-info}/WHEEL +0 -0
  105. {mlrun-1.10.0rc13.dist-info → mlrun-1.10.0rc42.dist-info}/entry_points.txt +0 -0
  106. {mlrun-1.10.0rc13.dist-info → mlrun-1.10.0rc42.dist-info}/licenses/LICENSE +0 -0
  107. {mlrun-1.10.0rc13.dist-info → mlrun-1.10.0rc42.dist-info}/top_level.txt +0 -0
@@ -17,25 +17,35 @@ import socket
  from abc import ABC, abstractmethod
  from collections import defaultdict
  from collections.abc import Iterator
- from contextlib import contextmanager
- from datetime import datetime, timedelta
+ from contextlib import contextmanager, nullcontext
+ from datetime import datetime, timedelta, timezone
  from typing import Any, Literal, Optional, Union, cast

  import pandas as pd

  import mlrun
  import mlrun.common.constants as mlrun_constants
+ import mlrun.common.helpers
  import mlrun.common.schemas.model_monitoring.constants as mm_constants
+ import mlrun.common.types
  import mlrun.datastore.datastore_profile as ds_profile
  import mlrun.errors
  import mlrun.model_monitoring.api as mm_api
  import mlrun.model_monitoring.applications.context as mm_context
  import mlrun.model_monitoring.applications.results as mm_results
+ import mlrun.model_monitoring.db._schedules as mm_schedules
  import mlrun.model_monitoring.helpers as mm_helpers
+ import mlrun.utils
  from mlrun.serving.utils import MonitoringApplicationToDict
  from mlrun.utils import logger


+ class ExistingDataHandling(mlrun.common.types.StrEnum):
+     fail_on_overlap = "fail_on_overlap"
+     skip_overlap = "skip_overlap"
+     delete_all = "delete_all"
+
+
  def _serialize_context_and_result(
      *,
      context: mm_context.MonitoringApplicationContext,
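
The new `ExistingDataHandling` options are string-valued enum members, so callers can pass either the member or its string value. A minimal standalone sketch of the same pattern, using the standard library's `enum` rather than `mlrun.common.types.StrEnum` (which serves the same purpose):

```python
from enum import Enum


class ExistingDataHandling(str, Enum):
    # Same members as in the diff above; the (str, Enum) base makes each member
    # compare equal to its string value.
    fail_on_overlap = "fail_on_overlap"
    skip_overlap = "skip_overlap"
    delete_all = "delete_all"


assert ExistingDataHandling.skip_overlap == "skip_overlap"
assert ExistingDataHandling("delete_all") is ExistingDataHandling.delete_all
```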
@@ -183,16 +193,47 @@ class ModelMonitoringApplicationBase(MonitoringApplicationToDict, ABC):
          cls,
          *,
          write_output: bool,
+         application_name: str,
+         artifact_path: str,
          stream_profile: Optional[ds_profile.DatastoreProfile],
          project: "mlrun.MlrunProject",
-     ) -> Iterator[dict[str, list[tuple]]]:
-         endpoints_output: dict[str, list[tuple]] = defaultdict(list)
+     ) -> Iterator[
+         tuple[
+             dict[str, list[tuple]],
+             Optional[mm_schedules.ModelMonitoringSchedulesFileApplication],
+         ]
+     ]:
+         endpoints_output: dict[
+             str,
+             list[
+                 tuple[
+                     mm_context.MonitoringApplicationContext,
+                     Union[
+                         mm_results.ModelMonitoringApplicationResult,
+                         mm_results.ModelMonitoringApplicationMetric,
+                         list[
+                             Union[
+                                 mm_results.ModelMonitoringApplicationResult,
+                                 mm_results.ModelMonitoringApplicationMetric,
+                                 mm_results._ModelMonitoringApplicationStats,
+                             ]
+                         ],
+                     ],
+                 ]
+             ],
+         ] = defaultdict(list)
+         application_schedules = nullcontext()
          if write_output:
              cls._check_writer_is_up(project)
+             application_schedules = (
+                 mm_schedules.ModelMonitoringSchedulesFileApplication(
+                     artifact_path, application=application_name
+                 )
+             )
          try:
-             yield endpoints_output
+             yield endpoints_output, application_schedules.__enter__()
          finally:
-             if write_output:
+             if write_output and any(endpoints_output.values()):
                  logger.debug(
                      "Pushing model monitoring application job data to the writer stream",
                      passed_stream_profile=str(stream_profile),
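
The `application_schedules = nullcontext()` default above lets the same enter/exit flow run whether or not a schedules file is actually opened. A standalone sketch of that idea, with illustrative names (not mlrun's API):

```python
from collections import defaultdict
from contextlib import contextmanager, nullcontext


@contextmanager
def schedules_file(path: str):
    # Stand-in for a schedules file object: "load" on enter, "save" on exit.
    state = {"path": path, "last_analyzed": {}}
    yield state
    print(f"saving schedules to {state['path']}")


def run(write_output: bool):
    # nullcontext() yields None, so one code path covers the no-write case too.
    cm = schedules_file("schedules.json") if write_output else nullcontext()
    with cm as schedules:
        if schedules is not None:
            schedules["last_analyzed"]["endpoint-1"] = "2024-01-01T00:00:00+00:00"


run(write_output=True)   # prints the "saving" line
run(write_output=False)  # no side effects
```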
@@ -206,11 +247,21 @@ class ModelMonitoringApplicationBase(MonitoringApplicationToDict, ABC):
                      profile=stream_profile,
                  )
                  for endpoint_id, outputs in endpoints_output.items():
+                     writer_events = []
+                     for ctx, res in outputs:
+                         if isinstance(res, list):
+                             writer_events.extend(
+                                 _serialize_context_and_result(
+                                     context=ctx, result=sub_res
+                                 )
+                                 for sub_res in res
+                             )
+                         else:
+                             writer_events.append(
+                                 _serialize_context_and_result(context=ctx, result=res)
+                             )
                      writer_stream.push(
-                         [
-                             _serialize_context_and_result(context=ctx, result=res)
-                             for ctx, res in outputs
-                         ],
+                         writer_events,
                          partition_key=endpoint_id,
                      )
                  logger.debug(
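
The added loop flattens each endpoint's outputs into one writer event per result, whether `do_tracking` returned a single result or a list of results. A self-contained sketch of that flattening (`serialize` is a stand-in for `_serialize_context_and_result`):

```python
from typing import Any


def serialize(context: str, result: Any) -> dict:
    # Stand-in serializer: one event per (context, result) pair
    return {"context": context, "result": result}


def to_writer_events(outputs: list[tuple[str, Any]]) -> list[dict]:
    writer_events: list[dict] = []
    for ctx, res in outputs:
        if isinstance(res, list):
            writer_events.extend(serialize(ctx, sub_res) for sub_res in res)
        else:
            writer_events.append(serialize(ctx, res))
    return writer_events


# A single result and a list of two results produce three events in total:
assert len(to_writer_events([("c1", "r1"), ("c2", ["r2", "r3"])])) == 3
```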
@@ -218,6 +269,20 @@ class ModelMonitoringApplicationBase(MonitoringApplicationToDict, ABC):
                      endpoints_output=endpoints_output,
                  )

+             logger.debug(
+                 "Saving the application schedules",
+                 application_name=application_name,
+             )
+             application_schedules.__exit__(None, None, None)
+
+     @classmethod
+     def _get_application_name(cls, context: "mlrun.MLClientCtx") -> str:
+         """Get the application name from the context via the function URI"""
+         _, application_name, _, _ = mlrun.common.helpers.parse_versioned_object_uri(
+             context.to_dict().get("spec", {}).get("function", "")
+         )
+         return application_name
+
      def _handler(
          self,
          context: "mlrun.MLClientCtx",
@@ -230,6 +295,7 @@ class ModelMonitoringApplicationBase(MonitoringApplicationToDict, ABC):
          end: Optional[str] = None,
          base_period: Optional[int] = None,
          write_output: bool = False,
+         existing_data_handling: ExistingDataHandling = ExistingDataHandling.fail_on_overlap,
          stream_profile: Optional[ds_profile.DatastoreProfile] = None,
      ):
          """
@@ -250,6 +316,8 @@ class ModelMonitoringApplicationBase(MonitoringApplicationToDict, ABC):
                  "working with endpoints, without any custom data-frame input"
              )

+         application_name = self._get_application_name(context)
+
          feature_stats = (
              mm_api.get_sample_set_statistics(reference_data)
              if reference_data is not None
@@ -257,24 +325,18 @@ class ModelMonitoringApplicationBase(MonitoringApplicationToDict, ABC):
          )

          with self._push_to_writer(
-             write_output=write_output, stream_profile=stream_profile, project=project
-         ) as endpoints_output:
+             write_output=write_output,
+             stream_profile=stream_profile,
+             application_name=application_name,
+             artifact_path=context.artifact_path,
+             project=project,
+         ) as (endpoints_output, application_schedules):

-             def call_do_tracking(event: Optional[dict] = None):
+             def call_do_tracking(
+                 monitoring_context: mm_context.MonitoringApplicationContext,
+             ):
                  nonlocal endpoints_output

-                 if event is None:
-                     event = {}
-                 monitoring_context = (
-                     mm_context.MonitoringApplicationContext._from_ml_ctx(
-                         event=event,
-                         application_name=self.__class__.__name__,
-                         context=context,
-                         project=project,
-                         sample_df=sample_data,
-                         feature_stats=feature_stats,
-                     )
-                 )
                  result = self.do_tracking(monitoring_context)
                  endpoints_output[monitoring_context.endpoint_id].append(
                      (monitoring_context, result)
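
The `with self._push_to_writer(...) as (endpoints_output, application_schedules):` change relies on the context manager yielding a tuple that is unpacked directly in the `with` statement. A minimal illustrative sketch of that pattern (names here are not mlrun's):

```python
from collections import defaultdict
from contextlib import contextmanager


@contextmanager
def push_to_writer(write_output: bool):
    endpoints_output: dict[str, list[tuple]] = defaultdict(list)
    schedules = {} if write_output else None
    try:
        # Yield a tuple so the caller can unpack both objects in one `with`
        yield endpoints_output, schedules
    finally:
        if write_output and any(endpoints_output.values()):
            print(f"pushing {sum(len(v) for v in endpoints_output.values())} events")


with push_to_writer(write_output=True) as (endpoints_output, schedules):
    endpoints_output["endpoint-1"].append(("ctx", "result"))
```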
@@ -282,119 +344,383 @@ class ModelMonitoringApplicationBase(MonitoringApplicationToDict, ABC):
                  return result

              if endpoints is not None:
-                 resolved_endpoints = self._handle_endpoints_type_evaluate(
+                 resolved_endpoints = self._normalize_and_validate_endpoints(
                      project=project, endpoints=endpoints
                  )
-                 for window_start, window_end in self._window_generator(
-                     start, end, base_period
+                 if (
+                     write_output
+                     and existing_data_handling == ExistingDataHandling.delete_all
                  ):
-                     for endpoint_name, endpoint_id in resolved_endpoints:
-                         result = call_do_tracking(
-                             event={
-                                 mm_constants.ApplicationEvent.ENDPOINT_NAME: endpoint_name,
-                                 mm_constants.ApplicationEvent.ENDPOINT_ID: endpoint_id,
-                                 mm_constants.ApplicationEvent.START_INFER_TIME: window_start,
-                                 mm_constants.ApplicationEvent.END_INFER_TIME: window_end,
-                             }
-                         )
+                     endpoint_ids = [
+                         endpoint_id for _, endpoint_id in resolved_endpoints
+                     ]
+                     context.logger.info(
+                         "Deleting all the application data before running the application",
+                         application_name=application_name,
+                         endpoint_ids=endpoint_ids,
+                     )
+                     self._delete_application_data(
+                         project_name=project.name,
+                         application_name=application_name,
+                         endpoint_ids=endpoint_ids,
+                         application_schedules=application_schedules,
+                     )
+                 for endpoint_name, endpoint_id in resolved_endpoints:
+                     for monitoring_ctx in self._window_generator(
+                         start=start,
+                         end=end,
+                         base_period=base_period,
+                         application_schedules=application_schedules,
+                         endpoint_id=endpoint_id,
+                         endpoint_name=endpoint_name,
+                         application_name=application_name,
+                         existing_data_handling=existing_data_handling,
+                         sample_data=sample_data,
+                         context=context,
+                         project=project,
+                     ):
+                         result = call_do_tracking(monitoring_ctx)
                          result_key = (
-                             f"{endpoint_name}-{endpoint_id}_{window_start.isoformat()}_{window_end.isoformat()}"
-                             if window_start and window_end
+                             f"{endpoint_name}-{endpoint_id}_{monitoring_ctx.start_infer_time.isoformat()}_{monitoring_ctx.end_infer_time.isoformat()}"
+                             if monitoring_ctx.start_infer_time
+                             and monitoring_ctx.end_infer_time
                              else f"{endpoint_name}-{endpoint_id}"
                          )

                          context.log_result(
                              result_key, self._flatten_data_result(result)
                          )
+                 # Check if no result was produced for any endpoint (e.g., due to no data in all windows)
+                 if not any(endpoints_output.values()):
+                     context.logger.warning(
+                         "No data was found for any of the specified endpoints. "
+                         "No results were produced",
+                         application_name=application_name,
+                         endpoints=endpoints,
+                         start=start,
+                         end=end,
+                     )
              else:
-                 return self._flatten_data_result(call_do_tracking())
+                 result = call_do_tracking(
+                     mm_context.MonitoringApplicationContext._from_ml_ctx(
+                         context=context,
+                         project=project,
+                         application_name=application_name,
+                         event={},
+                         sample_df=sample_data,
+                         feature_stats=feature_stats,
+                     )
+                 )
+                 return self._flatten_data_result(result)

      @staticmethod
-     def _handle_endpoints_type_evaluate(
+     def _check_endpoints_first_request(
+         endpoints: list[mlrun.common.schemas.ModelEndpoint],
+     ) -> None:
+         """Make sure that all the endpoints have had at least one request"""
+         endpoints_no_requests = [
+             (endpoint.metadata.name, endpoint.metadata.uid)
+             for endpoint in endpoints
+             if not endpoint.status.first_request
+         ]
+         if endpoints_no_requests:
+             raise mlrun.errors.MLRunValueError(
+                 "The following model endpoints have not had any requests yet and "
+                 "have no data, cannot run the model monitoring application on them: "
+                 f"{endpoints_no_requests}"
+             )
+
+     @classmethod
+     def _normalize_and_validate_endpoints(
+         cls,
          project: "mlrun.MlrunProject",
          endpoints: Union[
              list[tuple[str, str]], list[list[str]], list[str], Literal["all"]
          ],
-     ) -> Union[list[tuple[str, str]], list[list[str]]]:
-         if not endpoints:
-             raise mlrun.errors.MLRunValueError(
-                 "The endpoints list cannot be empty. If you want to run on all the endpoints, "
-                 'use `endpoints="all"`.'
-             )
-
-         if isinstance(endpoints, list) and isinstance(endpoints[0], (tuple, list)):
-             return endpoints
-
-         if not (isinstance(endpoints, list) and isinstance(endpoints[0], str)):
-             if isinstance(endpoints, str):
-                 if endpoints != "all":
-                     raise mlrun.errors.MLRunValueError(
-                         'A string input for `endpoints` can only be "all" for all the model endpoints in '
-                         "the project. If you want to select a single model endpoint with the given name, "
-                         f'use a list: `endpoints=["{endpoints}"]`.'
+     ) -> list[tuple[str, str]]:
+         if isinstance(endpoints, list):
+             if all(
+                 isinstance(endpoint, (tuple, list)) and len(endpoint) == 2
+                 for endpoint in endpoints
+             ):
+                 # A list of [(name, uid), ...] / [[name, uid], ...] tuples/lists
+                 endpoint_uids_to_names = {
+                     endpoint[1]: endpoint[0] for endpoint in endpoints
+                 }
+                 endpoints_list = project.list_model_endpoints(
+                     uids=list(endpoint_uids_to_names.keys()), latest_only=True
+                 ).endpoints
+
+                 # Check for missing endpoint uids or name/uid mismatches
+                 for endpoint in endpoints_list:
+                     if (
+                         endpoint_uids_to_names[cast(str, endpoint.metadata.uid)]
+                         != endpoint.metadata.name
+                     ):
+                         raise mlrun.errors.MLRunNotFoundError(
+                             "Could not find model endpoint with name "
+                             f"'{endpoint_uids_to_names[cast(str, endpoint.metadata.uid)]}' "
+                             f"and uid '{endpoint.metadata.uid}'"
+                         )
+                 missing = set(endpoint_uids_to_names.keys()) - {
+                     cast(str, endpoint.metadata.uid) for endpoint in endpoints_list
+                 }
+                 if missing:
+                     raise mlrun.errors.MLRunNotFoundError(
+                         "Could not find model endpoints with the following uids: "
+                         f"{missing}"
                      )
-             else:
-                 raise mlrun.errors.MLRunValueError(
-                     f"Could not resolve endpoints as list of [(name, uid)], {endpoints=}"
-                 )

-         if endpoints == "all":
-             endpoint_names = None
-         else:
-             endpoint_names = endpoints
-
-         endpoints_list = project.list_model_endpoints(
-             names=endpoint_names, latest_only=True
-         ).endpoints
-         if endpoints_list:
-             list_endpoints_result = [
-                 (endpoint.metadata.name, endpoint.metadata.uid)
-                 for endpoint in endpoints_list
-             ]
-             if endpoints != "all":
+             elif all(isinstance(endpoint, str) for endpoint in endpoints):
+                 # A list of [name, ...] strings
+                 endpoint_names = cast(list[str], endpoints)
+                 endpoints_list = project.list_model_endpoints(
+                     names=endpoint_names, latest_only=True
+                 ).endpoints
+
+                 # Check for missing endpoint names
                  missing = set(endpoints) - {
-                     endpoint[0] for endpoint in list_endpoints_result
+                     endpoint.metadata.name for endpoint in endpoints_list
                  }
                  if missing:
                      logger.warning(
                          "Could not list all the required endpoints",
-                         missing_endpoint=missing,
-                         endpoints=list_endpoints_result,
+                         missing_endpoints=missing,
+                         endpoints_list=endpoints_list,
                      )
-             return list_endpoints_result
+             else:
+                 raise mlrun.errors.MLRunValueError(
+                     "Could not resolve the following list as a list of endpoints:\n"
+                     f"{endpoints}\n"
+                     "The list must be either a list of (name, uid) tuples/lists or a list of names."
+                 )
+         elif endpoints == "all":
+             endpoints_list = project.list_model_endpoints(latest_only=True).endpoints
+         elif isinstance(endpoints, str):
+             raise mlrun.errors.MLRunValueError(
+                 'A string input for `endpoints` can only be "all" for all the model endpoints in '
+                 "the project. If you want to select a single model endpoint with the given name, "
+                 f'use a list: `endpoints=["{endpoints}"]`.'
+             )
          else:
-             if endpoints != "all":
-                 err_msg_suffix = f" named '{endpoints}'"
+             raise mlrun.errors.MLRunValueError(
+                 "Could not resolve the `endpoints` parameter. The parameter must be either:\n"
+                 "- a list of (name, uid) tuples/lists\n"
+                 "- a list of names\n"
+                 '- the string "all" for all the model endpoints in the project.'
+             )
+
+         if not endpoints_list:
              raise mlrun.errors.MLRunNotFoundError(
-                 f"Did not find any model endpoints {err_msg_suffix}"
+                 f"Did not find any model endpoints {endpoints=}"
              )

+         cls._check_endpoints_first_request(endpoints_list)
+
+         return [
+             (endpoint.metadata.name, cast(str, endpoint.metadata.uid))
+             for endpoint in endpoints_list
+         ]
+
      @staticmethod
+     def _validate_and_get_window_length(
+         *, base_period: int, start_dt: datetime, end_dt: datetime
+     ) -> timedelta:
+         if not isinstance(base_period, int) or base_period <= 0:
+             raise mlrun.errors.MLRunValueError(
+                 "`base_period` must be a nonnegative integer - the number of minutes in a monitoring window"
+             )
+
+         window_length = timedelta(minutes=base_period)
+
+         full_interval_length = end_dt - start_dt
+         remainder = full_interval_length % window_length
+         if remainder:
+             if full_interval_length < window_length:
+                 extra_msg = (
+                     "The `base_period` is longer than the difference between `end` and `start`: "
+                     f"{full_interval_length}. Consider not specifying `base_period`."
+                 )
+             else:
+                 extra_msg = (
+                     f"Consider changing the `end` time to `end`={end_dt - remainder}"
+                 )
+             raise mlrun.errors.MLRunValueError(
+                 "The difference between `end` and `start` must be a multiple of `base_period`: "
+                 f"`base_period`={window_length}, `start`={start_dt}, `end`={end_dt}. "
+                 f"{extra_msg}"
+             )
+         return window_length
+
+     @staticmethod
+     def _validate_monotonically_increasing_data(
+         *,
+         application_schedules: Optional[
+             mm_schedules.ModelMonitoringSchedulesFileApplication
+         ],
+         endpoint_id: str,
+         start_dt: datetime,
+         end_dt: datetime,
+         base_period: Optional[int],
+         application_name: str,
+         existing_data_handling: ExistingDataHandling,
+     ) -> datetime:
+         """Make sure that the (app, endpoint) pair doesn't write output before the last analyzed window"""
+         if application_schedules:
+             last_analyzed = application_schedules.get_endpoint_last_analyzed(
+                 endpoint_id
+             )
+             if last_analyzed:
+                 if start_dt < last_analyzed:
+                     if existing_data_handling == ExistingDataHandling.skip_overlap:
+                         if last_analyzed < end_dt and base_period is None:
+                             logger.warn(
+                                 "Setting the start time to last_analyzed since the original start time precedes "
+                                 "last_analyzed",
+                                 original_start=start_dt,
+                                 new_start=last_analyzed,
+                                 application_name=application_name,
+                                 endpoint_id=endpoint_id,
+                             )
+                             start_dt = last_analyzed
+                         else:
+                             raise mlrun.errors.MLRunValueError(
+                                 "The start time for the application and endpoint precedes the last analyzed time: "
+                                 f"start_dt='{start_dt}', last_analyzed='{last_analyzed}', {application_name=}, "
+                                 f"{endpoint_id=}. "
+                                 "Writing data out of order is not supported, and the start time could not be "
+                                 "dynamically reset, as last_analyzed is later than the given end time or that "
+                                 f"base_period was specified (end_dt='{end_dt}', {base_period=})."
+                             )
+                     else:
+                         raise mlrun.errors.MLRunValueError(
+                             "The start time for the application and endpoint precedes the last analyzed time: "
+                             f"start_dt='{start_dt}', last_analyzed='{last_analyzed}', {application_name=}, "
+                             f"{endpoint_id=}. "
+                             "Writing data out of order is not supported. You should change the start time to "
+                             f"'{last_analyzed}' or later."
+                         )
+             else:
+                 logger.debug(
+                     "The application is running on the endpoint for the first time",
+                     endpoint_id=endpoint_id,
+                     start_dt=start_dt,
+                     application_name=application_name,
+                 )
+         return start_dt
+
+     @staticmethod
+     def _delete_application_data(
+         project_name: str,
+         application_name: str,
+         endpoint_ids: list[str],
+         application_schedules: Optional[
+             mm_schedules.ModelMonitoringSchedulesFileApplication
+         ],
+     ) -> None:
+         mlrun.get_run_db().delete_model_monitoring_metrics(
+             project=project_name,
+             application_name=application_name,
+             endpoint_ids=endpoint_ids,
+         )
+         if application_schedules:
+             application_schedules.delete_endpoints_last_analyzed(
+                 endpoint_uids=endpoint_ids
+             )
+
+     @classmethod
      def _window_generator(
-         start: Optional[str], end: Optional[str], base_period: Optional[int]
-     ) -> Iterator[tuple[Optional[datetime], Optional[datetime]]]:
+         cls,
+         *,
+         start: Optional[str],
+         end: Optional[str],
+         base_period: Optional[int],
+         application_schedules: Optional[
+             mm_schedules.ModelMonitoringSchedulesFileApplication
+         ],
+         endpoint_name: str,
+         endpoint_id: str,
+         application_name: str,
+         existing_data_handling: ExistingDataHandling,
+         context: "mlrun.MLClientCtx",
+         project: "mlrun.MlrunProject",
+         sample_data: Optional[pd.DataFrame],
+     ) -> Iterator[mm_context.MonitoringApplicationContext]:
+         def yield_monitoring_ctx(
+             window_start: Optional[datetime], window_end: Optional[datetime]
+         ) -> Iterator[mm_context.MonitoringApplicationContext]:
+             ctx = mm_context.MonitoringApplicationContext._from_ml_ctx(
+                 event={
+                     mm_constants.ApplicationEvent.ENDPOINT_NAME: endpoint_name,
+                     mm_constants.ApplicationEvent.ENDPOINT_ID: endpoint_id,
+                     mm_constants.ApplicationEvent.START_INFER_TIME: window_start,
+                     mm_constants.ApplicationEvent.END_INFER_TIME: window_end,
+                 },
+                 application_name=application_name,
+                 context=context,
+                 project=project,
+                 sample_df=sample_data,
+             )
+
+             if ctx.sample_df.empty:
+                 # The current sample is empty
+                 context.logger.debug(
+                     "No sample data available for tracking",
+                     application_name=application_name,
+                     endpoint_id=ctx.endpoint_id,
+                     start_time=ctx.start_infer_time,
+                     end_time=ctx.end_infer_time,
+                 )
+                 return
+
+             yield ctx
+
+             if application_schedules and window_end:
+                 application_schedules.update_endpoint_last_analyzed(
+                     endpoint_uid=endpoint_id, last_analyzed=window_end
+                 )
+
          if start is None or end is None:
              # A single window based on the `sample_data` input - see `_handler`.
-             yield None, None
+             yield from yield_monitoring_ctx(None, None)
              return

          start_dt = datetime.fromisoformat(start)
          end_dt = datetime.fromisoformat(end)

+         # If `start_dt` and `end_dt` do not include time zone information - change them to UTC
+         if (start_dt.tzinfo is None) and (end_dt.tzinfo is None):
+             start_dt = start_dt.replace(tzinfo=timezone.utc)
+             end_dt = end_dt.replace(tzinfo=timezone.utc)
+         elif (start_dt.tzinfo is None) or (end_dt.tzinfo is None):
+             raise mlrun.errors.MLRunValueError(
+                 "The start and end times must either both include time zone information or both be naive (no time "
+                 f"zone). Asserting the above failed, aborting the evaluate request: start={start}, end={end}."
+             )
+
+         if existing_data_handling != ExistingDataHandling.delete_all:
+             start_dt = cls._validate_monotonically_increasing_data(
+                 application_schedules=application_schedules,
+                 endpoint_id=endpoint_id,
+                 start_dt=start_dt,
+                 end_dt=end_dt,
+                 base_period=base_period,
+                 application_name=application_name,
+                 existing_data_handling=existing_data_handling,
+             )
+
          if base_period is None:
-             yield start_dt, end_dt
+             yield from yield_monitoring_ctx(start_dt, end_dt)
              return

-         if not isinstance(base_period, int) or base_period <= 0:
-             raise mlrun.errors.MLRunValueError(
-                 "`base_period` must be a nonnegative integer - the number of minutes in a monitoring window"
-             )
+         window_length = cls._validate_and_get_window_length(
+             base_period=base_period, start_dt=start_dt, end_dt=end_dt
+         )

-         window_length = timedelta(minutes=base_period)
          current_start_time = start_dt
          while current_start_time < end_dt:
              current_end_time = min(current_start_time + window_length, end_dt)
-             yield current_start_time, current_end_time
+             yield from yield_monitoring_ctx(current_start_time, current_end_time)
              current_start_time = current_end_time

      @classmethod
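
Taken together, the new `_window_generator` normalizes naive `start`/`end` values to UTC, requires `end - start` to be a multiple of `base_period`, and yields equal-length left-open windows. A standalone sketch of those rules (this mirrors the logic above; it is not the mlrun implementation):

```python
from collections.abc import Iterator
from datetime import datetime, timedelta, timezone


def windows(start: str, end: str, base_period: int) -> Iterator[tuple[datetime, datetime]]:
    start_dt, end_dt = datetime.fromisoformat(start), datetime.fromisoformat(end)
    if start_dt.tzinfo is None and end_dt.tzinfo is None:
        # Naive timestamps are interpreted as UTC
        start_dt = start_dt.replace(tzinfo=timezone.utc)
        end_dt = end_dt.replace(tzinfo=timezone.utc)
    window = timedelta(minutes=base_period)
    if (end_dt - start_dt) % window:
        raise ValueError("`end - start` must be a multiple of `base_period`")
    current = start_dt
    while current < end_dt:
        yield current, current + window
        current += window


# Four 6-hour windows covering a full day:
print(list(windows("2024-01-01T00:00:00", "2024-01-02T00:00:00", base_period=360)))
```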
@@ -445,6 +771,45 @@ class ModelMonitoringApplicationBase(MonitoringApplicationToDict, ABC):
          """
          return f"{handler_to_class}::{cls._handler.__name__}"

+     @classmethod
+     def _determine_job_name(
+         cls,
+         *,
+         func_name: Optional[str],
+         class_handler: Optional[str],
+         handler_to_class: str,
+     ) -> str:
+         """
+         Determine the batch app's job name. This name is used also as the application name,
+         which is retrieved in `_get_application_name`.
+         """
+         if func_name:
+             job_name = func_name
+         else:
+             if not class_handler:
+                 class_name = cls.__name__
+             else:
+                 class_name = handler_to_class.split(".")[-1].split("::")[0]
+
+             job_name = mlrun.utils.normalize_name(class_name)
+
+         if not mm_constants.APP_NAME_REGEX.fullmatch(job_name):
+             raise mlrun.errors.MLRunValueError(
+                 "The function name does not comply with the required pattern "
+                 f"`{mm_constants.APP_NAME_REGEX.pattern}`. "
+                 "Please choose another `func_name`."
+             )
+         job_name, was_renamed, suffix = mlrun.utils.helpers.ensure_batch_job_suffix(
+             job_name
+         )
+         if was_renamed:
+             mlrun.utils.logger.info(
+                 f'Changing function name - adding `"{suffix}"` suffix',
+                 func_name=job_name,
+             )
+
+         return job_name
+
      @classmethod
      def to_job(
          cls,
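
`_determine_job_name` normalizes the class name and guarantees a `-batch` suffix before using it as both the job name and the application name. An illustrative sketch of that naming rule (the real code delegates to `mlrun.utils.normalize_name` and the new `mlrun.utils.helpers.ensure_batch_job_suffix`; the helpers below are simplified stand-ins):

```python
import re


def normalize_name(name: str) -> str:
    # CamelCase -> dash-separated lower case (simplified)
    return re.sub(r"(?<!^)(?=[A-Z])", "-", name).lower()


def ensure_batch_suffix(job_name: str) -> tuple[str, bool]:
    # Append "-batch" only when it is not already present
    if job_name.endswith("-batch"):
        return job_name, False
    return f"{job_name}-batch", True


name, renamed = ensure_batch_suffix(normalize_name("DriftDetectionApp"))
assert (name, renamed) == ("drift-detection-app-batch", True)
```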
@@ -484,6 +849,7 @@ class ModelMonitoringApplicationBase(MonitoringApplicationToDict, ABC):
          * ``end``, ``datetime``
          * ``base_period``, ``int``
          * ``write_output``, ``bool``
+         * ``existing_data_handling``, ``str``

          For Git sources, add the source archive to the returned job and change the handler:

@@ -502,7 +868,10 @@ class ModelMonitoringApplicationBase(MonitoringApplicationToDict, ABC):
              :py:class:`~mlrun.model_monitoring.applications.ModelMonitoringApplicationBase`,
              is used.
          :param func_path: The path to the function. If ``None``, the current notebook is used.
-         :param func_name: The name of the function. If not ``None``, the class name is used.
+         :param func_name: The name of the function. If ``None``, the normalized class name is used
+             (:py:meth:`mlrun.utils.helpers.normalize_name`).
+             A ``"-batch"`` suffix is guaranteed to be added if not already there.
+             The function name is also used as the application name to use for the results.
          :param tag: Tag for the function.
          :param image: Docker image to run the job on (when running remotely).
          :param with_repo: Whether to clone the current repo to the build source.
@@ -523,12 +892,11 @@ class ModelMonitoringApplicationBase(MonitoringApplicationToDict, ABC):
          handler_to_class = class_handler or cls.__name__
          handler = cls.get_job_handler(handler_to_class)

-         if not class_handler:
-             class_name = cls.__name__
-         else:
-             class_name = handler_to_class.split(".")[-1].split("::")[-1]
-
-         job_name = func_name if func_name else class_name
+         job_name = cls._determine_job_name(
+             func_name=func_name,
+             class_handler=class_handler,
+             handler_to_class=handler_to_class,
+         )

          job = cast(
              mlrun.runtimes.KubejobRuntime,
@@ -567,6 +935,7 @@ class ModelMonitoringApplicationBase(MonitoringApplicationToDict, ABC):
          end: Optional[datetime] = None,
          base_period: Optional[int] = None,
          write_output: bool = False,
+         existing_data_handling: ExistingDataHandling = ExistingDataHandling.fail_on_overlap,
          stream_profile: Optional[ds_profile.DatastoreProfile] = None,
      ) -> "mlrun.RunObject":
          """
@@ -574,11 +943,14 @@ class ModelMonitoringApplicationBase(MonitoringApplicationToDict, ABC):
          :py:meth:`~mlrun.model_monitoring.applications.ModelMonitoringApplicationBase.do_tracking`
          model monitoring logic as a :py:class:`~mlrun.runtimes.KubejobRuntime`, which is an MLRun function.

-         This function has default values for all of its arguments. You should be change them when you want to pass
+         This function has default values for all of its arguments. You should change them when you want to pass
          data to the application.

          :param func_path: The path to the function. If ``None``, the current notebook is used.
-         :param func_name: The name of the function. If not ``None``, the class name is used.
+         :param func_name: The name of the function. If ``None``, the normalized class name is used
+             (:py:meth:`mlrun.utils.helpers.normalize_name`).
+             A ``"-batch"`` suffix is guaranteed to be added if not already there.
+             The function name is also used as the application name to use for the results.
          :param tag: Tag for the function.
          :param run_local: Whether to run the function locally or remotely.
          :param auto_build: Whether to auto build the function.
@@ -588,6 +960,7 @@ class ModelMonitoringApplicationBase(MonitoringApplicationToDict, ABC):
          :param reference_data: Pandas data-frame or :py:class:`~mlrun.artifacts.dataset.DatasetArtifact` URI as
              the reference dataset.
              When set, its statistics override the model endpoint's feature statistics.
+             You do not need to have a model endpoint to use this option.
          :param image: Docker image to run the job on (when running remotely).
          :param with_repo: Whether to clone the current repo to the build source.
          :param class_handler: The relative path to the class, useful when using Git sources or code from images.
@@ -608,6 +981,9 @@ class ModelMonitoringApplicationBase(MonitoringApplicationToDict, ABC):
          :param start: The start time of the endpoint's data, not included.
              If you want the model endpoint's data at ``start`` included, you need to subtract a
              small ``datetime.timedelta`` from it.
+             Make sure to include the time zone when constructing ``datetime.datetime`` objects
+             manually. When both ``start`` and ``end`` times do not include a time zone, they will
+             be treated as UTC.
          :param end: The end time of the endpoint's data, included.
              Please note: when ``start`` and ``end`` are set, they create a left-open time interval
              ("window") :math:`(\\operatorname{start}, \\operatorname{end}]` that excludes the
@@ -616,17 +992,31 @@ class ModelMonitoringApplicationBase(MonitoringApplicationToDict, ABC):
              taken in the window's data.
          :param base_period: The window length in minutes. If ``None``, the whole window from ``start`` to ``end``
              is taken. If an integer is specified, the application is run from ``start`` to ``end``
-             in ``base_period`` length windows, except for the last window that ends at ``end`` and
-             therefore may be shorter:
+             in ``base_period`` length windows:
              :math:`(\\operatorname{start}, \\operatorname{start} + \\operatorname{base\\_period}],
              (\\operatorname{start} + \\operatorname{base\\_period},
              \\operatorname{start} + 2\\cdot\\operatorname{base\\_period}],
              ..., (\\operatorname{start} +
-             m\\cdot\\operatorname{base\\_period}, \\operatorname{end}]`,
-             where :math:`m` is some positive integer.
+             (m - 1)\\cdot\\operatorname{base\\_period}, \\operatorname{end}]`,
+             where :math:`m` is a positive integer and :math:`\\operatorname{end} =
+             \\operatorname{start} + m\\cdot\\operatorname{base\\_period}`.
+             Please note that the difference between ``end`` and ``start`` must be a multiple of
+             ``base_period``.
          :param write_output: Whether to write the results and metrics to the time-series DB. Can be ``True`` only
              if ``endpoints`` are passed.
              Note: the model monitoring infrastructure must be up for the writing to work.
+         :param existing_data_handling:
+             How to handle the existing application data for the model endpoints when writing
+             new data whose requested ``start`` time precedes the ``end`` time of a previous run
+             that also wrote to the database. Relevant only when ``write_output=True``.
+             The options are:
+
+             - ``"fail_on_overlap"``: Default. An error is raised.
+             - ``"skip_overlap"``: the overlapping data is ignored and the
+               time window is cut so that it starts at the earliest possible time after ``start``.
+             - ``"delete_all"``: delete all the data that was written by the application to the
+               model endpoints, regardless of the time window, and write the new data.
+
          :param stream_profile: The stream datastore profile. It should be provided only when running locally and
              writing the outputs to the database (i.e., when both ``run_local`` and
              ``write_output`` are set to ``True``).
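
Putting the documented parameters together, a hedged usage sketch of `evaluate` with timezone-aware boundaries, a `base_period` that divides the interval evenly, and an explicit overlap policy. The application class, endpoint name, and project name below are placeholders, not part of the package:

```python
from datetime import datetime, timezone

import mlrun
from mlrun.model_monitoring.applications import ModelMonitoringApplicationBase


class MyDriftApp(ModelMonitoringApplicationBase):
    def do_tracking(self, monitoring_context):
        ...  # placeholder; a real application returns monitoring results/metrics


# Assumes an active MLRun project context
mlrun.get_or_create_project("my-project")

# 24 hours split into four 6-hour windows; naive datetimes would be treated as UTC
run = MyDriftApp.evaluate(
    func_name="my-drift-app",            # a "-batch" suffix is appended automatically
    endpoints=["my-endpoint"],           # or [(name, uid), ...] or "all"
    start=datetime(2024, 5, 1, tzinfo=timezone.utc),
    end=datetime(2024, 5, 2, tzinfo=timezone.utc),
    base_period=360,
    write_output=True,
    existing_data_handling="skip_overlap",  # string value of ExistingDataHandling.skip_overlap
)
```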
@@ -665,17 +1055,6 @@ class ModelMonitoringApplicationBase(MonitoringApplicationToDict, ABC):
              )
              params["end"] = end.isoformat() if isinstance(end, datetime) else end
              params["base_period"] = base_period
-             params["write_output"] = write_output
-             if stream_profile:
-                 if not run_local:
-                     raise mlrun.errors.MLRunValueError(
-                         "Passing a `stream_profile` is relevant only when running locally"
-                     )
-                 if not write_output:
-                     raise mlrun.errors.MLRunValueError(
-                         "Passing a `stream_profile` is relevant only when writing the outputs"
-                     )
-                 params["stream_profile"] = stream_profile
          elif start or end or base_period:
              raise mlrun.errors.MLRunValueError(
                  "Custom `start` and `end` times or base_period are supported only with endpoints data"
@@ -685,6 +1064,19 @@ class ModelMonitoringApplicationBase(MonitoringApplicationToDict, ABC):
                  "Writing the application output or passing `stream_profile` are supported only with endpoints data"
              )

+         params["write_output"] = write_output
+         params["existing_data_handling"] = existing_data_handling
+         if stream_profile:
+             if not run_local:
+                 raise mlrun.errors.MLRunValueError(
+                     "Passing a `stream_profile` is relevant only when running locally"
+                 )
+             if not write_output:
+                 raise mlrun.errors.MLRunValueError(
+                     "Passing a `stream_profile` is relevant only when writing the outputs"
+                 )
+             params["stream_profile"] = stream_profile
+
          inputs: dict[str, str] = {}
          for data, identifier in [
              (sample_data, "sample_data"),