mlrun 1.10.0rc18__py3-none-any.whl → 1.11.0rc16__py3-none-any.whl
This diff shows the content of publicly available package versions as released to a supported registry. It is provided for informational purposes only and reflects the changes between the two versions as they appear in the public registry.
Potentially problematic release: this version of mlrun has been flagged as possibly problematic.
- mlrun/__init__.py +24 -3
- mlrun/__main__.py +0 -4
- mlrun/artifacts/dataset.py +2 -2
- mlrun/artifacts/document.py +6 -1
- mlrun/artifacts/llm_prompt.py +21 -15
- mlrun/artifacts/model.py +3 -3
- mlrun/artifacts/plots.py +1 -1
- mlrun/{model_monitoring/db/tsdb/tdengine → auth}/__init__.py +2 -3
- mlrun/auth/nuclio.py +89 -0
- mlrun/auth/providers.py +429 -0
- mlrun/auth/utils.py +415 -0
- mlrun/common/constants.py +14 -0
- mlrun/common/model_monitoring/helpers.py +123 -0
- mlrun/common/runtimes/constants.py +28 -0
- mlrun/common/schemas/__init__.py +14 -3
- mlrun/common/schemas/alert.py +2 -2
- mlrun/common/schemas/api_gateway.py +3 -0
- mlrun/common/schemas/auth.py +12 -10
- mlrun/common/schemas/client_spec.py +4 -0
- mlrun/common/schemas/constants.py +25 -0
- mlrun/common/schemas/frontend_spec.py +1 -8
- mlrun/common/schemas/function.py +34 -0
- mlrun/common/schemas/hub.py +33 -20
- mlrun/common/schemas/model_monitoring/__init__.py +2 -1
- mlrun/common/schemas/model_monitoring/constants.py +12 -15
- mlrun/common/schemas/model_monitoring/functions.py +13 -4
- mlrun/common/schemas/model_monitoring/model_endpoints.py +11 -0
- mlrun/common/schemas/pipeline.py +1 -1
- mlrun/common/schemas/secret.py +17 -2
- mlrun/common/secrets.py +95 -1
- mlrun/common/types.py +10 -10
- mlrun/config.py +69 -19
- mlrun/data_types/infer.py +2 -2
- mlrun/datastore/__init__.py +12 -5
- mlrun/datastore/azure_blob.py +162 -47
- mlrun/datastore/base.py +274 -10
- mlrun/datastore/datastore.py +7 -2
- mlrun/datastore/datastore_profile.py +84 -22
- mlrun/datastore/model_provider/huggingface_provider.py +225 -41
- mlrun/datastore/model_provider/mock_model_provider.py +87 -0
- mlrun/datastore/model_provider/model_provider.py +206 -74
- mlrun/datastore/model_provider/openai_provider.py +226 -66
- mlrun/datastore/s3.py +39 -18
- mlrun/datastore/sources.py +1 -1
- mlrun/datastore/store_resources.py +4 -4
- mlrun/datastore/storeytargets.py +17 -12
- mlrun/datastore/targets.py +1 -1
- mlrun/datastore/utils.py +25 -6
- mlrun/datastore/v3io.py +1 -1
- mlrun/db/base.py +63 -32
- mlrun/db/httpdb.py +373 -153
- mlrun/db/nopdb.py +54 -21
- mlrun/errors.py +4 -2
- mlrun/execution.py +66 -25
- mlrun/feature_store/api.py +1 -1
- mlrun/feature_store/common.py +1 -1
- mlrun/feature_store/feature_vector_utils.py +1 -1
- mlrun/feature_store/steps.py +8 -6
- mlrun/frameworks/_common/utils.py +3 -3
- mlrun/frameworks/_dl_common/loggers/logger.py +1 -1
- mlrun/frameworks/_dl_common/loggers/tensorboard_logger.py +2 -1
- mlrun/frameworks/_ml_common/loggers/mlrun_logger.py +1 -1
- mlrun/frameworks/_ml_common/utils.py +2 -1
- mlrun/frameworks/auto_mlrun/auto_mlrun.py +4 -3
- mlrun/frameworks/lgbm/mlrun_interfaces/mlrun_interface.py +2 -1
- mlrun/frameworks/onnx/dataset.py +2 -1
- mlrun/frameworks/onnx/mlrun_interface.py +2 -1
- mlrun/frameworks/pytorch/callbacks/logging_callback.py +5 -4
- mlrun/frameworks/pytorch/callbacks/mlrun_logging_callback.py +2 -1
- mlrun/frameworks/pytorch/callbacks/tensorboard_logging_callback.py +2 -1
- mlrun/frameworks/pytorch/utils.py +2 -1
- mlrun/frameworks/sklearn/metric.py +2 -1
- mlrun/frameworks/tf_keras/callbacks/logging_callback.py +5 -4
- mlrun/frameworks/tf_keras/callbacks/mlrun_logging_callback.py +2 -1
- mlrun/frameworks/tf_keras/callbacks/tensorboard_logging_callback.py +2 -1
- mlrun/hub/__init__.py +52 -0
- mlrun/hub/base.py +142 -0
- mlrun/hub/module.py +172 -0
- mlrun/hub/step.py +113 -0
- mlrun/k8s_utils.py +105 -16
- mlrun/launcher/base.py +15 -7
- mlrun/launcher/local.py +4 -1
- mlrun/model.py +14 -4
- mlrun/model_monitoring/__init__.py +0 -1
- mlrun/model_monitoring/api.py +65 -28
- mlrun/model_monitoring/applications/__init__.py +1 -1
- mlrun/model_monitoring/applications/base.py +299 -128
- mlrun/model_monitoring/applications/context.py +2 -4
- mlrun/model_monitoring/controller.py +132 -58
- mlrun/model_monitoring/db/_schedules.py +38 -29
- mlrun/model_monitoring/db/_stats.py +6 -16
- mlrun/model_monitoring/db/tsdb/__init__.py +9 -7
- mlrun/model_monitoring/db/tsdb/base.py +29 -9
- mlrun/model_monitoring/db/tsdb/preaggregate.py +234 -0
- mlrun/model_monitoring/db/tsdb/stream_graph_steps.py +63 -0
- mlrun/model_monitoring/db/tsdb/timescaledb/queries/timescaledb_metrics_queries.py +414 -0
- mlrun/model_monitoring/db/tsdb/timescaledb/queries/timescaledb_predictions_queries.py +376 -0
- mlrun/model_monitoring/db/tsdb/timescaledb/queries/timescaledb_results_queries.py +590 -0
- mlrun/model_monitoring/db/tsdb/timescaledb/timescaledb_connection.py +434 -0
- mlrun/model_monitoring/db/tsdb/timescaledb/timescaledb_connector.py +541 -0
- mlrun/model_monitoring/db/tsdb/timescaledb/timescaledb_operations.py +808 -0
- mlrun/model_monitoring/db/tsdb/timescaledb/timescaledb_schema.py +502 -0
- mlrun/model_monitoring/db/tsdb/timescaledb/timescaledb_stream.py +163 -0
- mlrun/model_monitoring/db/tsdb/timescaledb/timescaledb_stream_graph_steps.py +60 -0
- mlrun/model_monitoring/db/tsdb/timescaledb/utils/timescaledb_dataframe_processor.py +141 -0
- mlrun/model_monitoring/db/tsdb/timescaledb/utils/timescaledb_query_builder.py +585 -0
- mlrun/model_monitoring/db/tsdb/timescaledb/writer_graph_steps.py +73 -0
- mlrun/model_monitoring/db/tsdb/v3io/stream_graph_steps.py +20 -9
- mlrun/model_monitoring/db/tsdb/v3io/v3io_connector.py +235 -51
- mlrun/model_monitoring/features_drift_table.py +2 -1
- mlrun/model_monitoring/helpers.py +30 -6
- mlrun/model_monitoring/stream_processing.py +34 -28
- mlrun/model_monitoring/writer.py +224 -4
- mlrun/package/__init__.py +2 -1
- mlrun/platforms/__init__.py +0 -43
- mlrun/platforms/iguazio.py +8 -4
- mlrun/projects/operations.py +17 -11
- mlrun/projects/pipelines.py +2 -2
- mlrun/projects/project.py +187 -123
- mlrun/run.py +95 -21
- mlrun/runtimes/__init__.py +2 -186
- mlrun/runtimes/base.py +103 -25
- mlrun/runtimes/constants.py +225 -0
- mlrun/runtimes/daskjob.py +5 -2
- mlrun/runtimes/databricks_job/databricks_runtime.py +2 -1
- mlrun/runtimes/local.py +5 -2
- mlrun/runtimes/mounts.py +20 -2
- mlrun/runtimes/nuclio/__init__.py +12 -7
- mlrun/runtimes/nuclio/api_gateway.py +36 -6
- mlrun/runtimes/nuclio/application/application.py +339 -40
- mlrun/runtimes/nuclio/function.py +222 -72
- mlrun/runtimes/nuclio/serving.py +132 -42
- mlrun/runtimes/pod.py +213 -21
- mlrun/runtimes/utils.py +49 -9
- mlrun/secrets.py +99 -14
- mlrun/serving/__init__.py +2 -0
- mlrun/serving/remote.py +84 -11
- mlrun/serving/routers.py +26 -44
- mlrun/serving/server.py +138 -51
- mlrun/serving/serving_wrapper.py +6 -2
- mlrun/serving/states.py +997 -283
- mlrun/serving/steps.py +62 -0
- mlrun/serving/system_steps.py +149 -95
- mlrun/serving/v2_serving.py +9 -10
- mlrun/track/trackers/mlflow_tracker.py +29 -31
- mlrun/utils/helpers.py +292 -94
- mlrun/utils/http.py +9 -2
- mlrun/utils/notifications/notification/base.py +18 -0
- mlrun/utils/notifications/notification/git.py +3 -5
- mlrun/utils/notifications/notification/mail.py +39 -16
- mlrun/utils/notifications/notification/slack.py +2 -4
- mlrun/utils/notifications/notification/webhook.py +2 -5
- mlrun/utils/notifications/notification_pusher.py +3 -3
- mlrun/utils/version/version.json +2 -2
- mlrun/utils/version/version.py +3 -4
- {mlrun-1.10.0rc18.dist-info → mlrun-1.11.0rc16.dist-info}/METADATA +63 -74
- {mlrun-1.10.0rc18.dist-info → mlrun-1.11.0rc16.dist-info}/RECORD +161 -143
- mlrun/api/schemas/__init__.py +0 -259
- mlrun/db/auth_utils.py +0 -152
- mlrun/model_monitoring/db/tsdb/tdengine/schemas.py +0 -344
- mlrun/model_monitoring/db/tsdb/tdengine/stream_graph_steps.py +0 -75
- mlrun/model_monitoring/db/tsdb/tdengine/tdengine_connection.py +0 -281
- mlrun/model_monitoring/db/tsdb/tdengine/tdengine_connector.py +0 -1266
- {mlrun-1.10.0rc18.dist-info → mlrun-1.11.0rc16.dist-info}/WHEEL +0 -0
- {mlrun-1.10.0rc18.dist-info → mlrun-1.11.0rc16.dist-info}/entry_points.txt +0 -0
- {mlrun-1.10.0rc18.dist-info → mlrun-1.11.0rc16.dist-info}/licenses/LICENSE +0 -0
- {mlrun-1.10.0rc18.dist-info → mlrun-1.11.0rc16.dist-info}/top_level.txt +0 -0
The rendered diff excerpts below cover four of the changed files.

mlrun/model_monitoring/applications/context.py

```diff
@@ -24,15 +24,12 @@ import mlrun.common.schemas.model_monitoring.constants as mm_constants
 import mlrun.errors
 import mlrun.feature_store as fstore
 import mlrun.feature_store.feature_set as fs
-import mlrun.features
 import mlrun.serving
 import mlrun.utils
 from mlrun.artifacts import Artifact, DatasetArtifact, ModelArtifact, get_model
 from mlrun.common.model_monitoring.helpers import FeatureStats
 from mlrun.common.schemas import ModelEndpoint
-from mlrun.model_monitoring.helpers import (
-    calculate_inputs_statistics,
-)
+from mlrun.model_monitoring.helpers import calculate_inputs_statistics


 class _ArtifactsLogger(Protocol):
@@ -252,6 +249,7 @@ class MonitoringApplicationContext:
             project=self.project_name,
             endpoint_id=self.endpoint_id,
             feature_analysis=True,
+            tsdb_metrics=False,
         )
         return self._model_endpoint
```
mlrun/model_monitoring/controller.py

```diff
@@ -11,33 +11,37 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-
+import collections
 import concurrent.futures
 import datetime
 import json
 import os
 import traceback
+import warnings
 from collections.abc import Iterator
 from contextlib import AbstractContextManager
 from types import TracebackType
-from typing import Any, NamedTuple, Optional, Union, cast
+from typing import Any, Final, NamedTuple, Optional, Union, cast

 import nuclio_sdk
+import numpy as np
 import pandas as pd

 import mlrun
 import mlrun.common.schemas.model_monitoring.constants as mm_constants
+import mlrun.feature_store as fstore
 import mlrun.model_monitoring
 import mlrun.model_monitoring.db._schedules as schedules
 import mlrun.model_monitoring.helpers
 import mlrun.platforms.iguazio
+from mlrun.common.schemas import EndpointType
 from mlrun.common.schemas.model_monitoring.constants import (
     ControllerEvent,
     ControllerEventEndpointPolicy,
 )
 from mlrun.errors import err_to_str
 from mlrun.model_monitoring.helpers import batch_dict2timedelta
-from mlrun.utils import logger
+from mlrun.utils import datetime_now, logger

 _SECONDS_IN_DAY = int(datetime.timedelta(days=1).total_seconds())
 _SECONDS_IN_MINUTE = 60
```
```diff
@@ -49,14 +53,16 @@ class _Interval(NamedTuple):


 class _BatchWindow:
+    TIMESTAMP_RESOLUTION_MICRO: Final = 1e-6  # 0.000001 seconds or 1 microsecond
+
     def __init__(
         self,
         *,
         schedules_file: schedules.ModelMonitoringSchedulesFileEndpoint,
         application: str,
         timedelta_seconds: int,
-        last_updated: int,
-        first_request: int,
+        last_updated: float,
+        first_request: float,
         endpoint_mode: mm_constants.EndpointMode = mm_constants.EndpointMode.REAL_TIME,
     ) -> None:
         """
```
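The new `TIMESTAMP_RESOLUTION_MICRO` constant and the epsilon arithmetic in the hunks below implement what are effectively half-open windows: each window ends one microsecond before the next one starts, so a record stamped exactly on a boundary is claimed by only one window. A standalone sketch of the convention (not mlrun code; `datetime.UTC` is the Python 3.11+ alias for `datetime.timezone.utc`):

```python
# Standalone sketch of the boundary convention behind TIMESTAMP_RESOLUTION_MICRO:
# each window ends one microsecond before the next window begins, so a boundary
# timestamp belongs to exactly one window.
import datetime

EPSILON = 1e-6  # one microsecond, the resolution of datetime objects
start, step = 1_700_000_000.0, 60.0  # hypothetical epoch start and a 60s window

window_start = datetime.datetime.fromtimestamp(start, tz=datetime.UTC)
window_end = datetime.datetime.fromtimestamp(start + step - EPSILON, tz=datetime.UTC)
next_start = datetime.datetime.fromtimestamp(start + step, tz=datetime.UTC)

print(window_end)  # ...:00:59.999999+00:00
assert window_end < next_start
```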
```diff
@@ -73,15 +79,17 @@ class _BatchWindow:
         self._endpoint_mode = endpoint_mode
         self._start = self._get_last_analyzed()

-    def _get_saved_last_analyzed(self) -> Optional[int]:
-        return self._db.get_application_time(self._application)
+    def _get_saved_last_analyzed(
+        self,
+    ) -> Optional[float]:
+        return self._db.get_application_time(self._application)

-    def _update_last_analyzed(self, last_analyzed: int) -> None:
+    def _update_last_analyzed(self, last_analyzed: float) -> None:
         self._db.update_application_time(
             application=self._application, timestamp=last_analyzed
         )

-    def _get_initial_last_analyzed(self) -> int:
+    def _get_initial_last_analyzed(self) -> float:
         if self._endpoint_mode == mm_constants.EndpointMode.BATCH:
             logger.info(
                 "No last analyzed time was found for this endpoint and application, as this is "
```
|
|
|
107
115
|
self._stop - first_period_in_seconds,
|
|
108
116
|
)
|
|
109
117
|
|
|
110
|
-
def _get_last_analyzed(self) ->
|
|
118
|
+
def _get_last_analyzed(self) -> float:
|
|
111
119
|
saved_last_analyzed = self._get_saved_last_analyzed()
|
|
112
120
|
if saved_last_analyzed is not None:
|
|
113
121
|
if self._endpoint_mode == mm_constants.EndpointMode.BATCH:
|
|
@@ -127,13 +135,14 @@ class _BatchWindow:
|
|
|
127
135
|
# Iterate timestamp from start until timestamp <= stop - step
|
|
128
136
|
# so that the last interval will end at (timestamp + step) <= stop.
|
|
129
137
|
# Add 1 to stop - step to get <= and not <.
|
|
130
|
-
for timestamp in
|
|
138
|
+
for timestamp in np.arange(
|
|
139
|
+
self._start, self._stop - self._step + 1, self._step
|
|
140
|
+
):
|
|
131
141
|
entered = True
|
|
132
|
-
start_time = datetime.datetime.fromtimestamp(
|
|
133
|
-
timestamp, tz=datetime.timezone.utc
|
|
134
|
-
)
|
|
142
|
+
start_time = datetime.datetime.fromtimestamp(timestamp, tz=datetime.UTC)
|
|
135
143
|
end_time = datetime.datetime.fromtimestamp(
|
|
136
|
-
timestamp + self._step,
|
|
144
|
+
timestamp - self.TIMESTAMP_RESOLUTION_MICRO + self._step,
|
|
145
|
+
tz=datetime.UTC,
|
|
137
146
|
)
|
|
138
147
|
yield _Interval(start_time, end_time)
|
|
139
148
|
|
|
@@ -149,27 +158,19 @@ class _BatchWindow:
|
|
|
149
158
|
# If the endpoint is a batch endpoint, we need to update the last analyzed time
|
|
150
159
|
# to the end of the batch time.
|
|
151
160
|
if last_analyzed:
|
|
152
|
-
if last_analyzed < self._stop:
|
|
161
|
+
if last_analyzed - self.TIMESTAMP_RESOLUTION_MICRO < self._stop:
|
|
153
162
|
# If the last analyzed time is earlier than the stop time,
|
|
154
163
|
# yield the final partial interval from last_analyzed to stop
|
|
155
164
|
yield _Interval(
|
|
156
|
-
datetime.datetime.fromtimestamp(
|
|
157
|
-
|
|
158
|
-
),
|
|
159
|
-
datetime.datetime.fromtimestamp(
|
|
160
|
-
self._stop, tz=datetime.timezone.utc
|
|
161
|
-
),
|
|
165
|
+
datetime.datetime.fromtimestamp(last_analyzed, tz=datetime.UTC),
|
|
166
|
+
datetime.datetime.fromtimestamp(self._stop, tz=datetime.UTC),
|
|
162
167
|
)
|
|
163
168
|
else:
|
|
164
169
|
# The time span between the start and end of the batch is shorter than the step,
|
|
165
170
|
# so we need to yield a partial interval covering that range.
|
|
166
171
|
yield _Interval(
|
|
167
|
-
datetime.datetime.fromtimestamp(
|
|
168
|
-
|
|
169
|
-
),
|
|
170
|
-
datetime.datetime.fromtimestamp(
|
|
171
|
-
self._stop, tz=datetime.timezone.utc
|
|
172
|
-
),
|
|
172
|
+
datetime.datetime.fromtimestamp(self._start, tz=datetime.UTC),
|
|
173
|
+
datetime.datetime.fromtimestamp(self._stop, tz=datetime.UTC),
|
|
173
174
|
)
|
|
174
175
|
|
|
175
176
|
self._update_last_analyzed(last_analyzed=self._stop)
|
|
@@ -223,7 +224,7 @@ class _BatchWindowGenerator(AbstractContextManager):
|
|
|
223
224
|
def get_application_list(self) -> set[str]:
|
|
224
225
|
return self._schedules_file.get_application_list()
|
|
225
226
|
|
|
226
|
-
def get_min_last_analyzed(self) -> Optional[
|
|
227
|
+
def get_min_last_analyzed(self) -> Optional[float]:
|
|
227
228
|
return self._schedules_file.get_min_timestamp()
|
|
228
229
|
|
|
229
230
|
@classmethod
|
|
```diff
@@ -231,22 +232,29 @@ class _BatchWindowGenerator(AbstractContextManager):
         cls,
         last_request: datetime.datetime,
         endpoint_mode: mm_constants.EndpointMode,
-    ) -> int:
+        not_old_batch_endpoint: bool,
+    ) -> float:
         """
         Get the last updated time of a model endpoint.
         """

         if endpoint_mode == mm_constants.EndpointMode.REAL_TIME:
-            last_updated = int(
-                last_request.timestamp()
-                - cast(
-                    float,
-                    mlrun.mlconf.model_endpoint_monitoring.parquet_batching_timeout_secs,
-                )
+            last_updated = last_request.timestamp() - cast(
+                float,
+                mlrun.mlconf.model_endpoint_monitoring.parquet_batching_timeout_secs,
             )
+            if not not_old_batch_endpoint:
+                # If the endpoint does not have a stream, `last_updated` should be
+                # the minimum between the current time and the last updated time.
+                # This compensates for the bumping mechanism - see
+                # `update_model_endpoint_last_request`.
+                last_updated = min(datetime_now().timestamp(), last_updated)
+                logger.debug(
+                    "The endpoint does not have a stream", last_updated=last_updated
+                )

             return last_updated
-        return int(last_request.timestamp())
+        return last_request.timestamp()
```
|
|
|
255
263
|
first_request: datetime.datetime,
|
|
256
264
|
last_request: datetime.datetime,
|
|
257
265
|
endpoint_mode: mm_constants.EndpointMode,
|
|
266
|
+
not_old_batch_endpoint: bool,
|
|
258
267
|
) -> Iterator[_Interval]:
|
|
259
268
|
"""
|
|
260
269
|
Get the batch window for a specific endpoint and application.
|
|
@@ -266,8 +275,10 @@ class _BatchWindowGenerator(AbstractContextManager):
|
|
|
266
275
|
schedules_file=self._schedules_file,
|
|
267
276
|
application=application,
|
|
268
277
|
timedelta_seconds=self._timedelta,
|
|
269
|
-
last_updated=self._get_last_updated_time(
|
|
270
|
-
|
|
278
|
+
last_updated=self._get_last_updated_time(
|
|
279
|
+
last_request, endpoint_mode, not_old_batch_endpoint
|
|
280
|
+
),
|
|
281
|
+
first_request=first_request.timestamp(),
|
|
271
282
|
endpoint_mode=endpoint_mode,
|
|
272
283
|
)
|
|
273
284
|
yield from self.batch_window.get_intervals()
|
|
@@ -291,6 +302,8 @@ class MonitoringApplicationController:
|
|
|
291
302
|
Note that the MonitoringApplicationController object requires access keys along with valid project configurations.
|
|
292
303
|
"""
|
|
293
304
|
|
|
305
|
+
_MAX_FEATURE_SET_PER_WORKER = 1000
|
|
306
|
+
|
|
294
307
|
def __init__(self) -> None:
|
|
295
308
|
"""Initialize Monitoring Application Controller"""
|
|
296
309
|
self.project = cast(str, mlrun.mlconf.active_project)
|
|
@@ -324,6 +337,9 @@ class MonitoringApplicationController:
|
|
|
324
337
|
mlrun.platforms.iguazio.KafkaOutputStream,
|
|
325
338
|
],
|
|
326
339
|
] = {}
|
|
340
|
+
self.feature_sets: collections.OrderedDict[
|
|
341
|
+
str, mlrun.feature_store.FeatureSet
|
|
342
|
+
] = collections.OrderedDict()
|
|
327
343
|
self.tsdb_connector = mlrun.model_monitoring.get_tsdb_connector(
|
|
328
344
|
project=self.project
|
|
329
345
|
)
|
|
@@ -433,15 +449,14 @@ class MonitoringApplicationController:
|
|
|
433
449
|
base_period_minutes, current_min_last_analyzed, current_time
|
|
434
450
|
)
|
|
435
451
|
and (
|
|
436
|
-
|
|
437
|
-
!= last_timestamp_sent
|
|
452
|
+
endpoint.status.last_request.timestamp() != last_timestamp_sent
|
|
438
453
|
or current_min_last_analyzed != last_analyzed_sent
|
|
439
454
|
)
|
|
440
455
|
):
|
|
441
456
|
# Write to schedule chief file the last_request, min_last_analyzed we pushed event to stream
|
|
442
457
|
schedules_file.update_endpoint_timestamps(
|
|
443
458
|
endpoint_uid=endpoint.metadata.uid,
|
|
444
|
-
last_request=
|
|
459
|
+
last_request=endpoint.status.last_request.timestamp(),
|
|
445
460
|
last_analyzed=current_min_last_analyzed,
|
|
446
461
|
)
|
|
447
462
|
return True
|
|
@@ -460,13 +475,14 @@ class MonitoringApplicationController:
|
|
|
460
475
|
last_request=endpoint.status.last_request,
|
|
461
476
|
first_request=endpoint.status.first_request,
|
|
462
477
|
endpoint_type=endpoint.metadata.endpoint_type,
|
|
478
|
+
feature_set_uri=endpoint.spec.monitoring_feature_set_uri,
|
|
463
479
|
)
|
|
464
480
|
return False
|
|
465
481
|
|
|
466
482
|
@staticmethod
|
|
467
483
|
def _should_send_nop_event(
|
|
468
484
|
base_period_minutes: int,
|
|
469
|
-
min_last_analyzed:
|
|
485
|
+
min_last_analyzed: float,
|
|
470
486
|
current_time: datetime.datetime,
|
|
471
487
|
):
|
|
472
488
|
if min_last_analyzed:
|
|
@@ -515,7 +531,7 @@ class MonitoringApplicationController:
|
|
|
515
531
|
try:
|
|
516
532
|
project_name = event[ControllerEvent.PROJECT]
|
|
517
533
|
endpoint_id = event[ControllerEvent.ENDPOINT_ID]
|
|
518
|
-
|
|
534
|
+
not_old_batch_endpoint = True
|
|
519
535
|
if (
|
|
520
536
|
event[ControllerEvent.KIND]
|
|
521
537
|
== mm_constants.ControllerEventKind.BATCH_COMPLETE
|
|
@@ -572,6 +588,10 @@ class MonitoringApplicationController:
|
|
|
572
588
|
|
|
573
589
|
endpoint_mode = mm_constants.EndpointMode.REAL_TIME
|
|
574
590
|
|
|
591
|
+
not_old_batch_endpoint = (
|
|
592
|
+
event[ControllerEvent.ENDPOINT_TYPE] != EndpointType.BATCH_EP
|
|
593
|
+
)
|
|
594
|
+
|
|
575
595
|
logger.info(
|
|
576
596
|
"Starting to analyze", timestamp=last_stream_timestamp.isoformat()
|
|
577
597
|
)
|
|
@@ -590,16 +610,49 @@ class MonitoringApplicationController:
|
|
|
590
610
|
first_request=first_request,
|
|
591
611
|
last_request=last_stream_timestamp,
|
|
592
612
|
endpoint_mode=endpoint_mode,
|
|
613
|
+
not_old_batch_endpoint=not_old_batch_endpoint,
|
|
593
614
|
):
|
|
594
615
|
data_in_window = False
|
|
595
|
-
|
|
596
|
-
|
|
597
|
-
|
|
598
|
-
|
|
599
|
-
|
|
600
|
-
|
|
601
|
-
|
|
602
|
-
|
|
616
|
+
if not_old_batch_endpoint:
|
|
617
|
+
# Serving endpoint - get the relevant window data from the TSDB
|
|
618
|
+
prediction_metric = self.tsdb_connector.read_predictions(
|
|
619
|
+
start=start_infer_time,
|
|
620
|
+
end=end_infer_time,
|
|
621
|
+
endpoint_id=endpoint_id,
|
|
622
|
+
)
|
|
623
|
+
if prediction_metric.data:
|
|
624
|
+
data_in_window = True
|
|
625
|
+
else:
|
|
626
|
+
# Old batch endpoint - get the relevant window data from the parquet target
|
|
627
|
+
warnings.warn(
|
|
628
|
+
"Analyzing batch model endpoints with real time processing events is "
|
|
629
|
+
"deprecated in 1.10.0 and will be removed in 1.12.0. "
|
|
630
|
+
"Instead, use job-based serving to invoke and analyze offline batch model"
|
|
631
|
+
"endpoints.",
|
|
632
|
+
# TODO: Remove this in 1.12.0
|
|
633
|
+
FutureWarning,
|
|
634
|
+
)
|
|
635
|
+
|
|
636
|
+
if endpoint_id not in self.feature_sets:
|
|
637
|
+
self.feature_sets[endpoint_id] = fstore.get_feature_set(
|
|
638
|
+
event[ControllerEvent.FEATURE_SET_URI]
|
|
639
|
+
)
|
|
640
|
+
self.feature_sets.move_to_end(endpoint_id, last=False)
|
|
641
|
+
if (
|
|
642
|
+
len(self.feature_sets)
|
|
643
|
+
> self._MAX_FEATURE_SET_PER_WORKER
|
|
644
|
+
):
|
|
645
|
+
self.feature_sets.popitem(last=True)
|
|
646
|
+
m_fs = self.feature_sets.get(endpoint_id)
|
|
647
|
+
|
|
648
|
+
df = m_fs.to_dataframe(
|
|
649
|
+
start_time=start_infer_time,
|
|
650
|
+
end_time=end_infer_time,
|
|
651
|
+
time_column=mm_constants.EventFieldType.TIMESTAMP,
|
|
652
|
+
storage_options=self.storage_options,
|
|
653
|
+
)
|
|
654
|
+
if len(df) > 0:
|
|
655
|
+
data_in_window = True
|
|
603
656
|
|
|
604
657
|
if not data_in_window:
|
|
605
658
|
logger.info(
|
|
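The `feature_sets` OrderedDict above acts as a bounded cache: a newly fetched feature set is moved to the front, and the tail entry is evicted once `_MAX_FEATURE_SET_PER_WORKER` is exceeded. The same pattern in isolation:

```python
# Standalone sketch of the bounded-cache pattern used for feature sets above:
# insert new entries at the front, evict from the tail once past the cap.
import collections
from typing import Callable

MAX_ENTRIES = 1000
_cache: collections.OrderedDict[str, object] = collections.OrderedDict()

def get_cached(key: str, fetch: Callable[[str], object]) -> object:
    if key not in _cache:
        _cache[key] = fetch(key)
        _cache.move_to_end(key, last=False)  # newest entry goes to the front
        if len(_cache) > MAX_ENTRIES:
            _cache.popitem(last=True)  # drop the oldest (tail) entry
    return _cache[key]

print(get_cached("store://feature-sets/demo", lambda key: f"feature set for {key}"))
```

Note that entries are only promoted on insertion, not on every hit, so the eviction order follows insertion order rather than strict least-recent use.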
```diff
@@ -616,7 +669,10 @@ class MonitoringApplicationController:
                         endpoint_id=endpoint_id,
                     )
                     self._push_to_applications(
-                        start_infer_time=start_infer_time,
+                        start_infer_time=start_infer_time
+                        - datetime.timedelta(
+                            batch_window_generator.batch_window.TIMESTAMP_RESOLUTION_MICRO
+                        ),  # We subtract a microsecond to ensure that the apps will retrieve start time data.
                         end_infer_time=end_infer_time,
                         endpoint_id=endpoint_id,
                         endpoint_name=endpoint_name,
```
```diff
@@ -653,6 +709,9 @@ class MonitoringApplicationController:
                     ControllerEvent.ENDPOINT_TYPE: event[
                         ControllerEvent.ENDPOINT_TYPE
                     ],
+                    ControllerEvent.FEATURE_SET_URI: event[
+                        ControllerEvent.FEATURE_SET_URI
+                    ],
                     ControllerEvent.FIRST_REQUEST: event[
                         ControllerEvent.FIRST_REQUEST
                     ],
```
```diff
@@ -732,8 +791,17 @@ class MonitoringApplicationController:
         logger.info("Starting monitoring controller chief")
         applications_names = []
         endpoints = self.project_obj.list_model_endpoints(
-            tsdb_metrics=False,
+            tsdb_metrics=False,
+            modes=[
+                mm_constants.EndpointMode.REAL_TIME,
+                mm_constants.EndpointMode.BATCH_LEGACY,
+            ],
         ).endpoints
+
+        if not endpoints:
+            logger.info("No model endpoints found", project=self.project)
+            return
+
         last_request_dict = self.tsdb_connector.get_last_request(
             endpoint_ids=[mep.metadata.uid for mep in endpoints]
         )
```
```diff
@@ -742,9 +810,6 @@ class MonitoringApplicationController:
             mm_constants.EventFieldType.ENDPOINT_ID
         )[mm_constants.ModelEndpointSchema.LAST_REQUEST].to_dict()

-        if not endpoints:
-            logger.info("No model endpoints found", project=self.project)
-            return
         monitoring_functions = self.project_obj.list_model_monitoring_functions()
         if monitoring_functions:
             # if monitoring_functions: - TODO : ML-7700
```
```diff
@@ -790,7 +855,11 @@ class MonitoringApplicationController:
         for endpoint in endpoints:
             last_request = last_request_dict.get(endpoint.metadata.uid, None)
             if isinstance(last_request, float):
-                last_request =
+                last_request = datetime.datetime.fromtimestamp(
+                    last_request, tz=datetime.UTC
+                )
+            elif isinstance(last_request, pd.Timestamp):
+                last_request = last_request.to_pydatetime()
             endpoint.status.last_request = (
                 last_request or endpoint.status.last_request
             )
```

(The removed line's right-hand side was lost in the rendered diff and is left blank above.)
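The chief loop above now tolerates two shapes of last-request values coming back from the TSDB, a float epoch or a `pandas.Timestamp`, and normalizes both to a timezone-aware datetime. The same idea as a standalone helper (a sketch, not the mlrun API):

```python
# Sketch: normalize a TSDB last-request value (float epoch or pd.Timestamp)
# to a timezone-aware datetime, mirroring the branch above.
import datetime
import pandas as pd

def normalize_last_request(value) -> datetime.datetime:
    if isinstance(value, float):
        return datetime.datetime.fromtimestamp(value, tz=datetime.UTC)
    if isinstance(value, pd.Timestamp):
        return value.to_pydatetime()
    return value  # already a datetime (or None, handled by the caller)

print(normalize_last_request(1_700_000_000.0))
print(normalize_last_request(pd.Timestamp("2023-11-14T22:13:20Z")))
```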
```diff
@@ -842,6 +911,7 @@ class MonitoringApplicationController:
                     sep=" ", timespec="microseconds"
                 ),
                 endpoint_type=endpoint.metadata.endpoint_type,
+                feature_set_uri=endpoint.spec.monitoring_feature_set_uri,
                 endpoint_policy=json.dumps(policy),
             )
             policy[ControllerEventEndpointPolicy.ENDPOINT_UPDATED] = (
```
```diff
@@ -859,6 +929,7 @@ class MonitoringApplicationController:
                     sep=" ", timespec="microseconds"
                 ),
                 endpoint_type=endpoint.metadata.endpoint_type.value,
+                feature_set_uri=endpoint.spec.monitoring_feature_set_uri,
                 endpoint_policy=policy,
             )

```
```diff
@@ -871,6 +942,7 @@ class MonitoringApplicationController:
         timestamp: str,
         first_request: str,
         endpoint_type: int,
+        feature_set_uri: str,
         endpoint_policy: dict[str, Any],
     ) -> None:
         """
```
```diff
@@ -883,6 +955,7 @@ class MonitoringApplicationController:
         :param endpoint_id: endpoint id string
         :param endpoint_name: the endpoint name string
        :param endpoint_type: Enum of the endpoint type
+       :param feature_set_uri: the feature set uri string
         """
         event = {
             ControllerEvent.KIND.value: kind,
```
```diff
@@ -892,6 +965,7 @@ class MonitoringApplicationController:
             ControllerEvent.TIMESTAMP.value: timestamp,
             ControllerEvent.FIRST_REQUEST.value: first_request,
             ControllerEvent.ENDPOINT_TYPE.value: endpoint_type,
+            ControllerEvent.FEATURE_SET_URI.value: feature_set_uri,
             ControllerEvent.ENDPOINT_POLICY.value: endpoint_policy,
         }
         logger.info(
```
mlrun/model_monitoring/db/_schedules.py

```diff
@@ -13,15 +13,12 @@
 # limitations under the License.

 import json
-import sys
 from abc import ABC, abstractmethod
 from contextlib import AbstractContextManager
-from datetime import datetime, timezone
+from datetime import datetime
 from types import TracebackType
 from typing import TYPE_CHECKING, Final, Optional

-import botocore.exceptions
-
 import mlrun
 import mlrun.common.schemas as schemas
 import mlrun.errors
```
```diff
@@ -30,10 +27,7 @@ import mlrun.utils.helpers
 from mlrun.utils import logger

 if TYPE_CHECKING:
-    if sys.version_info >= (3, 11):
-        from typing import Self
-    else:
-        from typing_extensions import Self
+    from typing import Self


 class ModelMonitoringSchedulesFileBase(AbstractContextManager, ABC):
```
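Dropping the `typing_extensions` fallback means this module now assumes Python 3.11+, where `typing.Self` (PEP 673) is available directly:

```python
# typing.Self is stdlib from Python 3.11 (PEP 673); the removed fallback was
# only needed on older interpreters. Minimal illustration:
from typing import Self

class SchedulesFile:
    def __enter__(self) -> Self:  # subclasses keep their own return type
        return self
```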
```diff
@@ -88,16 +82,8 @@ class ModelMonitoringSchedulesFileBase(AbstractContextManager, ABC):
             except (
                 mlrun.errors.MLRunNotFoundError,
                 # Different errors are raised for S3 or local storage, see ML-8042
-                botocore.exceptions.ClientError,
                 FileNotFoundError,
-            ) as err:
-                if (
-                    isinstance(err, botocore.exceptions.ClientError)
-                    # Add a log only to "NoSuchKey" errors codes - equivalent to `FileNotFoundError`
-                    and err.response["Error"]["Code"] != "NoSuchKey"
-                ):
-                    raise
-
+            ):
                 logger.exception(
                     "The schedules file was not found. It should have been created "
                     "as a part of the model endpoint's creation",
```
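With botocore removed, the except clause narrows to the two errors this code now expects for a missing file. A sketch of the simplified pattern, assuming (as the removed comment implied) that missing S3 objects now surface here as `FileNotFoundError` or `MLRunNotFoundError` rather than raw botocore ClientErrors:

```python
# Sketch of the narrowed error handling, assuming missing objects surface as
# MLRunNotFoundError or FileNotFoundError regardless of the storage backend.
import mlrun.errors

def load_schedules(read_fn) -> dict:
    try:
        return read_fn()
    except (mlrun.errors.MLRunNotFoundError, FileNotFoundError):
        return {}  # fall back to an empty schedules mapping
```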
```diff
@@ -162,19 +148,29 @@ class ModelMonitoringSchedulesFileEndpoint(ModelMonitoringSchedulesFileBase):
             endpoint_id=model_endpoint.metadata.uid,
         )

-    def get_application_time(self, application: str) -> Optional[int]:
+    def get_application_time(self, application: str) -> Optional[float]:
         self._check_open_schedules()
         return self._schedules.get(application)

-    def update_application_time(self, application: str, timestamp: int) -> None:
+    def update_application_time(self, application: str, timestamp: float) -> None:
+        self._check_open_schedules()
+        self._schedules[application] = float(timestamp)
+
+    def delete_application_time(self, application: str) -> None:
         self._check_open_schedules()
-        self._schedules[application] = timestamp
+        if application in self._schedules:
+            logger.debug(
+                "Deleting application time from schedules",
+                application=application,
+                endpoint_id=self._endpoint_id,
+            )
+            del self._schedules[application]

     def get_application_list(self) -> set[str]:
         self._check_open_schedules()
         return set(self._schedules.keys())

-    def get_min_timestamp(self) -> Optional[int]:
+    def get_min_timestamp(self) -> Optional[float]:
         self._check_open_schedules()
         return min(self._schedules.values(), default=None)

```
```diff
@@ -198,7 +194,7 @@ class ModelMonitoringSchedulesFileChief(ModelMonitoringSchedulesFileBase):
             project=self._project
         )

-    def get_endpoint_last_request(self, endpoint_uid: str) -> Optional[int]:
+    def get_endpoint_last_request(self, endpoint_uid: str) -> Optional[float]:
         self._check_open_schedules()
         if endpoint_uid in self._schedules:
             return self._schedules[endpoint_uid].get(
```
```diff
@@ -208,15 +204,19 @@ class ModelMonitoringSchedulesFileChief(ModelMonitoringSchedulesFileBase):
         return None

     def update_endpoint_timestamps(
-        self, endpoint_uid: str, last_request: int, last_analyzed: int
+        self, endpoint_uid: str, last_request: float, last_analyzed: float
     ) -> None:
         self._check_open_schedules()
         self._schedules[endpoint_uid] = {
-            schemas.model_monitoring.constants.ScheduleChiefFields.LAST_REQUEST: last_request,
-            schemas.model_monitoring.constants.ScheduleChiefFields.LAST_ANALYZED: last_analyzed,
+            schemas.model_monitoring.constants.ScheduleChiefFields.LAST_REQUEST: float(
+                last_request
+            ),
+            schemas.model_monitoring.constants.ScheduleChiefFields.LAST_ANALYZED: float(
+                last_analyzed
+            ),
         }

-    def get_endpoint_last_analyzed(self, endpoint_uid: str) -> Optional[int]:
+    def get_endpoint_last_analyzed(self, endpoint_uid: str) -> Optional[float]:
         self._check_open_schedules()
         if endpoint_uid in self._schedules:
             return self._schedules[endpoint_uid].get(
```
```diff
@@ -267,9 +267,18 @@ class ModelMonitoringSchedulesFileApplication(ModelMonitoringSchedulesFileBase):
         self, endpoint_uid: str, last_analyzed: datetime
     ) -> None:
         self._check_open_schedules()
-        self._schedules[endpoint_uid] = last_analyzed.astimezone(
-            tz=timezone.utc
-        ).isoformat()
+        self._schedules[endpoint_uid] = last_analyzed.isoformat()
+
+    def delete_endpoints_last_analyzed(self, endpoint_uids: list[str]) -> None:
+        self._check_open_schedules()
+        for endpoint_uid in endpoint_uids:
+            if endpoint_uid in self._schedules:
+                logger.debug(
+                    "Deleting endpoint last analyzed from schedules",
+                    endpoint_uid=endpoint_uid,
+                    application=self._application,
+                )
+                del self._schedules[endpoint_uid]


 def _delete_folder(folder: str) -> None:
```
mlrun/model_monitoring/db/_stats.py

```diff
@@ -13,11 +13,11 @@
 # limitations under the License.
 import abc
 import json
+import typing
 from abc import abstractmethod
-from datetime import datetime, timezone
+from datetime import UTC, datetime
 from typing import cast

-import botocore.exceptions
 import fsspec

 import mlrun.datastore.base
```
```diff
@@ -73,7 +73,7 @@ class ModelMonitoringStatsFile(abc.ABC):
             path=self._item.url,
         )

-    def read(self) -> tuple[dict, datetime]:
+    def read(self) -> tuple[dict, typing.Optional[datetime]]:
         """
         Read the stats data and timestamp saved in file
         :return: tuple[dict, str] dictionary with stats data and timestamp saved in file
```
```diff
@@ -82,30 +82,20 @@ class ModelMonitoringStatsFile(abc.ABC):
             content = json.loads(self._item.get().decode())
             timestamp = content.get("timestamp")
             if timestamp is not None:
-                timestamp = datetime.fromisoformat(timestamp).astimezone(
-                    tz=timezone.utc
-                )
+                timestamp = datetime.fromisoformat(timestamp).astimezone(tz=UTC)
             return content.get("data"), timestamp
         except (
             mlrun.errors.MLRunNotFoundError,
             # Different errors are raised for S3 or local storage, see ML-8042
-            botocore.exceptions.ClientError,
             FileNotFoundError,
         ) as err:
-            if (
-                isinstance(err, botocore.exceptions.ClientError)
-                # Add a log only to "NoSuchKey" errors codes - equivalent to `FileNotFoundError`
-                and err.response["Error"]["Code"] != "NoSuchKey"
-            ):
-                raise
-
-            logger.exception(
+            logger.warning(
                 "The Stats file was not found. It should have been created "
                 "as a part of the model endpoint's creation",
                 path=self._path,
                 error=err,
             )
-
+            return {}, None

     def write(self, stats: dict, timestamp: datetime) -> None:
         """
```