PyPI - mlrun - Versions diffs - 1.7.1rc4__py3-none-any.whl → 1.8.0rc8__py3-none-any.whl - Mend

mlrun-1.7.1rc4-py3-none-any.whl → mlrun-1.8.0rc8-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of mlrun might be problematic. Click here for more details.

Files changed (257)

mlrun/__init__.py +23 -21
mlrun/__main__.py +3 -3
mlrun/alerts/alert.py +148 -14
mlrun/artifacts/__init__.py +1 -2
mlrun/artifacts/base.py +46 -12
mlrun/artifacts/dataset.py +16 -16
mlrun/artifacts/document.py +334 -0
mlrun/artifacts/manager.py +15 -13
mlrun/artifacts/model.py +66 -53
mlrun/common/constants.py +7 -0
mlrun/common/formatters/__init__.py +1 -0
mlrun/common/formatters/feature_set.py +1 -0
mlrun/common/formatters/function.py +1 -0
mlrun/{model_monitoring/db/stores/base/__init__.py → common/formatters/model_endpoint.py} +16 -1
mlrun/common/formatters/pipeline.py +1 -2
mlrun/common/formatters/project.py +9 -0
mlrun/common/model_monitoring/__init__.py +0 -5
mlrun/common/model_monitoring/helpers.py +1 -29
mlrun/common/runtimes/constants.py +1 -2
mlrun/common/schemas/__init__.py +6 -2
mlrun/common/schemas/alert.py +111 -19
mlrun/common/schemas/api_gateway.py +3 -3
mlrun/common/schemas/artifact.py +11 -7
mlrun/common/schemas/auth.py +6 -4
mlrun/common/schemas/background_task.py +7 -7
mlrun/common/schemas/client_spec.py +2 -3
mlrun/common/schemas/clusterization_spec.py +2 -2
mlrun/common/schemas/common.py +53 -3
mlrun/common/schemas/constants.py +15 -0
mlrun/common/schemas/datastore_profile.py +1 -1
mlrun/common/schemas/feature_store.py +9 -9
mlrun/common/schemas/frontend_spec.py +4 -4
mlrun/common/schemas/function.py +10 -10
mlrun/common/schemas/hub.py +1 -1
mlrun/common/schemas/k8s.py +3 -3
mlrun/common/schemas/memory_reports.py +3 -3
mlrun/common/schemas/model_monitoring/__init__.py +2 -1
mlrun/common/schemas/model_monitoring/constants.py +66 -14
mlrun/common/schemas/model_monitoring/grafana.py +1 -1
mlrun/common/schemas/model_monitoring/model_endpoints.py +91 -147
mlrun/common/schemas/notification.py +24 -3
mlrun/common/schemas/object.py +1 -1
mlrun/common/schemas/pagination.py +4 -4
mlrun/common/schemas/partition.py +137 -0
mlrun/common/schemas/pipeline.py +2 -2
mlrun/common/schemas/project.py +25 -17
mlrun/common/schemas/runs.py +2 -2
mlrun/common/schemas/runtime_resource.py +5 -5
mlrun/common/schemas/schedule.py +1 -1
mlrun/common/schemas/secret.py +1 -1
mlrun/common/schemas/tag.py +3 -3
mlrun/common/schemas/workflow.py +5 -5
mlrun/config.py +67 -10
mlrun/data_types/__init__.py +0 -2
mlrun/data_types/infer.py +3 -1
mlrun/data_types/spark.py +2 -1
mlrun/datastore/__init__.py +0 -2
mlrun/datastore/alibaba_oss.py +4 -1
mlrun/datastore/azure_blob.py +4 -1
mlrun/datastore/base.py +12 -4
mlrun/datastore/datastore.py +9 -3
mlrun/datastore/datastore_profile.py +79 -20
mlrun/datastore/dbfs_store.py +4 -1
mlrun/datastore/filestore.py +4 -1
mlrun/datastore/google_cloud_storage.py +4 -1
mlrun/datastore/hdfs.py +4 -1
mlrun/datastore/inmem.py +4 -1
mlrun/datastore/redis.py +4 -1
mlrun/datastore/s3.py +4 -1
mlrun/datastore/sources.py +52 -51
mlrun/datastore/store_resources.py +0 -2
mlrun/datastore/targets.py +21 -21
mlrun/datastore/utils.py +2 -2
mlrun/datastore/v3io.py +4 -1
mlrun/datastore/vectorstore.py +194 -0
mlrun/datastore/wasbfs/fs.py +13 -12
mlrun/db/base.py +208 -82
mlrun/db/factory.py +0 -3
mlrun/db/httpdb.py +1237 -386
mlrun/db/nopdb.py +201 -74
mlrun/errors.py +2 -2
mlrun/execution.py +136 -50
mlrun/feature_store/__init__.py +0 -2
mlrun/feature_store/api.py +41 -40
mlrun/feature_store/common.py +9 -9
mlrun/feature_store/feature_set.py +20 -18
mlrun/feature_store/feature_vector.py +27 -24
mlrun/feature_store/retrieval/base.py +14 -9
mlrun/feature_store/retrieval/job.py +2 -1
mlrun/feature_store/steps.py +2 -2
mlrun/features.py +30 -13
mlrun/frameworks/__init__.py +1 -2
mlrun/frameworks/_common/__init__.py +1 -2
mlrun/frameworks/_common/artifacts_library.py +2 -2
mlrun/frameworks/_common/mlrun_interface.py +10 -6
mlrun/frameworks/_common/model_handler.py +29 -27
mlrun/frameworks/_common/producer.py +3 -1
mlrun/frameworks/_dl_common/__init__.py +1 -2
mlrun/frameworks/_dl_common/loggers/__init__.py +1 -2
mlrun/frameworks/_dl_common/loggers/mlrun_logger.py +4 -4
mlrun/frameworks/_dl_common/loggers/tensorboard_logger.py +3 -3
mlrun/frameworks/_ml_common/__init__.py +1 -2
mlrun/frameworks/_ml_common/loggers/__init__.py +1 -2
mlrun/frameworks/_ml_common/model_handler.py +21 -21
mlrun/frameworks/_ml_common/plans/__init__.py +1 -2
mlrun/frameworks/_ml_common/plans/confusion_matrix_plan.py +3 -1
mlrun/frameworks/_ml_common/plans/dataset_plan.py +3 -3
mlrun/frameworks/_ml_common/plans/roc_curve_plan.py +4 -4
mlrun/frameworks/auto_mlrun/__init__.py +1 -2
mlrun/frameworks/auto_mlrun/auto_mlrun.py +22 -15
mlrun/frameworks/huggingface/__init__.py +1 -2
mlrun/frameworks/huggingface/model_server.py +9 -9
mlrun/frameworks/lgbm/__init__.py +47 -44
mlrun/frameworks/lgbm/callbacks/__init__.py +1 -2
mlrun/frameworks/lgbm/callbacks/logging_callback.py +4 -2
mlrun/frameworks/lgbm/callbacks/mlrun_logging_callback.py +4 -2
mlrun/frameworks/lgbm/mlrun_interfaces/__init__.py +1 -2
mlrun/frameworks/lgbm/mlrun_interfaces/mlrun_interface.py +5 -5
mlrun/frameworks/lgbm/model_handler.py +15 -11
mlrun/frameworks/lgbm/model_server.py +11 -7
mlrun/frameworks/lgbm/utils.py +2 -2
mlrun/frameworks/onnx/__init__.py +1 -2
mlrun/frameworks/onnx/dataset.py +3 -3
mlrun/frameworks/onnx/mlrun_interface.py +2 -2
mlrun/frameworks/onnx/model_handler.py +7 -5
mlrun/frameworks/onnx/model_server.py +8 -6
mlrun/frameworks/parallel_coordinates.py +11 -11
mlrun/frameworks/pytorch/__init__.py +22 -23
mlrun/frameworks/pytorch/callbacks/__init__.py +1 -2
mlrun/frameworks/pytorch/callbacks/callback.py +2 -1
mlrun/frameworks/pytorch/callbacks/logging_callback.py +15 -8
mlrun/frameworks/pytorch/callbacks/mlrun_logging_callback.py +19 -12
mlrun/frameworks/pytorch/callbacks/tensorboard_logging_callback.py +22 -15
mlrun/frameworks/pytorch/callbacks_handler.py +36 -30
mlrun/frameworks/pytorch/mlrun_interface.py +17 -17
mlrun/frameworks/pytorch/model_handler.py +21 -17
mlrun/frameworks/pytorch/model_server.py +13 -9
mlrun/frameworks/sklearn/__init__.py +19 -18
mlrun/frameworks/sklearn/estimator.py +2 -2
mlrun/frameworks/sklearn/metric.py +3 -3
mlrun/frameworks/sklearn/metrics_library.py +8 -6
mlrun/frameworks/sklearn/mlrun_interface.py +3 -2
mlrun/frameworks/sklearn/model_handler.py +4 -3
mlrun/frameworks/tf_keras/__init__.py +11 -12
mlrun/frameworks/tf_keras/callbacks/__init__.py +1 -2
mlrun/frameworks/tf_keras/callbacks/logging_callback.py +17 -14
mlrun/frameworks/tf_keras/callbacks/mlrun_logging_callback.py +15 -12
mlrun/frameworks/tf_keras/callbacks/tensorboard_logging_callback.py +21 -18
mlrun/frameworks/tf_keras/model_handler.py +17 -13
mlrun/frameworks/tf_keras/model_server.py +12 -8
mlrun/frameworks/xgboost/__init__.py +19 -18
mlrun/frameworks/xgboost/model_handler.py +13 -9
mlrun/launcher/base.py +3 -4
mlrun/launcher/local.py +1 -1
mlrun/launcher/remote.py +1 -1
mlrun/lists.py +4 -3
mlrun/model.py +117 -46
mlrun/model_monitoring/__init__.py +4 -4
mlrun/model_monitoring/api.py +61 -59
mlrun/model_monitoring/applications/_application_steps.py +17 -17
mlrun/model_monitoring/applications/base.py +165 -6
mlrun/model_monitoring/applications/context.py +88 -37
mlrun/model_monitoring/applications/evidently_base.py +1 -2
mlrun/model_monitoring/applications/histogram_data_drift.py +43 -21
mlrun/model_monitoring/applications/results.py +55 -3
mlrun/model_monitoring/controller.py +207 -239
mlrun/model_monitoring/db/__init__.py +0 -2
mlrun/model_monitoring/db/_schedules.py +156 -0
mlrun/model_monitoring/db/_stats.py +189 -0
mlrun/model_monitoring/db/tsdb/base.py +78 -25
mlrun/model_monitoring/db/tsdb/tdengine/schemas.py +90 -16
mlrun/model_monitoring/db/tsdb/tdengine/stream_graph_steps.py +33 -0
mlrun/model_monitoring/db/tsdb/tdengine/tdengine_connector.py +279 -59
mlrun/model_monitoring/db/tsdb/v3io/stream_graph_steps.py +1 -0
mlrun/model_monitoring/db/tsdb/v3io/v3io_connector.py +78 -17
mlrun/model_monitoring/helpers.py +152 -49
mlrun/model_monitoring/stream_processing.py +99 -283
mlrun/model_monitoring/tracking_policy.py +10 -3
mlrun/model_monitoring/writer.py +48 -36
mlrun/package/__init__.py +3 -6
mlrun/package/context_handler.py +1 -1
mlrun/package/packager.py +12 -9
mlrun/package/packagers/__init__.py +0 -2
mlrun/package/packagers/default_packager.py +14 -11
mlrun/package/packagers/numpy_packagers.py +16 -7
mlrun/package/packagers/pandas_packagers.py +18 -18
mlrun/package/packagers/python_standard_library_packagers.py +25 -11
mlrun/package/packagers_manager.py +31 -14
mlrun/package/utils/__init__.py +0 -3
mlrun/package/utils/_pickler.py +6 -6
mlrun/platforms/__init__.py +47 -16
mlrun/platforms/iguazio.py +4 -1
mlrun/projects/operations.py +27 -27
mlrun/projects/pipelines.py +75 -38
mlrun/projects/project.py +865 -206
mlrun/run.py +53 -10
mlrun/runtimes/__init__.py +1 -3
mlrun/runtimes/base.py +15 -11
mlrun/runtimes/daskjob.py +9 -9
mlrun/runtimes/generators.py +2 -1
mlrun/runtimes/kubejob.py +4 -5
mlrun/runtimes/mounts.py +572 -0
mlrun/runtimes/mpijob/__init__.py +0 -2
mlrun/runtimes/mpijob/abstract.py +7 -6
mlrun/runtimes/nuclio/api_gateway.py +7 -7
mlrun/runtimes/nuclio/application/application.py +11 -11
mlrun/runtimes/nuclio/function.py +19 -17
mlrun/runtimes/nuclio/serving.py +18 -11
mlrun/runtimes/pod.py +154 -45
mlrun/runtimes/remotesparkjob.py +3 -2
mlrun/runtimes/sparkjob/__init__.py +0 -2
mlrun/runtimes/sparkjob/spark3job.py +21 -11
mlrun/runtimes/utils.py +6 -5
mlrun/serving/merger.py +6 -4
mlrun/serving/remote.py +18 -17
mlrun/serving/routers.py +185 -172
mlrun/serving/server.py +7 -1
mlrun/serving/states.py +97 -78
mlrun/serving/utils.py +13 -2
mlrun/serving/v1_serving.py +3 -2
mlrun/serving/v2_serving.py +74 -65
mlrun/track/__init__.py +1 -1
mlrun/track/tracker.py +2 -2
mlrun/track/trackers/mlflow_tracker.py +6 -5
mlrun/utils/async_http.py +1 -1
mlrun/utils/clones.py +1 -1
mlrun/utils/helpers.py +66 -18
mlrun/utils/logger.py +106 -4
mlrun/utils/notifications/notification/__init__.py +22 -19
mlrun/utils/notifications/notification/base.py +33 -14
mlrun/utils/notifications/notification/console.py +6 -6
mlrun/utils/notifications/notification/git.py +11 -11
mlrun/utils/notifications/notification/ipython.py +10 -9
mlrun/utils/notifications/notification/mail.py +176 -0
mlrun/utils/notifications/notification/slack.py +6 -6
mlrun/utils/notifications/notification/webhook.py +6 -6
mlrun/utils/notifications/notification_pusher.py +86 -44
mlrun/utils/regex.py +3 -1
mlrun/utils/version/version.json +2 -2
{mlrun-1.7.1rc4.dist-info → mlrun-1.8.0rc8.dist-info}/METADATA +191 -186
mlrun-1.8.0rc8.dist-info/RECORD +347 -0
{mlrun-1.7.1rc4.dist-info → mlrun-1.8.0rc8.dist-info}/WHEEL +1 -1
mlrun/model_monitoring/db/stores/__init__.py +0 -136
mlrun/model_monitoring/db/stores/base/store.py +0 -213
mlrun/model_monitoring/db/stores/sqldb/__init__.py +0 -13
mlrun/model_monitoring/db/stores/sqldb/models/__init__.py +0 -71
mlrun/model_monitoring/db/stores/sqldb/models/base.py +0 -190
mlrun/model_monitoring/db/stores/sqldb/models/mysql.py +0 -103
mlrun/model_monitoring/db/stores/sqldb/models/sqlite.py +0 -40
mlrun/model_monitoring/db/stores/sqldb/sql_store.py +0 -659
mlrun/model_monitoring/db/stores/v3io_kv/__init__.py +0 -13
mlrun/model_monitoring/db/stores/v3io_kv/kv_store.py +0 -726
mlrun/model_monitoring/model_endpoint.py +0 -118
mlrun-1.7.1rc4.dist-info/RECORD +0 -351
{mlrun-1.7.1rc4.dist-info → mlrun-1.8.0rc8.dist-info}/LICENSE +0 -0
{mlrun-1.7.1rc4.dist-info → mlrun-1.8.0rc8.dist-info}/entry_points.txt +0 -0
{mlrun-1.7.1rc4.dist-info → mlrun-1.8.0rc8.dist-info}/top_level.txt +0 -0

mlrun/model_monitoring/controller.py CHANGED Viewed

@@ -11,31 +11,31 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
 import concurrent.futures
 import datetime
 import json
 import os
-import re
 from collections.abc import Iterator
-from typing import NamedTuple, Optional, Union, cast
+from contextlib import AbstractContextManager
+from types import TracebackType
+from typing import NamedTuple, Optional, cast
-import nuclio
+import nuclio_sdk
 import mlrun
 import mlrun.common.schemas.model_monitoring.constants as mm_constants
-import mlrun.data_types.infer
 import mlrun.feature_store as fstore
-import mlrun.model_monitoring.db.stores
-from mlrun.config import config as mlconf
+import mlrun.model_monitoring
+from mlrun.common.schemas import EndpointType
 from mlrun.datastore import get_stream_pusher
 from mlrun.errors import err_to_str
-from mlrun.model_monitoring.helpers import (
-    _BatchDict,
-    batch_dict2timedelta,
-    get_stream_path,
-)
+from mlrun.model_monitoring.db._schedules import ModelMonitoringSchedulesFile
+from mlrun.model_monitoring.helpers import batch_dict2timedelta, get_stream_path
 from mlrun.utils import datetime_now, logger
+_SECONDS_IN_DAY = int(datetime.timedelta(days=1).total_seconds())
 class _Interval(NamedTuple):
     start: datetime.datetime
@@ -45,12 +45,12 @@ class _Interval(NamedTuple):
 class _BatchWindow:
     def __init__(
         self,
-        project: str,
-        endpoint: str,
+        *,
+        schedules_file: ModelMonitoringSchedulesFile,
         application: str,
         timedelta_seconds: int,
-        last_updated: Optional[int],
-        first_request: Optional[int],
+        last_updated: int,
+        first_request: int,
     ) -> None:
         """
         Initialize a batch window object that handles the batch interval time range
@@ -58,159 +58,124 @@ class _BatchWindow:
         All the time values are in seconds.
         The start and stop time are in seconds since the epoch.
         """
-        self.project = project
-        self._endpoint = endpoint
         self._application = application
         self._first_request = first_request
         self._stop = last_updated
         self._step = timedelta_seconds
-        self._db = mlrun.model_monitoring.get_store_object(project=self.project)
+        self._db = schedules_file
         self._start = self._get_last_analyzed()
-    def _get_last_analyzed(self) -> Optional[int]:
-        try:
-            last_analyzed = self._db.get_last_analyzed(
-                endpoint_id=self._endpoint,
-                application_name=self._application,
-            )
-        except mlrun.errors.MLRunNotFoundError:
-            logger.info(
-                "No last analyzed time was found for this endpoint and "
-                "application, as this is probably the first time this "
-                "application is running. Using the latest between first "
-                "request time or last update time minus one day instead",
-                endpoint=self._endpoint,
-                application=self._application,
-                first_request=self._first_request,
-                last_updated=self._stop,
-            )
-            if self._first_request and self._stop:
-                # TODO : Change the timedelta according to the policy.
-                first_period_in_seconds = max(
-                    int(datetime.timedelta(days=1).total_seconds()), self._step
-                )  # max between one day and the base period
-                return max(
-                    self._first_request,
-                    self._stop - first_period_in_seconds,
-                )
-            return self._first_request
+    def _get_saved_last_analyzed(self) -> Optional[int]:
+        return cast(int, self._db.get_application_time(self._application))
-        logger.info(
-            "Got the last analyzed time for this endpoint and application",
-            endpoint=self._endpoint,
-            application=self._application,
-            last_analyzed=last_analyzed,
+    def _update_last_analyzed(self, last_analyzed: int) -> None:
+        self._db.update_application_time(
+            application=self._application, timestamp=last_analyzed
         )
-        return last_analyzed
-    def _update_last_analyzed(self, last_analyzed: int) -> None:
+    def _get_initial_last_analyzed(self) -> int:
         logger.info(
-            "Updating the last analyzed time for this endpoint and application",
-            endpoint=self._endpoint,
+            "No last analyzed time was found for this endpoint and application, as this is "
+            "probably the first time this application is running. Initializing last analyzed "
+            "to the latest between first request time or last update time minus one day",
             application=self._application,
-            last_analyzed=last_analyzed,
+            first_request=self._first_request,
+            last_updated=self._stop,
         )
-        self._db.update_last_analyzed(
-            endpoint_id=self._endpoint,
-            application_name=self._application,
-            last_analyzed=last_analyzed,
+        # max between one day and the base period
+        first_period_in_seconds = max(_SECONDS_IN_DAY, self._step)
+        return max(
+            self._first_request,
+            self._stop - first_period_in_seconds,
         )
-    def get_intervals(
-        self,
-    ) -> Iterator[_Interval]:
-        """Generate the batch interval time ranges."""
-        if self._start is not None and self._stop is not None:
-            entered = False
-            # Iterate timestamp from start until timestamp <= stop - step
-            # so that the last interval will end at (timestamp + step) <= stop.
-            # Add 1 to stop - step to get <= and not <.
-            for timestamp in range(
-                self._start, self._stop - self._step + 1, self._step
-            ):
-                entered = True
-                start_time = datetime.datetime.fromtimestamp(
-                    timestamp, tz=datetime.timezone.utc
-                )
-                end_time = datetime.datetime.fromtimestamp(
-                    timestamp + self._step, tz=datetime.timezone.utc
-                )
-                yield _Interval(start_time, end_time)
-                self._update_last_analyzed(timestamp + self._step)
-            if not entered:
-                logger.info(
-                    "All the data is set, but no complete intervals were found. "
-                    "Wait for last_updated to be updated",
-                    endpoint=self._endpoint,
-                    application=self._application,
-                    start=self._start,
-                    stop=self._stop,
-                    step=self._step,
-                )
+    def _get_last_analyzed(self) -> int:
+        saved_last_analyzed = self._get_saved_last_analyzed()
+        if saved_last_analyzed is not None:
+            return saved_last_analyzed
         else:
-            logger.warn(
-                "The first request time is not found for this endpoint. "
-                "No intervals will be generated",
-                endpoint=self._endpoint,
+            last_analyzed = self._get_initial_last_analyzed()
+            # Update the in-memory DB to avoid duplicate initializations
+            self._update_last_analyzed(last_analyzed)
+        return last_analyzed
+    def get_intervals(self) -> Iterator[_Interval]:
+        """Generate the batch interval time ranges."""
+        entered = False
+        # Iterate timestamp from start until timestamp <= stop - step
+        # so that the last interval will end at (timestamp + step) <= stop.
+        # Add 1 to stop - step to get <= and not <.
+        for timestamp in range(self._start, self._stop - self._step + 1, self._step):
+            entered = True
+            start_time = datetime.datetime.fromtimestamp(
+                timestamp, tz=datetime.timezone.utc
+            )
+            end_time = datetime.datetime.fromtimestamp(
+                timestamp + self._step, tz=datetime.timezone.utc
+            )
+            yield _Interval(start_time, end_time)
+            last_analyzed = timestamp + self._step
+            self._update_last_analyzed(last_analyzed)
+            logger.debug(
+                "Updated the last analyzed time for this endpoint and application",
+                application=self._application,
+                last_analyzed=last_analyzed,
+            )
+        if not entered:
+            logger.debug(
+                "All the data is set, but no complete intervals were found. "
+                "Wait for last_updated to be updated",
                 application=self._application,
                 start=self._start,
                 stop=self._stop,
+                step=self._step,
             )
-class _BatchWindowGenerator:
-    def __init__(self, batch_dict: Union[dict, str]) -> None:
+class _BatchWindowGenerator(AbstractContextManager):
+    def __init__(self, project: str, endpoint_id: str, window_length: int) -> None:
         """
         Initialize a batch window generator object that generates batch window objects
         for the monitoring functions.
         """
-        self._batch_dict = batch_dict
-        self._norm_batch_dict()
-        self._timedelta = self._get_timedelta()
-    def _norm_batch_dict(self) -> None:
-        # TODO: This will be removed once the job params can be parsed with different types
-        # Convert batch dict string into a dictionary
-        if isinstance(self._batch_dict, str):
-            self._parse_batch_dict_str()
-    def _parse_batch_dict_str(self) -> None:
-        """Convert batch dictionary string into a valid dictionary"""
-        characters_to_remove = "{} "
-        pattern = "[" + characters_to_remove + "]"
-        # Remove unnecessary characters from the provided string
-        batch_list = re.sub(pattern, "", self._batch_dict).split(",")
-        # Initialize the dictionary of batch interval ranges
-        self._batch_dict = {}
-        for pair in batch_list:
-            pair_list = pair.split(":")
-            self._batch_dict[pair_list[0]] = float(pair_list[1])
-    def _get_timedelta(self) -> int:
-        """Get the timedelta in seconds from the batch dictionary"""
-        return int(
-            batch_dict2timedelta(cast(_BatchDict, self._batch_dict)).total_seconds()
+        self._project = project
+        self._endpoint_id = endpoint_id
+        self._timedelta = window_length
+        self._schedules_file = ModelMonitoringSchedulesFile(
+            project=project, endpoint_id=endpoint_id
+        )
+    def __enter__(self) -> "_BatchWindowGenerator":
+        self._schedules_file.__enter__()
+        return super().__enter__()
+    def __exit__(
+        self,
+        exc_type: Optional[type[BaseException]],
+        exc_value: Optional[BaseException],
+        traceback: Optional[TracebackType],
+    ) -> Optional[bool]:
+        self._schedules_file.__exit__(
+            exc_type=exc_type, exc_value=exc_value, traceback=traceback
         )
     @classmethod
     def _get_last_updated_time(
-        cls, last_request: Optional[str], has_stream: bool
-    ) -> Optional[int]:
+        cls, last_request: datetime.datetime, not_batch_endpoint: bool
+    ) -> int:
         """
         Get the last updated time of a model endpoint.
         """
-        if not last_request:
-            return None
         last_updated = int(
-            cls._date_string2timestamp(last_request)
+            last_request.timestamp()
             - cast(
                 float,
                 mlrun.mlconf.model_endpoint_monitoring.parquet_batching_timeout_secs,
             )
         )
-        if not has_stream:
+        if not not_batch_endpoint:
             # If the endpoint does not have a stream, `last_updated` should be
             # the minimum between the current time and the last updated time.
             # This compensates for the bumping mechanism - see
@@ -221,45 +186,38 @@ class _BatchWindowGenerator:
             )
         return last_updated
-    @classmethod
-    def _normalize_first_request(
-        cls, first_request: Optional[str], endpoint: str
-    ) -> Optional[int]:
-        if not first_request:
-            logger.debug(
-                "There is no first request time for this endpoint.",
-                endpoint=endpoint,
-                first_request=first_request,
-            )
-            return None
-        return cls._date_string2timestamp(first_request)
-    @staticmethod
-    def _date_string2timestamp(date_string: str) -> int:
-        return int(datetime.datetime.fromisoformat(date_string).timestamp())
-    def get_batch_window(
+    def get_intervals(
         self,
-        project: str,
-        endpoint: str,
+        *,
         application: str,
-        first_request: Optional[str],
-        last_request: Optional[str],
-        has_stream: bool,
-    ) -> _BatchWindow:
+        first_request: datetime.datetime,
+        last_request: datetime.datetime,
+        not_batch_endpoint: bool,
+    ) -> Iterator[_Interval]:
         """
         Get the batch window for a specific endpoint and application.
-        first_request is the first request time to the endpoint.
+        `first_request` and `last_request` are the timestamps of the first request and last
+        request to the endpoint, respectively. They are guaranteed to be nonempty at this point.
         """
-        return _BatchWindow(
-            project=project,
-            endpoint=endpoint,
+        batch_window = _BatchWindow(
+            schedules_file=self._schedules_file,
             application=application,
             timedelta_seconds=self._timedelta,
-            last_updated=self._get_last_updated_time(last_request, has_stream),
-            first_request=self._normalize_first_request(first_request, endpoint),
+            last_updated=self._get_last_updated_time(last_request, not_batch_endpoint),
+            first_request=int(first_request.timestamp()),
         )
+        yield from batch_window.get_intervals()
+def _get_window_length() -> int:
+    """Get the timedelta in seconds from the batch dictionary"""
+    return int(
+        batch_dict2timedelta(
+            json.loads(
+                cast(str, os.getenv(mm_constants.EventFieldType.BATCH_INTERVALS_DICT))
+            )
+        ).total_seconds()
+    )
 class MonitoringApplicationController:
@@ -276,19 +234,11 @@ class MonitoringApplicationController:
         logger.debug(f"Initializing {self.__class__.__name__}", project=self.project)
-        self.db = mlrun.model_monitoring.get_store_object(project=self.project)
-        self._batch_window_generator = _BatchWindowGenerator(
-            batch_dict=json.loads(
-                mlrun.get_secret_or_env(
-                    mm_constants.EventFieldType.BATCH_INTERVALS_DICT
-                )
-            )
-        )
+        self._window_length = _get_window_length()
         self.model_monitoring_access_key = self._get_model_monitoring_access_key()
         self.storage_options = None
-        if mlconf.artifact_path.startswith("s3://"):
+        if mlrun.mlconf.artifact_path.startswith("s3://"):
             self.storage_options = mlrun.mlconf.get_s3_storage_options()
     @staticmethod
@@ -299,6 +249,19 @@ class MonitoringApplicationController:
             access_key = mlrun.mlconf.get_v3io_access_key()
         return access_key
+    @staticmethod
+    def _should_monitor_endpoint(endpoint: mlrun.common.schemas.ModelEndpoint) -> bool:
+        return (
+            # Is the model endpoint monitored?
+            endpoint.status.monitoring_mode == mm_constants.ModelMonitoringMode.enabled
+            # Was the model endpoint called? I.e., are the first and last requests nonempty?
+            and endpoint.status.first_request
+            and endpoint.status.last_request
+            # Is the model endpoint not a router endpoint? Router endpoint has no feature stats
+            and endpoint.metadata.endpoint_type.value
+            != mm_constants.EndpointType.ROUTER.value
+        )
     def run(self) -> None:
         """
         Main method for run all the relevant monitoring applications on each endpoint.
@@ -312,7 +275,10 @@ class MonitoringApplicationController:
         logger.info("Start running monitoring controller")
         try:
             applications_names = []
-            endpoints = self.db.list_model_endpoints(include_stats=True)
+            endpoints_list = mlrun.db.get_run_db().list_model_endpoints(
+                project=self.project, tsdb_metrics=True
+            )
+            endpoints = endpoints_list.endpoints
             if not endpoints:
                 logger.info("No model endpoints found", project=self.project)
                 return
@@ -349,43 +315,36 @@ class MonitoringApplicationController:
                 exc=err_to_str(e),
             )
             return
-        # Initialize a process pool that will be used to run each endpoint applications on a dedicated process
+        # Initialize a thread pool that will be used to monitor each endpoint on a dedicated thread
         with concurrent.futures.ThreadPoolExecutor(
-            max_workers=min(len(endpoints), 10),
+            max_workers=min(len(endpoints), 10)
         ) as pool:
             for endpoint in endpoints:
-                if (
-                    endpoint[mm_constants.EventFieldType.ACTIVE]
-                    and endpoint[mm_constants.EventFieldType.MONITORING_MODE]
-                    == mm_constants.ModelMonitoringMode.enabled.value
-                ):
-                    # Skip router endpoint:
-                    if (
-                        int(endpoint[mm_constants.EventFieldType.ENDPOINT_TYPE])
-                        == mm_constants.EndpointType.ROUTER
-                    ):
-                        # Router endpoint has no feature stats
-                        logger.info(
-                            f"{endpoint[mm_constants.EventFieldType.UID]} is router, skipping"
-                        )
-                        continue
+                if self._should_monitor_endpoint(endpoint):
                     pool.submit(
                         MonitoringApplicationController.model_endpoint_process,
+                        project=self.project,
                         endpoint=endpoint,
                         applications_names=applications_names,
-                        batch_window_generator=self._batch_window_generator,
-                        project=self.project,
+                        window_length=self._window_length,
                         model_monitoring_access_key=self.model_monitoring_access_key,
                         storage_options=self.storage_options,
                     )
+                else:
+                    logger.debug(
+                        "Skipping endpoint, not ready or not suitable for monitoring",
+                        endpoint_id=endpoint.metadata.uid,
+                        endpoint_name=endpoint.metadata.name,
+                    )
+        logger.info("Finished running monitoring controller")
     @classmethod
     def model_endpoint_process(
         cls,
-        endpoint: dict,
-        applications_names: list[str],
-        batch_window_generator: _BatchWindowGenerator,
         project: str,
+        endpoint: mlrun.common.schemas.ModelEndpoint,
+        applications_names: list[str],
+        window_length: int,
         model_monitoring_access_key: str,
         storage_options: Optional[dict] = None,
     ) -> None:
@@ -401,56 +360,60 @@ class MonitoringApplicationController:
         :param model_monitoring_access_key: (str) Access key to apply the model monitoring process.
         :param storage_options:             (dict) Storage options for reading the infer parquet files.
         """
-        endpoint_id = endpoint[mm_constants.EventFieldType.UID]
-        has_stream = endpoint[mm_constants.EventFieldType.STREAM_PATH] != ""
-        m_fs = fstore.get_feature_set(
-            endpoint[mm_constants.EventFieldType.FEATURE_SET_URI]
+        endpoint_id = endpoint.metadata.uid
+        not_batch_endpoint = not (
+            endpoint.metadata.endpoint_type == EndpointType.BATCH_EP
         )
+        m_fs = fstore.get_feature_set(endpoint.spec.monitoring_feature_set_uri)
         try:
-            for application in applications_names:
-                batch_window = batch_window_generator.get_batch_window(
-                    project=project,
-                    endpoint=endpoint_id,
-                    application=application,
-                    first_request=endpoint[mm_constants.EventFieldType.FIRST_REQUEST],
-                    last_request=endpoint[mm_constants.EventFieldType.LAST_REQUEST],
-                    has_stream=has_stream,
-                )
-                for start_infer_time, end_infer_time in batch_window.get_intervals():
-                    df = m_fs.to_dataframe(
-                        start_time=start_infer_time,
-                        end_time=end_infer_time,
-                        time_column=mm_constants.EventFieldType.TIMESTAMP,
-                        storage_options=storage_options,
-                    )
-                    if len(df) == 0:
-                        logger.info(
-                            "No data found for the given interval",
-                            start=start_infer_time,
-                            end=end_infer_time,
-                            endpoint_id=endpoint_id,
-                        )
-                    else:
-                        logger.info(
-                            "Data found for the given interval",
-                            start=start_infer_time,
-                            end=end_infer_time,
-                            endpoint_id=endpoint_id,
-                        )
-                        cls._push_to_applications(
-                            start_infer_time=start_infer_time,
-                            end_infer_time=end_infer_time,
-                            endpoint_id=endpoint_id,
-                            project=project,
-                            applications_names=[application],
-                            model_monitoring_access_key=model_monitoring_access_key,
+            with _BatchWindowGenerator(
+                project=project, endpoint_id=endpoint_id, window_length=window_length
+            ) as batch_window_generator:
+                for application in applications_names:
+                    for (
+                        start_infer_time,
+                        end_infer_time,
+                    ) in batch_window_generator.get_intervals(
+                        application=application,
+                        first_request=endpoint.status.first_request,
+                        last_request=endpoint.status.last_request,
+                        not_batch_endpoint=not_batch_endpoint,
+                    ):
+                        df = m_fs.to_dataframe(
+                            start_time=start_infer_time,
+                            end_time=end_infer_time,
+                            time_column=mm_constants.EventFieldType.TIMESTAMP,
+                            storage_options=storage_options,
                         )
+                        if len(df) == 0:
+                            logger.info(
+                                "No data found for the given interval",
+                                start=start_infer_time,
+                                end=end_infer_time,
+                                endpoint_id=endpoint_id,
+                            )
+                        else:
+                            logger.info(
+                                "Data found for the given interval",
+                                start=start_infer_time,
+                                end=end_infer_time,
+                                endpoint_id=endpoint_id,
+                            )
+                            cls._push_to_applications(
+                                start_infer_time=start_infer_time,
+                                end_infer_time=end_infer_time,
+                                endpoint_id=endpoint_id,
+                                endpoint_name=endpoint.metadata.name,
+                                project=project,
+                                applications_names=[application],
+                                model_monitoring_access_key=model_monitoring_access_key,
+                            )
+                logger.info("Finished processing endpoint", endpoint_id=endpoint_id)
         except Exception:
             logger.exception(
                 "Encountered an exception",
-                endpoint_id=endpoint[mm_constants.EventFieldType.UID],
+                endpoint_id=endpoint.metadata.uid,
             )
     @staticmethod
@@ -458,6 +421,7 @@ class MonitoringApplicationController:
         start_infer_time: datetime.datetime,
         end_infer_time: datetime.datetime,
         endpoint_id: str,
+        endpoint_name: str,
         project: str,
         applications_names: list[str],
         model_monitoring_access_key: str,
@@ -481,6 +445,7 @@ class MonitoringApplicationController:
                 sep=" ", timespec="microseconds"
             ),
             mm_constants.ApplicationEvent.ENDPOINT_ID: endpoint_id,
+            mm_constants.ApplicationEvent.ENDPOINT_NAME: endpoint_name,
             mm_constants.ApplicationEvent.OUTPUT_STREAM_URI: get_stream_path(
                 project=project,
                 function_name=mm_constants.MonitoringFunctionNames.WRITER,
@@ -491,14 +456,17 @@ class MonitoringApplicationController:
             stream_uri = get_stream_path(project=project, function_name=app_name)
             logger.info(
-                f"push endpoint_id {endpoint_id} to {app_name} by stream :{stream_uri}"
+                "Pushing data to application stream",
+                endpoint_id=endpoint_id,
+                app_name=app_name,
+                stream_uri=stream_uri,
             )
             get_stream_pusher(stream_uri, access_key=model_monitoring_access_key).push(
                 [data]
             )
-def handler(context: nuclio.Context, event: nuclio.Event) -> None:
+def handler(context: nuclio_sdk.Context, event: nuclio_sdk.Event) -> None:
     """
     Run model monitoring application processor

mlrun/model_monitoring/db/__init__.py CHANGED Viewed

@@ -12,7 +12,5 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
-from .stores import ObjectStoreFactory, get_store_object
-from .stores.base import StoreBase
 from .tsdb import get_tsdb_connector
 from .tsdb.base import TSDBConnector

mlrun 1.7.1rc4__py3-none-any.whl → 1.8.0rc8__py3-none-any.whl

Potentially problematic release.

mlrun 1.7.1rc4__py3-none-any.whl → 1.8.0rc8__py3-none-any.whl