mlrun 1.6.0rc21__py3-none-any.whl → 1.6.0rc22__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of mlrun might be problematic.
- mlrun/artifacts/base.py +6 -6
- mlrun/artifacts/dataset.py +15 -8
- mlrun/artifacts/manager.py +1 -1
- mlrun/artifacts/model.py +2 -2
- mlrun/artifacts/plots.py +8 -8
- mlrun/datastore/azure_blob.py +9 -14
- mlrun/datastore/base.py +21 -7
- mlrun/datastore/dbfs_store.py +10 -10
- mlrun/datastore/filestore.py +2 -1
- mlrun/datastore/google_cloud_storage.py +9 -8
- mlrun/datastore/redis.py +2 -1
- mlrun/datastore/s3.py +3 -6
- mlrun/datastore/sources.py +2 -12
- mlrun/datastore/targets.py +2 -13
- mlrun/datastore/v3io.py +16 -19
- mlrun/db/httpdb.py +8 -1
- mlrun/execution.py +14 -5
- mlrun/feature_store/api.py +3 -4
- mlrun/launcher/base.py +4 -4
- mlrun/lists.py +0 -6
- mlrun/model.py +8 -1
- mlrun/model_monitoring/api.py +9 -31
- mlrun/model_monitoring/batch.py +14 -13
- mlrun/model_monitoring/controller.py +91 -69
- mlrun/model_monitoring/controller_handler.py +1 -3
- mlrun/model_monitoring/helpers.py +19 -8
- mlrun/model_monitoring/stream_processing.py +0 -3
- mlrun/projects/operations.py +1 -1
- mlrun/projects/project.py +5 -4
- mlrun/runtimes/base.py +6 -1
- mlrun/runtimes/constants.py +11 -0
- mlrun/runtimes/kubejob.py +1 -1
- mlrun/runtimes/local.py +64 -53
- mlrun/serving/routers.py +7 -20
- mlrun/serving/server.py +4 -14
- mlrun/serving/utils.py +0 -3
- mlrun/utils/helpers.py +5 -2
- mlrun/utils/logger.py +5 -5
- mlrun/utils/version/version.json +2 -2
- {mlrun-1.6.0rc21.dist-info → mlrun-1.6.0rc22.dist-info}/METADATA +3 -1
- {mlrun-1.6.0rc21.dist-info → mlrun-1.6.0rc22.dist-info}/RECORD +45 -45
- {mlrun-1.6.0rc21.dist-info → mlrun-1.6.0rc22.dist-info}/LICENSE +0 -0
- {mlrun-1.6.0rc21.dist-info → mlrun-1.6.0rc22.dist-info}/WHEEL +0 -0
- {mlrun-1.6.0rc21.dist-info → mlrun-1.6.0rc22.dist-info}/entry_points.txt +0 -0
- {mlrun-1.6.0rc21.dist-info → mlrun-1.6.0rc22.dist-info}/top_level.txt +0 -0
mlrun/db/httpdb.py
CHANGED

@@ -707,7 +707,7 @@ class HTTPRunDB(RunDBInterface):
         :param state: List only runs whose state is specified.
         :param sort: Whether to sort the result according to their start time. Otherwise, results will be
             returned by their internal order in the DB (order will not be guaranteed).
-        :param last: Deprecated - currently not used.
+        :param last: Deprecated - currently not used (will be removed in 1.8.0).
         :param iter: If ``True`` return runs from all iterations. Otherwise, return only runs whose ``iter`` is 0.
         :param start_time_from: Filter by run start time in ``[start_time_from, start_time_to]``.
         :param start_time_to: Filter by run start time in ``[start_time_from, start_time_to]``.

@@ -733,6 +733,13 @@ class HTTPRunDB(RunDBInterface):
                 "using the `with_notifications` flag."
             )
 
+        if last:
+            # TODO: Remove this in 1.8.0
+            warnings.warn(
+                "'last' is deprecated and will be removed in 1.8.0.",
+                FutureWarning,
+            )
+
         if (
             not name
             and not uid
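The new guard is the standard Python deprecation idiom: keep accepting the parameter, warn when it is actually passed, and remove it in the announced release. A minimal self-contained sketch of the same pattern (the function and parameter here are illustrative stand-ins, not mlrun's API):

    import warnings

    def list_runs(name: str = "", last: int = 0) -> list:
        # stand-in for an API function that is phasing out a parameter
        if last:
            warnings.warn(
                "'last' is deprecated and will be removed in a future release.",
                FutureWarning,
                stacklevel=2,  # attribute the warning to the caller's line
            )
        return []

    list_runs(last=5)  # still works, but emits a FutureWarning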
mlrun/execution.py
CHANGED

@@ -393,7 +393,7 @@ class MLClientCtx(object):
             if v:
                 self._set_input(k, v)
 
-        if host and not is_api
+        if host and not is_api:
             self.set_label("host", host)
 
         start = get_in(attrs, "status.start_time")

@@ -411,7 +411,7 @@ class MLClientCtx(object):
             self._artifacts_manager.artifacts[key] = artifact_obj
         self._state = status.get("state", self._state)
 
-        #
+        # No need to store the run for every worker
         if store_run and self.is_logging_worker():
             self.store_run()
         return self

@@ -434,6 +434,12 @@ class MLClientCtx(object):
             context.set_label("framework", "sklearn")
 
         """
+        if not self.is_logging_worker():
+            logger.warning(
+                "Setting labels is only supported in the logging worker, ignoring"
+            )
+            return
+
         if replace or not self._labels.get(key):
             self._labels[key] = str(value)

@@ -974,10 +980,11 @@ class MLClientCtx(object):
         """
         # If it's a OpenMPI job, get the global rank and compare to the logging rank (worker) set in MLRun's
         # configuration:
-
+        labels = self.labels
+        if "host" in labels and labels.get("kind", "job") == "mpijob":
             # The host (pod name) of each worker is created by k8s, and by default it uses the rank number as the id in
             # the following template: ...-worker-<rank>
-            rank = int(
+            rank = int(labels["host"].rsplit("-", 1)[1])
             return rank == mlrun.mlconf.packagers.logging_worker
 
         # Single worker is always the logging worker:

@@ -1004,7 +1011,6 @@ class MLClientCtx(object):
                 _struct[key] = val
 
         struct = {
-            "metadata.labels": self._labels,
             "metadata.annotations": self._annotations,
             "spec.parameters": self._parameters,
             "spec.outputs": self._outputs,

@@ -1019,6 +1025,9 @@ class MLClientCtx(object):
         if self._state != "completed":
             struct["status.state"] = self._state
 
+        if self.is_logging_worker():
+            struct["metadata.labels"] = self._labels
+
         set_if_not_none(struct, "status.error", self._error)
         set_if_not_none(struct, "status.commit", self._commit)
         set_if_not_none(struct, "status.iterations", self._iteration_results)
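The rank check in `is_logging_worker` relies on the OpenMPI pod-naming template, where Kubernetes names each worker pod with its rank as the suffix (`...-worker-<rank>`). A small sketch of the same parsing, assuming that naming convention holds:

    def rank_from_pod_name(host: str) -> int:
        # rsplit("-", 1) splits once from the right, isolating the rank suffix
        return int(host.rsplit("-", 1)[1])

    assert rank_from_pod_name("train-abc-worker-3") == 3
    assert rank_from_pod_name("mpijob-x-worker-0") == 0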
mlrun/feature_store/api.py
CHANGED

@@ -933,7 +933,7 @@ def _deploy_ingestion_service_v2(
             source = HTTPSource()
             func = mlrun.code_to_function("ingest", kind="serving").apply(mount_v3io())
             config = RunConfig(function=func)
-
+            my_set.deploy_ingestion_service(source, run_config=config)
 
         :param featureset: feature set object or uri
         :param source: data source object describing the online or offline source

@@ -1025,7 +1025,7 @@ def deploy_ingestion_service(
             source = HTTPSource()
             func = mlrun.code_to_function("ingest", kind="serving").apply(mount_v3io())
             config = RunConfig(function=func)
-
+            my_set.deploy_ingestion_service(source, run_config=config)
 
         :param featureset: feature set object or uri
         :param source: data source object describing the online or offline source

@@ -1036,8 +1036,7 @@ def deploy_ingestion_service(
 
     :return: URL to access the deployed ingestion service
     """
-    endpoint, _ =
-        featureset=featureset,
+    endpoint, _ = featureset.deploy_ingestion_service(
         source=source,
         targets=targets,
         name=name,
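The rewritten body reflects that the feature set's `deploy_ingestion_service` method returns a pair, of which this module-level wrapper keeps only the endpoint URL. A toy sketch of that wrapper pattern, with stand-in types rather than mlrun's real classes:

    class FeatureSetStub:
        # stand-in for a feature set whose deploy returns (endpoint, function)
        def deploy_ingestion_service(self, source=None, **kwargs):
            deployed_function = object()
            return "http://ingest.example/v1", deployed_function

    def deploy_ingestion_service(featureset, source=None, **kwargs) -> str:
        # unpack the pair and discard the function handle, keeping the URL
        endpoint, _ = featureset.deploy_ingestion_service(source=source, **kwargs)
        return endpoint

    print(deploy_ingestion_service(FeatureSetStub()))  # http://ingest.example/v1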
mlrun/launcher/base.py
CHANGED

@@ -396,10 +396,10 @@ class BaseLauncher(abc.ABC):
             status=run.status.state,
             name=run.metadata.name,
         )
-        if
-
-        mlrun.runtimes.constants.RunStates.
-
+        if (
+            run.status.state
+            in mlrun.runtimes.constants.RunStates.error_and_abortion_states()
+        ):
             if runtime._is_remote and not runtime.is_child:
                 logger.error(
                     "Run did not finish successfully",
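This check, and the `RunObject.error` change in mlrun/model.py below, lean on grouping helpers added to `RunStates` (the `mlrun/runtimes/constants.py` hunk, +11 lines, is not expanded on this page). A plausible sketch of such helpers; the exact state names and memberships are an assumption, not the released code:

    class RunStates:
        error = "error"
        aborted = "aborted"
        aborting = "aborting"

        @staticmethod
        def error_states() -> list:
            return [RunStates.error]

        @staticmethod
        def abortion_states() -> list:
            # assumed membership; the real list lives in mlrun/runtimes/constants.py
            return [RunStates.aborted, RunStates.aborting]

        @staticmethod
        def error_and_abortion_states() -> list:
            return RunStates.error_states() + RunStates.abortion_states()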
mlrun/lists.py
CHANGED
mlrun/model.py
CHANGED

@@ -1259,8 +1259,15 @@ class RunObject(RunTemplate):
         """error string if failed"""
         if self.status:
             unknown_error = ""
-            if
+            if (
+                self.status.state
+                in mlrun.runtimes.constants.RunStates.abortion_states()
+            ):
+                unknown_error = "Run was aborted"
+
+            elif self.status.state in mlrun.runtimes.constants.RunStates.error_states():
                 unknown_error = "Unknown error"
+
             return (
                 self.status.error
                 or self.status.reason
mlrun/model_monitoring/api.py
CHANGED

@@ -132,7 +132,6 @@ def record_results(
     drift_threshold: typing.Optional[float] = None,
     possible_drift_threshold: typing.Optional[float] = None,
     trigger_monitoring_job: bool = False,
-    last_in_batch_set: typing.Optional[bool] = True,
     artifacts_tag: str = "",
     default_batch_image="mlrun/mlrun",
 ) -> ModelEndpoint:

@@ -165,14 +164,6 @@ def record_results(
     :param possible_drift_threshold: The threshold of which to mark possible drifts.
     :param trigger_monitoring_job: If true, run the batch drift job. If not exists, the monitoring batch function
                                    will be registered through MLRun API with the provided image.
-    :param last_in_batch_set: This flag can (and should only) be used when the model endpoint does not have
-                              model-monitoring set.
-                              If set to `True` (the default), this flag marks the current monitoring window
-                              (on this monitoring endpoint) is completed - the data inferred so far is assumed
-                              to be the total data for this monitoring window.
-                              You may want to set this flag to `False` if you want to record multiple results in
-                              close time proximity ("batch set"). In this case, set this flag to `False` on all
-                              but the last batch in the set.
     :param artifacts_tag: Tag to use for all the artifacts resulted from the function. Will be relevant
                           only if the monitoring batch job has been triggered.

@@ -206,25 +197,14 @@ def record_results(
     )
 
     if model_endpoint.spec.stream_path == "":
-
-
-
-
-
-
-
-
-        )
-    else:
-        if last_in_batch_set is not None:
-            logger.warning(
-                "`last_in_batch_set` is not `None`, but the model endpoint has a stream path. "
-                "Ignoring `last_in_batch_set`, as it is relevant only when the model "
-                "endpoint does not have a model monitoring infrastructure in place (i.e. stream path is "
-                " empty). Set `last_in_batch_set` to `None` to resolve this warning.",
-                project=project,
-                endpoint_id=model_endpoint.metadata.uid,
-            )
+        logger.info(
+            "Updating the last request time to mark the current monitoring window as completed",
+            project=project,
+            endpoint_id=model_endpoint.metadata.uid,
+        )
+        bump_model_endpoint_last_request(
+            project=project, model_endpoint=model_endpoint, db=db
+        )
 
     if trigger_monitoring_job:
         # Run the monitoring batch drift job

@@ -612,9 +592,7 @@ def read_dataset_as_dataframe(
         if label_columns is None:
             label_columns = dataset.status.label_column
         # Get the features and parse to DataFrame:
-        dataset =
-            dataset.uri, drop_columns=drop_columns
-        ).to_dataframe()
+        dataset = dataset.get_offline_features(drop_columns=drop_columns).to_dataframe()
 
     elif isinstance(dataset, (list, np.ndarray)):
         if not feature_columns:
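With `last_in_batch_set` removed, closing the current monitoring window on a stream-less endpoint is now done unconditionally by bumping the endpoint's last-request timestamp via `bump_model_endpoint_last_request` (defined in mlrun/model_monitoring/helpers.py, whose hunk is not expanded here); the controller then treats everything up to that timestamp as a completed window. A toy illustration of the bumping idea, not mlrun's implementation:

    import datetime

    def bump_last_request(endpoint: dict, delta_seconds: int = 60) -> None:
        # push last_request forward so the open window is considered closed;
        # the real helper computes its own delta and persists via the DB
        endpoint["last_request"] = (
            datetime.datetime.now(tz=datetime.timezone.utc)
            + datetime.timedelta(seconds=delta_seconds)
        ).isoformat()

    ep = {"last_request": None}
    bump_last_request(ep)
    print(ep["last_request"])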
mlrun/model_monitoring/batch.py
CHANGED

@@ -117,20 +117,21 @@ class KullbackLeiblerDivergence(HistogramDistanceMetric, metric_name="kld"):
     def _calc_kl_div(
         actual_dist: np.array, expected_dist: np.array, kld_scaling: float
     ) -> float:
-        """Return the
+        """Return the asymmetric KL divergence"""
+        # We take 0*log(0) == 0 for this calculation
+        mask = actual_dist != 0
+        actual_dist = actual_dist[mask]
+        expected_dist = expected_dist[mask]
         return np.sum(
-
-
-            (
-
-                actual_dist
-                / np.where(expected_dist != 0, expected_dist, kld_scaling)
-            ),
-            0,
-        )
+            actual_dist
+            * np.log(
+                actual_dist / np.where(expected_dist != 0, expected_dist, kld_scaling)
+            ),
         )
 
-    def compute(
+    def compute(
+        self, capping: Optional[float] = None, kld_scaling: float = 1e-4
+    ) -> float:
         """
         :param capping: A bounded value for the KL Divergence. For infinite distance, the result is replaced with
                         the capping value which indicates a huge differences between the distributions.

@@ -141,8 +142,8 @@ class KullbackLeiblerDivergence(HistogramDistanceMetric, metric_name="kld"):
         t_u = self._calc_kl_div(self.distrib_t, self.distrib_u, kld_scaling)
         u_t = self._calc_kl_div(self.distrib_u, self.distrib_t, kld_scaling)
         result = t_u + u_t
-        if capping:
-            return capping
+        if capping and result == float("inf"):
+            return capping
         return result
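Two behavioral fixes are visible here: `_calc_kl_div` now masks out zero bins in the actual distribution (taking 0*log(0) == 0) before applying the scaled logarithm, and `compute` caps the result only when it is actually infinite, where the old code returned `capping` whenever it was set. A standalone NumPy sketch of the corrected computation:

    from typing import Optional

    import numpy as np

    def kl_div(actual: np.ndarray, expected: np.ndarray, scaling: float = 1e-4) -> float:
        # asymmetric KL divergence with 0*log(0) == 0 and zero-bin scaling
        mask = actual != 0
        actual, expected = actual[mask], expected[mask]
        return float(
            np.sum(actual * np.log(actual / np.where(expected != 0, expected, scaling)))
        )

    def symmetric_kld(t: np.ndarray, u: np.ndarray, capping: Optional[float] = None) -> float:
        result = kl_div(t, u) + kl_div(u, t)
        if capping and result == float("inf"):  # cap only infinite distances
            return capping
        return result

    t = np.array([0.5, 0.5, 0.0])
    u = np.array([0.9, 0.1, 0.0])
    print(symmetric_kld(t, u, capping=10.0))  # finite, so not capped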
mlrun/model_monitoring/controller.py
CHANGED

@@ -17,7 +17,7 @@ import datetime
 import json
 import os
 import re
-from typing import Any, Iterator,
+from typing import Any, Iterator, NamedTuple, Optional, Union, cast
 
 from v3io.dataplane.response import HttpResponseError

@@ -35,10 +35,15 @@ from mlrun.model_monitoring.helpers import (
     get_monitoring_parquet_path,
     get_stream_path,
 )
-from mlrun.utils import logger
+from mlrun.utils import create_logger, datetime_now, logger
 from mlrun.utils.v3io_clients import get_v3io_client
 
 
+class _Interval(NamedTuple):
+    start: datetime.datetime
+    end: datetime.datetime
+
+
 class _BatchWindow:
     V3IO_CONTAINER_FORMAT = "users/pipelines/{project}/monitoring-schedules/functions"

@@ -60,7 +65,11 @@ class _BatchWindow:
         self._endpoint = endpoint
         self._application = application
         self._first_request = first_request
-        self._kv_storage = get_v3io_client(
+        self._kv_storage = get_v3io_client(
+            endpoint=mlrun.mlconf.v3io_api,
+            # Avoid noisy warning logs before the KV table is created
+            logger=create_logger(name="v3io_client", level="error"),
+        ).kv
         self._v3io_container = self.V3IO_CONTAINER_FORMAT.format(project=project)
         self._stop = last_updated
         self._step = timedelta_seconds

@@ -75,24 +84,26 @@ class _BatchWindow:
             )
         except HttpResponseError as err:
             logger.info(
-                "
-                "as this is probably the first time this
-                "Using the latest between first
+                "No last analyzed time was found for this endpoint and "
+                "application, as this is probably the first time this "
+                "application is running. Using the latest between first "
+                "request time or last update time minus one day instead",
                 endpoint=self._endpoint,
                 application=self._application,
                 first_request=self._first_request,
-
-                error=err,
-            )
-
-            # TODO : Change the timedelta according to the policy.
-            first_period_in_seconds = max(
-                int(datetime.timedelta(days=1).total_seconds()), self._step
-            )  # max between one day and the base period
-            return max(
-                self._first_request,
-                self._stop - first_period_in_seconds,
+                last_updated=self._stop,
             )
+            logger.debug("Error while getting last analyzed time", err=err)
+            if self._first_request and self._stop:
+                # TODO : Change the timedelta according to the policy.
+                first_period_in_seconds = max(
+                    int(datetime.timedelta(days=1).total_seconds()), self._step
+                )  # max between one day and the base period
+                return max(
+                    self._first_request,
+                    self._stop - first_period_in_seconds,
+                )
+            return self._first_request
 
         last_analyzed = data.output.item[mm_constants.SchedulingKeys.LAST_ANALYZED]
         logger.info(

@@ -119,20 +130,29 @@ class _BatchWindow:
     def get_intervals(
         self,
-    ) -> Iterator[
+    ) -> Iterator[_Interval]:
         """Generate the batch interval time ranges."""
         if self._start is not None and self._stop is not None:
             entered = False
-
+            # Iterate timestamp from start until timestamp <= stop - step
+            # so that the last interval will end at (timestamp + step) <= stop.
+            # Add 1 to stop - step to get <= and not <.
+            for timestamp in range(
+                self._start, self._stop - self._step + 1, self._step
+            ):
                 entered = True
-                start_time = datetime.datetime.
-
-
+                start_time = datetime.datetime.fromtimestamp(
+                    timestamp, tz=datetime.timezone.utc
+                )
+                end_time = datetime.datetime.fromtimestamp(
+                    timestamp + self._step, tz=datetime.timezone.utc
+                )
+                yield _Interval(start_time, end_time)
                 self._update_last_analyzed(timestamp + self._step)
             if not entered:
                 logger.info(
                     "All the data is set, but no complete intervals were found. "
-                    "Wait for last_updated to be updated
+                    "Wait for last_updated to be updated",
                     endpoint=self._endpoint,
                     application=self._application,
                     start=self._start,

@@ -141,8 +161,8 @@ class _BatchWindow:
                 )
         else:
             logger.warn(
-                "The first request time is not
-                "No intervals will be generated
+                "The first request time is not found for this endpoint. "
+                "No intervals will be generated",
                 endpoint=self._endpoint,
                 application=self._application,
                 start=self._start,

@@ -185,26 +205,38 @@ class _BatchWindowGenerator:
         )
 
     @classmethod
-    def _get_last_updated_time(
+    def _get_last_updated_time(
+        cls, last_request: Optional[str], has_stream: bool
+    ) -> Optional[int]:
         """
         Get the last updated time of a model endpoint.
         """
         if not last_request:
             return None
-
+        last_updated = int(
             cls._date_string2timestamp(last_request)
             - cast(
                 float,
                 mlrun.mlconf.model_endpoint_monitoring.parquet_batching_timeout_secs,
             )
         )
+        if not has_stream:
+            # If the endpoint does not have a stream, `last_updated` should be
+            # the minimum between the current time and the last updated time.
+            # This compensates for the bumping mechanism - see
+            # `bump_model_endpoint_last_request`.
+            last_updated = min(int(datetime_now().timestamp()), last_updated)
+            logger.debug(
+                "The endpoint does not have a stream", last_updated=last_updated
+            )
+        return last_updated
 
     @classmethod
     def _normalize_first_request(
         cls, first_request: Optional[str], endpoint: str
     ) -> Optional[int]:
         if not first_request:
-            logger.
+            logger.debug(
                 "There is no first request time for this endpoint.",
                 endpoint=endpoint,
                 first_request=first_request,

@@ -223,6 +255,7 @@ class _BatchWindowGenerator:
         application: str,
         first_request: Optional[str],
         last_request: Optional[str],
+        has_stream: bool,
     ) -> _BatchWindow:
         """
         Get the batch window for a specific endpoint and application.

@@ -234,7 +267,7 @@ class _BatchWindowGenerator:
             endpoint=endpoint,
             application=application,
             timedelta_seconds=self._timedelta,
-            last_updated=self._get_last_updated_time(last_request),
+            last_updated=self._get_last_updated_time(last_request, has_stream),
             first_request=self._normalize_first_request(first_request, endpoint),
         )

@@ -259,20 +292,12 @@ class MonitoringApplicationController:
         """
         self.context = context
         self.project = project
+        self.project_obj = mlrun.get_or_create_project(project)
 
-        logger.
-            "Initializing MonitoringApplicationController",
-            project=project,
-        )
-
-        # Get a runtime database
+        context.logger.debug(f"Initializing {self.__class__.__name__}", project=project)
 
         self.db = mlrun.model_monitoring.get_model_endpoint_store(project=project)
 
-        # If an error occurs, it will be raised using the following argument
-        self.endpoints_exceptions = {}
-
-        # The batch window
         self._batch_window_generator = _BatchWindowGenerator(
             batch_dict=context.parameters[
                 mm_constants.EventFieldType.BATCH_INTERVALS_DICT

@@ -285,7 +310,7 @@ class MonitoringApplicationController:
         )
         self.model_monitoring_access_key = self._get_model_monitoring_access_key()
         self.parquet_directory = get_monitoring_parquet_path(
-
+            self.project_obj,
             kind=mm_constants.FileTargetKind.APPS_PARQUET,
         )
         self.storage_options = None

@@ -311,21 +336,23 @@ class MonitoringApplicationController:
 
     def run(self):
         """
-        Main method for run all the relevant monitoring
+        Main method for run all the relevant monitoring applications on each endpoint
         """
         try:
             endpoints = self.db.list_model_endpoints(uids=self.model_endpoints)
-
-
-
-
-
+            monitoring_functions = self.project_obj.list_model_monitoring_functions()
+            if monitoring_functions:
+                applications_names = list(
+                    {app.metadata.name for app in monitoring_functions}
+                )
             else:
-                logger.info(
+                self.context.logger.info(
+                    "No monitoring functions found", project=self.project
+                )
                 applications_names = []
 
         except Exception as e:
-            logger.error("Failed to list endpoints", exc=e)
+            self.context.logger.error("Failed to list endpoints", exc=e)
             return
         if endpoints and applications_names:
             # Initialize a process pool that will be used to run each endpoint applications on a dedicated process

@@ -362,9 +389,7 @@ class MonitoringApplicationController:
                 futures.append(future)
 
             for future in concurrent.futures.as_completed(futures):
-
-                if res:
-                    self.endpoints_exceptions[res[0]] = res[1]
+                future.result()
 
         self._delete_old_parquet(endpoints=endpoints)

@@ -378,7 +403,7 @@ class MonitoringApplicationController:
         parquet_directory: str,
         storage_options: dict,
         model_monitoring_access_key: str,
-    ) ->
+    ) -> None:
         """
         Process a model endpoint and trigger the monitoring applications. This function running on different process
         for each endpoint. In addition, this function will generate a parquet file that includes the relevant data

@@ -413,6 +438,7 @@ class MonitoringApplicationController:
             application=application,
             first_request=endpoint[mm_constants.EventFieldType.FIRST_REQUEST],
             last_request=endpoint[mm_constants.EventFieldType.LAST_REQUEST],
+            has_stream=endpoint[mm_constants.EventFieldType.STREAM_PATH] != "",
         )
 
         for start_infer_time, end_infer_time in batch_window.get_intervals():

@@ -432,22 +458,18 @@ class MonitoringApplicationController:
                     parquet_target_path = offline_response.vector.get_target_path()
 
                     if len(df) == 0:
-                        logger.
-                            "
-                            featureset_name=m_fs.metadata.name,
+                        logger.info(
+                            "During this time window, the endpoint has not received any data",
                             endpoint=endpoint[mm_constants.EventFieldType.UID],
-                            min_required_events=mlrun.mlconf.model_endpoint_monitoring.parquet_batching_max_events,
                             start_time=start_infer_time,
                             end_time=end_infer_time,
                         )
                         continue
 
-                # Continue if not enough events provided since the deployment of the model endpoint
                 except FileNotFoundError:
                     logger.warn(
-                        "
+                        "No parquets were written yet",
                         endpoint=endpoint[mm_constants.EventFieldType.UID],
-                        min_required_events=mlrun.mlconf.model_endpoint_monitoring.parquet_batching_max_events,
                     )
                     continue

@@ -481,12 +503,11 @@ class MonitoringApplicationController:
                 model_monitoring_access_key=model_monitoring_access_key,
                 parquet_target_path=parquet_target_path,
             )
-        except Exception
-            logger.
+        except Exception:
+            logger.exception(
                 "Encountered an exception",
                 endpoint_id=endpoint[mm_constants.EventFieldType.UID],
             )
-            return endpoint_id, e
 
     def _delete_old_parquet(self, endpoints: list[dict[str, Any]], days: int = 1):
         """

@@ -500,12 +521,14 @@ class MonitoringApplicationController:
             self.parquet_directory,
             {"V3IO_ACCESS_KEY": self.model_monitoring_access_key},
         )
-        fs = store.
+        fs = store.filesystem
 
         # calculate time threshold (keep only files from the last 24 hours)
-        time_to_keep =
-
-
+        time_to_keep = (
+            datetime.datetime.now(tz=datetime.timezone.utc)
+            - datetime.timedelta(days=days)
+        ).timestamp()
+
         for endpoint in endpoints:
             try:
                 apps_parquet_directories = fs.listdir(

@@ -619,14 +642,13 @@ class MonitoringApplicationController:
 
         # get offline features based on application start and end time.
         # store the result parquet by partitioning by controller end processing time
-        offline_response =
-            feature_vector=vector,
+        offline_response = vector.get_offline_features(
             start_time=start_infer_time,
             end_time=end_infer_time,
             timestamp_for_filtering=mm_constants.EventFieldType.TIMESTAMP,
             target=ParquetTarget(
                 path=parquet_directory
-                + f"/key={endpoint_id}/{start_infer_time.
+                + f"/key={endpoint_id}/{int(start_infer_time.timestamp())}/{application_name}.parquet",
                 storage_options=storage_options,
             ),
         )
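The reworked `get_intervals` walks integer timestamps with `range(start, stop - step + 1, step)`: the `+ 1` turns the exclusive `range` bound into `<=` on `stop - step`, so every yielded interval ends at or before `stop` and partial trailing windows are skipped. A self-contained sketch of the same windowing:

    import datetime
    from typing import Iterator, NamedTuple

    class Interval(NamedTuple):
        start: datetime.datetime
        end: datetime.datetime

    def get_intervals(start: int, stop: int, step: int) -> Iterator[Interval]:
        # iterate while timestamp <= stop - step, so (timestamp + step) <= stop
        for ts in range(start, stop - step + 1, step):
            yield Interval(
                datetime.datetime.fromtimestamp(ts, tz=datetime.timezone.utc),
                datetime.datetime.fromtimestamp(ts + step, tz=datetime.timezone.utc),
            )

    # 2.5 steps of data -> only the 2 complete intervals are generated
    for interval in get_intervals(start=0, stop=250, step=100):
        print(interval.start.time(), "->", interval.end.time())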
mlrun/model_monitoring/controller_handler.py
CHANGED

@@ -16,7 +16,7 @@ import mlrun
 from mlrun.model_monitoring.controller import MonitoringApplicationController
 
 
-def handler(context: mlrun.run.MLClientCtx):
+def handler(context: mlrun.run.MLClientCtx) -> None:
     """
     Run model monitoring application processor

@@ -27,5 +27,3 @@ def handler(context: mlrun.run.MLClientCtx):
         project=context.project,
     )
     monitor_app_controller.run()
-    if monitor_app_controller.endpoints_exceptions:
-        context.logger.error(monitor_app_controller.endpoints_exceptions)