PyPI - mlrun - Versions diffs - 1.10.0rc19__py3-none-any.whl → 1.10.0rc21__py3-none-any.whl - Mend

mlrun 1.10.0rc19__py3-none-any.whl → 1.10.0rc21__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of mlrun might be problematic. Click here for more details.

Files changed (31)

mlrun/common/schemas/function.py +10 -0
mlrun/common/schemas/model_monitoring/constants.py +4 -11
mlrun/common/schemas/model_monitoring/model_endpoints.py +2 -0
mlrun/datastore/model_provider/huggingface_provider.py +109 -20
mlrun/datastore/model_provider/model_provider.py +110 -32
mlrun/datastore/model_provider/openai_provider.py +87 -31
mlrun/db/base.py +0 -19
mlrun/db/httpdb.py +10 -46
mlrun/db/nopdb.py +0 -10
mlrun/launcher/base.py +0 -6
mlrun/model_monitoring/api.py +43 -22
mlrun/model_monitoring/applications/base.py +1 -1
mlrun/model_monitoring/controller.py +112 -38
mlrun/model_monitoring/db/_schedules.py +13 -9
mlrun/model_monitoring/stream_processing.py +16 -12
mlrun/platforms/__init__.py +3 -2
mlrun/projects/project.py +2 -2
mlrun/run.py +38 -5
mlrun/serving/server.py +23 -0
mlrun/serving/states.py +76 -29
mlrun/serving/system_steps.py +60 -36
mlrun/utils/helpers.py +27 -13
mlrun/utils/notifications/notification_pusher.py +1 -1
mlrun/utils/version/version.json +2 -2
{mlrun-1.10.0rc19.dist-info → mlrun-1.10.0rc21.dist-info}/METADATA +6 -5
{mlrun-1.10.0rc19.dist-info → mlrun-1.10.0rc21.dist-info}/RECORD +30 -31
mlrun/api/schemas/__init__.py +0 -259
{mlrun-1.10.0rc19.dist-info → mlrun-1.10.0rc21.dist-info}/WHEEL +0 -0
{mlrun-1.10.0rc19.dist-info → mlrun-1.10.0rc21.dist-info}/entry_points.txt +0 -0
{mlrun-1.10.0rc19.dist-info → mlrun-1.10.0rc21.dist-info}/licenses/LICENSE +0 -0
{mlrun-1.10.0rc19.dist-info → mlrun-1.10.0rc21.dist-info}/top_level.txt +0 -0

mlrun/db/httpdb.py CHANGED Viewed

@@ -24,6 +24,7 @@ from datetime import datetime, timedelta
 from os import environ, path, remove
 from typing import Literal, Optional, Union
 from urllib.parse import urlparse
+from uuid import UUID
 import pydantic.v1
 import requests
@@ -2554,50 +2555,6 @@ class HTTPRunDB(RunDBInterface):
         resp = self.api_call("GET", path, error_message)
         return FeatureSet.from_dict(resp.json())
-    def list_features(
-        self,
-        project: Optional[str] = None,
-        name: Optional[str] = None,
-        tag: Optional[str] = None,
-        entities: Optional[list[str]] = None,
-        labels: Optional[Union[str, dict[str, Optional[str]], list[str]]] = None,
-    ) -> list[dict]:
-        """List feature-sets which contain specific features. This function may return multiple versions of the same
-        feature-set if a specific tag is not requested. Note that the various filters of this function actually
-        refer to the feature-set object containing the features, not to the features themselves.
-        :param project: Project which contains these features.
-        :param name: Name of the feature to look for. The name is used in a like query, and is not case-sensitive. For
-            example, looking for ``feat`` will return features which are named ``MyFeature`` as well as ``defeat``.
-        :param tag: Return feature-sets which contain the features looked for, and are tagged with the specific tag.
-        :param entities: Return only feature-sets which contain an entity whose name is contained in this list.
-        :param labels: Filter feature-sets by label key-value pairs or key existence. This can be provided as:
-            - A dictionary in the format `{"label": "value"}` to match specific label key-value pairs,
-            or `{"label": None}` to check for key existence.
-            - A list of strings formatted as `"label=value"` to match specific label key-value pairs,
-            or just `"label"` for key existence.
-            - A comma-separated string formatted as `"label1=value1,label2"` to match entities with
-            the specified key-value pairs or key existence.
-        :returns: A list of mapping from feature to a digest of the feature-set, which contains the feature-set
-            meta-data. Multiple entries may be returned for any specific feature due to multiple tags or versions
-            of the feature-set.
-        """
-        project = project or config.active_project
-        labels = self._parse_labels(labels)
-        params = {
-            "name": name,
-            "tag": tag,
-            "entity": entities or [],
-            "label": labels,
-        }
-        path = f"projects/{project}/features"
-        error_message = f"Failed listing features, project: {project}, query: {params}"
-        resp = self.api_call("GET", path, error_message, params=params)
-        return resp.json()["features"]
     def list_features_v2(
         self,
         project: Optional[str] = None,
@@ -3834,8 +3791,8 @@ class HTTPRunDB(RunDBInterface):
                                 If tsdb_metrics=False, this parameter will be ignored and no tsdb metrics
                                 will be included.
         :param top_level:       Whether to return only top level model endpoints.
-        :param mode:            Specifies the mode of the model endpoint. Can be "real-time", "batch", or both if set
-                                to None.
+        :param mode:            Specifies the mode of the model endpoint. Can be "real-time" (0), "batch" (1), or
+                                both if set to None.
         :param uids:            A list of unique ids to filter by.
         :param latest_only:     Whether to return only the latest model endpoint version.
         :return:                A list of model endpoints.
@@ -3968,6 +3925,13 @@ class HTTPRunDB(RunDBInterface):
             raise MLRunInvalidArgumentError(
                 "Either endpoint_uid or function_name and function_tag must be provided"
             )
+        if uid:
+            try:
+                UUID(uid)
+            except (ValueError, TypeError):
+                raise MLRunInvalidArgumentError(
+                    "endpoint_id must be a valid UUID string"
+                )
     def update_model_monitoring_controller(
         self,

mlrun/db/nopdb.py CHANGED Viewed

@@ -376,16 +376,6 @@ class NopDB(RunDBInterface):
     ) -> dict:
         pass
-    def list_features(
-        self,
-        project: str,
-        name: Optional[str] = None,
-        tag: Optional[str] = None,
-        entities: Optional[list[str]] = None,
-        labels: Optional[Union[str, dict[str, Optional[str]], list[str]]] = None,
-    ) -> mlrun.common.schemas.FeaturesOutput:
-        pass
     def list_features_v2(
         self,
         project: str,

mlrun/launcher/base.py CHANGED Viewed

@@ -281,12 +281,6 @@ class BaseLauncher(abc.ABC):
         run.metadata.name = mlrun.utils.normalize_name(
             name=name or run.metadata.name or def_name,
-            # if name or runspec.metadata.name are set then it means that is user defined name and we want to warn the
-            # user that the passed name needs to be set without underscore, if its not user defined but rather enriched
-            # from the handler(function) name then we replace the underscore without warning the user.
-            # most of the time handlers will have `_` in the handler name (python convention is to separate function
-            # words with `_`), therefore we don't want to be noisy when normalizing the run name
-            verbose=bool(name or run.metadata.name),
         )
         mlrun.utils.verify_field_regex(
             "run.metadata.name", run.metadata.name, mlrun.utils.regex.run_name

mlrun/model_monitoring/api.py CHANGED Viewed

@@ -18,6 +18,7 @@ from datetime import datetime
 import numpy as np
 import pandas as pd
+from deprecated import deprecated
 import mlrun.common.schemas.model_monitoring.constants as mm_constants
 import mlrun.datastore.base
@@ -45,6 +46,14 @@ DatasetType = typing.Union[
 ]
+# TODO: Remove this in 1.12.0
+@deprecated(
+    version="1.10.0",
+    reason="This function is deprecated and will be removed in 1.12. You can generate a model endpoint by either "
+    "deploying a monitored serving function as a real-time service or running it as an offline job. "
+    "To retrieve model endpoints, use `project.list_model_endpoints()`",
+    category=FutureWarning,
+)
 def get_or_create_model_endpoint(
     project: str,
     model_endpoint_name: str,
@@ -67,8 +76,8 @@ def get_or_create_model_endpoint(
     :param model_endpoint_name:      If a new model endpoint is created, the model endpoint name will be presented
                                      under this endpoint (applicable only to new endpoint_id).
     :param model_path:               The model store path (applicable only to new endpoint_id).
-    :param endpoint_id:              Model endpoint unique ID. If not exist in DB, will generate a new record based
-                                     on the provided `endpoint_id`.
+    :param endpoint_id:              Model endpoint unique ID. If not exist in DB, will generate a new record with a
+                                     newly generated ID.
     :param function_name:            If a new model endpoint is created, use this function name.
     :param function_tag:             If a new model endpoint is created, use this function tag.
     :param context:                  MLRun context. If `function_name` not provided, use the context to generate the
@@ -91,25 +100,26 @@ def get_or_create_model_endpoint(
         function_name = FunctionURI.from_string(
             context.to_dict()["spec"]["function"]
         ).function
-    try:
-        model_endpoint = db_session.get_model_endpoint(
-            project=project,
-            name=model_endpoint_name,
-            endpoint_id=endpoint_id,
-            function_name=function_name,
-            function_tag=function_tag or "latest",
-            feature_analysis=feature_analysis,
-        )
-        # If other fields provided, validate that they are correspond to the existing model endpoint data
-        _model_endpoint_validations(
-            model_endpoint=model_endpoint,
-            model_path=model_path,
-            sample_set_statistics=sample_set_statistics,
-        )
+    if endpoint_id or function_name:
+        try:
+            model_endpoint = db_session.get_model_endpoint(
+                project=project,
+                name=model_endpoint_name,
+                endpoint_id=endpoint_id,
+                function_name=function_name,
+                function_tag=function_tag or "latest",
+                feature_analysis=feature_analysis,
+            )
+            # If other fields provided, validate that they are correspond to the existing model endpoint data
+            _model_endpoint_validations(
+                model_endpoint=model_endpoint,
+                model_path=model_path,
+                sample_set_statistics=sample_set_statistics,
+            )
-    except (mlrun.errors.MLRunNotFoundError, mlrun.errors.MLRunInvalidArgumentError):
-        # Create a new model endpoint with the provided details
-        pass
+        except mlrun.errors.MLRunNotFoundError:
+            # Create a new model endpoint with the provided details
+            pass
     if not model_endpoint:
         model_endpoint = _generate_model_endpoint(
             project=project,
@@ -123,6 +133,13 @@ def get_or_create_model_endpoint(
     return model_endpoint
+# TODO: Remove this in 1.12.0
+@deprecated(
+    version="1.10.0",
+    reason="This function is deprecated and will be removed in 1.12. "
+    "Instead, run a monitored serving function as a job",
+    category=FutureWarning,
+)
 def record_results(
     project: str,
     model_path: str,
@@ -144,8 +161,8 @@ def record_results(
     :param model_path:               The model Store path.
     :param model_endpoint_name:      If a new model endpoint is generated, the model endpoint name will be presented
                                      under this endpoint.
-    :param endpoint_id:              Model endpoint unique ID. If not exist in DB, will generate a new record based
-                                     on the provided `endpoint_id`.
+    :param endpoint_id:              Model endpoint unique ID. If not exist in DB, will generate a new record with a
+                                     newly generated ID.
     :param function_name:            If a new model endpoint is created, use this function name for generating the
                                      function URI.
     :param context:                  MLRun context. Note that the context is required generating the model endpoint.
@@ -236,6 +253,7 @@ def _model_endpoint_validations(
             key=model_obj.key,
             iter=model_obj.iter,
             tree=model_obj.tree,
+            uid=model_obj.uid,
         )
         # Enrich the uri schema with the store prefix
@@ -325,12 +343,15 @@ def _generate_model_endpoint(
     :return `mlrun.common.schemas.ModelEndpoint` object.
     """
     current_time = datetime_now()
     model_endpoint = mlrun.common.schemas.ModelEndpoint(
         metadata=mlrun.common.schemas.ModelEndpointMetadata(
             project=project,
             name=model_endpoint_name,
             endpoint_type=mlrun.common.schemas.model_monitoring.EndpointType.BATCH_EP,
+            # Due to backwards compatibility, old batch model endpoint will be analyzed as real time endpoint
+            mode=mlrun.common.schemas.model_monitoring.EndpointMode.REAL_TIME,
         ),
         spec=mlrun.common.schemas.ModelEndpointSpec(
             function_name=function_name or "function",

mlrun/model_monitoring/applications/base.py CHANGED Viewed

@@ -647,7 +647,7 @@ class ModelMonitoringApplicationBase(MonitoringApplicationToDict, ABC):
             else:
                 class_name = handler_to_class.split(".")[-1].split("::")[0]
-            job_name = mlrun.utils.normalize_name(class_name, verbose=False)
+            job_name = mlrun.utils.normalize_name(class_name)
         if not mm_constants.APP_NAME_REGEX.fullmatch(job_name):
             raise mlrun.errors.MLRunValueError(

mlrun/model_monitoring/controller.py CHANGED Viewed

@@ -11,33 +11,37 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
+import collections
 import concurrent.futures
 import datetime
 import json
 import os
 import traceback
+import warnings
 from collections.abc import Iterator
 from contextlib import AbstractContextManager
 from types import TracebackType
-from typing import Any, NamedTuple, Optional, Union, cast
+from typing import Any, Final, NamedTuple, Optional, Union, cast
 import nuclio_sdk
+import numpy as np
 import pandas as pd
 import mlrun
 import mlrun.common.schemas.model_monitoring.constants as mm_constants
+import mlrun.feature_store as fstore
 import mlrun.model_monitoring
 import mlrun.model_monitoring.db._schedules as schedules
 import mlrun.model_monitoring.helpers
 import mlrun.platforms.iguazio
+from mlrun.common.schemas import EndpointType
 from mlrun.common.schemas.model_monitoring.constants import (
     ControllerEvent,
     ControllerEventEndpointPolicy,
 )
 from mlrun.errors import err_to_str
 from mlrun.model_monitoring.helpers import batch_dict2timedelta
-from mlrun.utils import logger
+from mlrun.utils import datetime_now, logger
 _SECONDS_IN_DAY = int(datetime.timedelta(days=1).total_seconds())
 _SECONDS_IN_MINUTE = 60
@@ -49,14 +53,16 @@ class _Interval(NamedTuple):
 class _BatchWindow:
+    TIMESTAMP_RESOLUTION_MICRO: Final = 1e-6  # 0.000001 seconds or 1 microsecond
     def __init__(
         self,
         *,
         schedules_file: schedules.ModelMonitoringSchedulesFileEndpoint,
         application: str,
         timedelta_seconds: int,
-        last_updated: int,
-        first_request: int,
+        last_updated: float,
+        first_request: float,
         endpoint_mode: mm_constants.EndpointMode = mm_constants.EndpointMode.REAL_TIME,
     ) -> None:
         """
@@ -73,15 +79,17 @@ class _BatchWindow:
         self._endpoint_mode = endpoint_mode
         self._start = self._get_last_analyzed()
-    def _get_saved_last_analyzed(self) -> Optional[int]:
-        return cast(int, self._db.get_application_time(self._application))
+    def _get_saved_last_analyzed(
+        self,
+    ) -> Optional[float]:
+        return self._db.get_application_time(self._application)
-    def _update_last_analyzed(self, last_analyzed: int) -> None:
+    def _update_last_analyzed(self, last_analyzed: float) -> None:
         self._db.update_application_time(
             application=self._application, timestamp=last_analyzed
         )
-    def _get_initial_last_analyzed(self) -> int:
+    def _get_initial_last_analyzed(self) -> float:
         if self._endpoint_mode == mm_constants.EndpointMode.BATCH:
             logger.info(
                 "No last analyzed time was found for this endpoint and application, as this is "
@@ -107,7 +115,7 @@ class _BatchWindow:
             self._stop - first_period_in_seconds,
         )
-    def _get_last_analyzed(self) -> int:
+    def _get_last_analyzed(self) -> float:
         saved_last_analyzed = self._get_saved_last_analyzed()
         if saved_last_analyzed is not None:
             if self._endpoint_mode == mm_constants.EndpointMode.BATCH:
@@ -127,13 +135,16 @@ class _BatchWindow:
         # Iterate timestamp from start until timestamp <= stop - step
         # so that the last interval will end at (timestamp + step) <= stop.
         # Add 1 to stop - step to get <= and not <.
-        for timestamp in range(self._start, self._stop - self._step + 1, self._step):
+        for timestamp in np.arange(
+            self._start, self._stop - self._step + 1, self._step
+        ):
             entered = True
             start_time = datetime.datetime.fromtimestamp(
                 timestamp, tz=datetime.timezone.utc
             )
             end_time = datetime.datetime.fromtimestamp(
-                timestamp + self._step, tz=datetime.timezone.utc
+                timestamp - self.TIMESTAMP_RESOLUTION_MICRO + self._step,
+                tz=datetime.timezone.utc,
             )
             yield _Interval(start_time, end_time)
@@ -149,7 +160,7 @@ class _BatchWindow:
             # If the endpoint is a batch endpoint, we need to update the last analyzed time
             # to the end of the batch time.
             if last_analyzed:
-                if last_analyzed < self._stop:
+                if last_analyzed - self.TIMESTAMP_RESOLUTION_MICRO < self._stop:
                     # If the last analyzed time is earlier than the stop time,
                     # yield the final partial interval from last_analyzed to stop
                     yield _Interval(
@@ -223,7 +234,7 @@ class _BatchWindowGenerator(AbstractContextManager):
     def get_application_list(self) -> set[str]:
         return self._schedules_file.get_application_list()
-    def get_min_last_analyzed(self) -> Optional[int]:
+    def get_min_last_analyzed(self) -> Optional[float]:
         return self._schedules_file.get_min_timestamp()
     @classmethod
@@ -231,22 +242,29 @@ class _BatchWindowGenerator(AbstractContextManager):
         cls,
         last_request: datetime.datetime,
         endpoint_mode: mm_constants.EndpointMode,
-    ) -> int:
+        not_old_batch_endpoint: bool,
+    ) -> float:
         """
         Get the last updated time of a model endpoint.
         """
         if endpoint_mode == mm_constants.EndpointMode.REAL_TIME:
-            last_updated = int(
-                last_request.timestamp()
-                - cast(
-                    float,
-                    mlrun.mlconf.model_endpoint_monitoring.parquet_batching_timeout_secs,
-                )
+            last_updated = last_request.timestamp() - cast(
+                float,
+                mlrun.mlconf.model_endpoint_monitoring.parquet_batching_timeout_secs,
             )
+            if not not_old_batch_endpoint:
+                # If the endpoint does not have a stream, `last_updated` should be
+                # the minimum between the current time and the last updated time.
+                # This compensates for the bumping mechanism - see
+                # `update_model_endpoint_last_request`.
+                last_updated = min(datetime_now().timestamp(), last_updated)
+                logger.debug(
+                    "The endpoint does not have a stream", last_updated=last_updated
+                )
             return last_updated
-        return int(last_request.timestamp())
+        return last_request.timestamp()
     def get_intervals(
         self,
@@ -255,6 +273,7 @@ class _BatchWindowGenerator(AbstractContextManager):
         first_request: datetime.datetime,
         last_request: datetime.datetime,
         endpoint_mode: mm_constants.EndpointMode,
+        not_old_batch_endpoint: bool,
     ) -> Iterator[_Interval]:
         """
         Get the batch window for a specific endpoint and application.
@@ -266,8 +285,10 @@ class _BatchWindowGenerator(AbstractContextManager):
             schedules_file=self._schedules_file,
             application=application,
             timedelta_seconds=self._timedelta,
-            last_updated=self._get_last_updated_time(last_request, endpoint_mode),
-            first_request=int(first_request.timestamp()),
+            last_updated=self._get_last_updated_time(
+                last_request, endpoint_mode, not_old_batch_endpoint
+            ),
+            first_request=first_request.timestamp(),
             endpoint_mode=endpoint_mode,
         )
         yield from self.batch_window.get_intervals()
@@ -291,6 +312,8 @@ class MonitoringApplicationController:
     Note that the MonitoringApplicationController object requires access keys along with valid project configurations.
     """
+    _MAX_FEATURE_SET_PER_WORKER = 1000
     def __init__(self) -> None:
         """Initialize Monitoring Application Controller"""
         self.project = cast(str, mlrun.mlconf.active_project)
@@ -324,6 +347,9 @@ class MonitoringApplicationController:
                 mlrun.platforms.iguazio.KafkaOutputStream,
             ],
         ] = {}
+        self.feature_sets: collections.OrderedDict[
+            str, mlrun.feature_store.FeatureSet
+        ] = collections.OrderedDict()
         self.tsdb_connector = mlrun.model_monitoring.get_tsdb_connector(
             project=self.project
         )
@@ -433,15 +459,14 @@ class MonitoringApplicationController:
                         base_period_minutes, current_min_last_analyzed, current_time
                     )
                     and (
-                        int(endpoint.status.last_request.timestamp())
-                        != last_timestamp_sent
+                        endpoint.status.last_request.timestamp() != last_timestamp_sent
                         or current_min_last_analyzed != last_analyzed_sent
                     )
                 ):
                     # Write to schedule chief file the last_request, min_last_analyzed we pushed event to stream
                     schedules_file.update_endpoint_timestamps(
                         endpoint_uid=endpoint.metadata.uid,
-                        last_request=int(endpoint.status.last_request.timestamp()),
+                        last_request=endpoint.status.last_request.timestamp(),
                         last_analyzed=current_min_last_analyzed,
                     )
                     return True
@@ -460,13 +485,14 @@ class MonitoringApplicationController:
                 last_request=endpoint.status.last_request,
                 first_request=endpoint.status.first_request,
                 endpoint_type=endpoint.metadata.endpoint_type,
+                feature_set_uri=endpoint.spec.monitoring_feature_set_uri,
             )
         return False
     @staticmethod
     def _should_send_nop_event(
         base_period_minutes: int,
-        min_last_analyzed: int,
+        min_last_analyzed: float,
         current_time: datetime.datetime,
     ):
         if min_last_analyzed:
@@ -515,7 +541,7 @@ class MonitoringApplicationController:
         try:
             project_name = event[ControllerEvent.PROJECT]
             endpoint_id = event[ControllerEvent.ENDPOINT_ID]
+            not_old_batch_endpoint = True
             if (
                 event[ControllerEvent.KIND]
                 == mm_constants.ControllerEventKind.BATCH_COMPLETE
@@ -572,6 +598,10 @@ class MonitoringApplicationController:
                 endpoint_mode = mm_constants.EndpointMode.REAL_TIME
+                not_old_batch_endpoint = (
+                    event[ControllerEvent.ENDPOINT_TYPE] != EndpointType.BATCH_EP
+                )
             logger.info(
                 "Starting to analyze", timestamp=last_stream_timestamp.isoformat()
             )
@@ -590,16 +620,49 @@ class MonitoringApplicationController:
                         first_request=first_request,
                         last_request=last_stream_timestamp,
                         endpoint_mode=endpoint_mode,
+                        not_old_batch_endpoint=not_old_batch_endpoint,
                     ):
                         data_in_window = False
-                        # Serving endpoint - get the relevant window data from the TSDB
-                        prediction_metric = self.tsdb_connector.read_predictions(
-                            start=start_infer_time,
-                            end=end_infer_time,
-                            endpoint_id=endpoint_id,
-                        )
-                        if prediction_metric.data:
-                            data_in_window = True
+                        if not_old_batch_endpoint:
+                            # Serving endpoint - get the relevant window data from the TSDB
+                            prediction_metric = self.tsdb_connector.read_predictions(
+                                start=start_infer_time,
+                                end=end_infer_time,
+                                endpoint_id=endpoint_id,
+                            )
+                            if prediction_metric.data:
+                                data_in_window = True
+                        else:
+                            # Old batch endpoint - get the relevant window data from the parquet target
+                            warnings.warn(
+                                "Analyzing batch model endpoints with real time processing events is "
+                                "deprecated in 1.10.0 and will be removed in 1.12.0. "
+                                "Instead, use job-based serving to invoke and analyze offline batch model"
+                                "endpoints.",
+                                # TODO: Remove this in 1.12.0
+                                FutureWarning,
+                            )
+                            if endpoint_id not in self.feature_sets:
+                                self.feature_sets[endpoint_id] = fstore.get_feature_set(
+                                    event[ControllerEvent.FEATURE_SET_URI]
+                                )
+                            self.feature_sets.move_to_end(endpoint_id, last=False)
+                            if (
+                                len(self.feature_sets)
+                                > self._MAX_FEATURE_SET_PER_WORKER
+                            ):
+                                self.feature_sets.popitem(last=True)
+                            m_fs = self.feature_sets.get(endpoint_id)
+                            df = m_fs.to_dataframe(
+                                start_time=start_infer_time,
+                                end_time=end_infer_time,
+                                time_column=mm_constants.EventFieldType.TIMESTAMP,
+                                storage_options=self.storage_options,
+                            )
+                            if len(df) > 0:
+                                data_in_window = True
                         if not data_in_window:
                             logger.info(
@@ -616,7 +679,10 @@ class MonitoringApplicationController:
                                 endpoint_id=endpoint_id,
                             )
                             self._push_to_applications(
-                                start_infer_time=start_infer_time,
+                                start_infer_time=start_infer_time
+                                - datetime.timedelta(
+                                    batch_window_generator.batch_window.TIMESTAMP_RESOLUTION_MICRO
+                                ),  # We subtract a microsecond to ensure that the apps will retrieve start time data.
                                 end_infer_time=end_infer_time,
                                 endpoint_id=endpoint_id,
                                 endpoint_name=endpoint_name,
@@ -653,6 +719,9 @@ class MonitoringApplicationController:
                             ControllerEvent.ENDPOINT_TYPE: event[
                                 ControllerEvent.ENDPOINT_TYPE
                             ],
+                            ControllerEvent.FEATURE_SET_URI: event[
+                                ControllerEvent.FEATURE_SET_URI
+                            ],
                             ControllerEvent.FIRST_REQUEST: event[
                                 ControllerEvent.FIRST_REQUEST
                             ],
@@ -842,6 +911,7 @@ class MonitoringApplicationController:
                     sep=" ", timespec="microseconds"
                 ),
                 endpoint_type=endpoint.metadata.endpoint_type,
+                feature_set_uri=endpoint.spec.monitoring_feature_set_uri,
                 endpoint_policy=json.dumps(policy),
             )
             policy[ControllerEventEndpointPolicy.ENDPOINT_UPDATED] = (
@@ -859,6 +929,7 @@ class MonitoringApplicationController:
                     sep=" ", timespec="microseconds"
                 ),
                 endpoint_type=endpoint.metadata.endpoint_type.value,
+                feature_set_uri=endpoint.spec.monitoring_feature_set_uri,
                 endpoint_policy=policy,
             )
@@ -871,6 +942,7 @@ class MonitoringApplicationController:
         timestamp: str,
         first_request: str,
         endpoint_type: int,
+        feature_set_uri: str,
         endpoint_policy: dict[str, Any],
     ) -> None:
         """
@@ -883,6 +955,7 @@ class MonitoringApplicationController:
         :param endpoint_id: endpoint id string
         :param endpoint_name: the endpoint name string
         :param endpoint_type: Enum of the endpoint type
+        :param feature_set_uri: the feature set uri string
         """
         event = {
             ControllerEvent.KIND.value: kind,
@@ -892,6 +965,7 @@ class MonitoringApplicationController:
             ControllerEvent.TIMESTAMP.value: timestamp,
             ControllerEvent.FIRST_REQUEST.value: first_request,
             ControllerEvent.ENDPOINT_TYPE.value: endpoint_type,
+            ControllerEvent.FEATURE_SET_URI.value: feature_set_uri,
             ControllerEvent.ENDPOINT_POLICY.value: endpoint_policy,
         }
         logger.info(

mlrun 1.10.0rc19__py3-none-any.whl → 1.10.0rc21__py3-none-any.whl

Potentially problematic release.

mlrun 1.10.0rc19__py3-none-any.whl → 1.10.0rc21__py3-none-any.whl