PyPI - mlrun - Versions diffs - 1.10.0rc18__py3-none-any.whl → 1.11.0rc16__py3-none-any.whl - Mend

mlrun 1.10.0rc18-py3-none-any.whl → 1.11.0rc16-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of mlrun might be problematic. Click here for more details.

Files changed (167)

mlrun/__init__.py +24 -3
mlrun/__main__.py +0 -4
mlrun/artifacts/dataset.py +2 -2
mlrun/artifacts/document.py +6 -1
mlrun/artifacts/llm_prompt.py +21 -15
mlrun/artifacts/model.py +3 -3
mlrun/artifacts/plots.py +1 -1
mlrun/{model_monitoring/db/tsdb/tdengine → auth}/__init__.py +2 -3
mlrun/auth/nuclio.py +89 -0
mlrun/auth/providers.py +429 -0
mlrun/auth/utils.py +415 -0
mlrun/common/constants.py +14 -0
mlrun/common/model_monitoring/helpers.py +123 -0
mlrun/common/runtimes/constants.py +28 -0
mlrun/common/schemas/__init__.py +14 -3
mlrun/common/schemas/alert.py +2 -2
mlrun/common/schemas/api_gateway.py +3 -0
mlrun/common/schemas/auth.py +12 -10
mlrun/common/schemas/client_spec.py +4 -0
mlrun/common/schemas/constants.py +25 -0
mlrun/common/schemas/frontend_spec.py +1 -8
mlrun/common/schemas/function.py +34 -0
mlrun/common/schemas/hub.py +33 -20
mlrun/common/schemas/model_monitoring/__init__.py +2 -1
mlrun/common/schemas/model_monitoring/constants.py +12 -15
mlrun/common/schemas/model_monitoring/functions.py +13 -4
mlrun/common/schemas/model_monitoring/model_endpoints.py +11 -0
mlrun/common/schemas/pipeline.py +1 -1
mlrun/common/schemas/secret.py +17 -2
mlrun/common/secrets.py +95 -1
mlrun/common/types.py +10 -10
mlrun/config.py +69 -19
mlrun/data_types/infer.py +2 -2
mlrun/datastore/__init__.py +12 -5
mlrun/datastore/azure_blob.py +162 -47
mlrun/datastore/base.py +274 -10
mlrun/datastore/datastore.py +7 -2
mlrun/datastore/datastore_profile.py +84 -22
mlrun/datastore/model_provider/huggingface_provider.py +225 -41
mlrun/datastore/model_provider/mock_model_provider.py +87 -0
mlrun/datastore/model_provider/model_provider.py +206 -74
mlrun/datastore/model_provider/openai_provider.py +226 -66
mlrun/datastore/s3.py +39 -18
mlrun/datastore/sources.py +1 -1
mlrun/datastore/store_resources.py +4 -4
mlrun/datastore/storeytargets.py +17 -12
mlrun/datastore/targets.py +1 -1
mlrun/datastore/utils.py +25 -6
mlrun/datastore/v3io.py +1 -1
mlrun/db/base.py +63 -32
mlrun/db/httpdb.py +373 -153
mlrun/db/nopdb.py +54 -21
mlrun/errors.py +4 -2
mlrun/execution.py +66 -25
mlrun/feature_store/api.py +1 -1
mlrun/feature_store/common.py +1 -1
mlrun/feature_store/feature_vector_utils.py +1 -1
mlrun/feature_store/steps.py +8 -6
mlrun/frameworks/_common/utils.py +3 -3
mlrun/frameworks/_dl_common/loggers/logger.py +1 -1
mlrun/frameworks/_dl_common/loggers/tensorboard_logger.py +2 -1
mlrun/frameworks/_ml_common/loggers/mlrun_logger.py +1 -1
mlrun/frameworks/_ml_common/utils.py +2 -1
mlrun/frameworks/auto_mlrun/auto_mlrun.py +4 -3
mlrun/frameworks/lgbm/mlrun_interfaces/mlrun_interface.py +2 -1
mlrun/frameworks/onnx/dataset.py +2 -1
mlrun/frameworks/onnx/mlrun_interface.py +2 -1
mlrun/frameworks/pytorch/callbacks/logging_callback.py +5 -4
mlrun/frameworks/pytorch/callbacks/mlrun_logging_callback.py +2 -1
mlrun/frameworks/pytorch/callbacks/tensorboard_logging_callback.py +2 -1
mlrun/frameworks/pytorch/utils.py +2 -1
mlrun/frameworks/sklearn/metric.py +2 -1
mlrun/frameworks/tf_keras/callbacks/logging_callback.py +5 -4
mlrun/frameworks/tf_keras/callbacks/mlrun_logging_callback.py +2 -1
mlrun/frameworks/tf_keras/callbacks/tensorboard_logging_callback.py +2 -1
mlrun/hub/__init__.py +52 -0
mlrun/hub/base.py +142 -0
mlrun/hub/module.py +172 -0
mlrun/hub/step.py +113 -0
mlrun/k8s_utils.py +105 -16
mlrun/launcher/base.py +15 -7
mlrun/launcher/local.py +4 -1
mlrun/model.py +14 -4
mlrun/model_monitoring/__init__.py +0 -1
mlrun/model_monitoring/api.py +65 -28
mlrun/model_monitoring/applications/__init__.py +1 -1
mlrun/model_monitoring/applications/base.py +299 -128
mlrun/model_monitoring/applications/context.py +2 -4
mlrun/model_monitoring/controller.py +132 -58
mlrun/model_monitoring/db/_schedules.py +38 -29
mlrun/model_monitoring/db/_stats.py +6 -16
mlrun/model_monitoring/db/tsdb/__init__.py +9 -7
mlrun/model_monitoring/db/tsdb/base.py +29 -9
mlrun/model_monitoring/db/tsdb/preaggregate.py +234 -0
mlrun/model_monitoring/db/tsdb/stream_graph_steps.py +63 -0
mlrun/model_monitoring/db/tsdb/timescaledb/queries/timescaledb_metrics_queries.py +414 -0
mlrun/model_monitoring/db/tsdb/timescaledb/queries/timescaledb_predictions_queries.py +376 -0
mlrun/model_monitoring/db/tsdb/timescaledb/queries/timescaledb_results_queries.py +590 -0
mlrun/model_monitoring/db/tsdb/timescaledb/timescaledb_connection.py +434 -0
mlrun/model_monitoring/db/tsdb/timescaledb/timescaledb_connector.py +541 -0
mlrun/model_monitoring/db/tsdb/timescaledb/timescaledb_operations.py +808 -0
mlrun/model_monitoring/db/tsdb/timescaledb/timescaledb_schema.py +502 -0
mlrun/model_monitoring/db/tsdb/timescaledb/timescaledb_stream.py +163 -0
mlrun/model_monitoring/db/tsdb/timescaledb/timescaledb_stream_graph_steps.py +60 -0
mlrun/model_monitoring/db/tsdb/timescaledb/utils/timescaledb_dataframe_processor.py +141 -0
mlrun/model_monitoring/db/tsdb/timescaledb/utils/timescaledb_query_builder.py +585 -0
mlrun/model_monitoring/db/tsdb/timescaledb/writer_graph_steps.py +73 -0
mlrun/model_monitoring/db/tsdb/v3io/stream_graph_steps.py +20 -9
mlrun/model_monitoring/db/tsdb/v3io/v3io_connector.py +235 -51
mlrun/model_monitoring/features_drift_table.py +2 -1
mlrun/model_monitoring/helpers.py +30 -6
mlrun/model_monitoring/stream_processing.py +34 -28
mlrun/model_monitoring/writer.py +224 -4
mlrun/package/__init__.py +2 -1
mlrun/platforms/__init__.py +0 -43
mlrun/platforms/iguazio.py +8 -4
mlrun/projects/operations.py +17 -11
mlrun/projects/pipelines.py +2 -2
mlrun/projects/project.py +187 -123
mlrun/run.py +95 -21
mlrun/runtimes/__init__.py +2 -186
mlrun/runtimes/base.py +103 -25
mlrun/runtimes/constants.py +225 -0
mlrun/runtimes/daskjob.py +5 -2
mlrun/runtimes/databricks_job/databricks_runtime.py +2 -1
mlrun/runtimes/local.py +5 -2
mlrun/runtimes/mounts.py +20 -2
mlrun/runtimes/nuclio/__init__.py +12 -7
mlrun/runtimes/nuclio/api_gateway.py +36 -6
mlrun/runtimes/nuclio/application/application.py +339 -40
mlrun/runtimes/nuclio/function.py +222 -72
mlrun/runtimes/nuclio/serving.py +132 -42
mlrun/runtimes/pod.py +213 -21
mlrun/runtimes/utils.py +49 -9
mlrun/secrets.py +99 -14
mlrun/serving/__init__.py +2 -0
mlrun/serving/remote.py +84 -11
mlrun/serving/routers.py +26 -44
mlrun/serving/server.py +138 -51
mlrun/serving/serving_wrapper.py +6 -2
mlrun/serving/states.py +997 -283
mlrun/serving/steps.py +62 -0
mlrun/serving/system_steps.py +149 -95
mlrun/serving/v2_serving.py +9 -10
mlrun/track/trackers/mlflow_tracker.py +29 -31
mlrun/utils/helpers.py +292 -94
mlrun/utils/http.py +9 -2
mlrun/utils/notifications/notification/base.py +18 -0
mlrun/utils/notifications/notification/git.py +3 -5
mlrun/utils/notifications/notification/mail.py +39 -16
mlrun/utils/notifications/notification/slack.py +2 -4
mlrun/utils/notifications/notification/webhook.py +2 -5
mlrun/utils/notifications/notification_pusher.py +3 -3
mlrun/utils/version/version.json +2 -2
mlrun/utils/version/version.py +3 -4
{mlrun-1.10.0rc18.dist-info → mlrun-1.11.0rc16.dist-info}/METADATA +63 -74
{mlrun-1.10.0rc18.dist-info → mlrun-1.11.0rc16.dist-info}/RECORD +161 -143
mlrun/api/schemas/__init__.py +0 -259
mlrun/db/auth_utils.py +0 -152
mlrun/model_monitoring/db/tsdb/tdengine/schemas.py +0 -344
mlrun/model_monitoring/db/tsdb/tdengine/stream_graph_steps.py +0 -75
mlrun/model_monitoring/db/tsdb/tdengine/tdengine_connection.py +0 -281
mlrun/model_monitoring/db/tsdb/tdengine/tdengine_connector.py +0 -1266
{mlrun-1.10.0rc18.dist-info → mlrun-1.11.0rc16.dist-info}/WHEEL +0 -0
{mlrun-1.10.0rc18.dist-info → mlrun-1.11.0rc16.dist-info}/entry_points.txt +0 -0
{mlrun-1.10.0rc18.dist-info → mlrun-1.11.0rc16.dist-info}/licenses/LICENSE +0 -0
{mlrun-1.10.0rc18.dist-info → mlrun-1.11.0rc16.dist-info}/top_level.txt +0 -0

mlrun/model_monitoring/db/tsdb/v3io/stream_graph_steps.py CHANGED Viewed

@@ -25,10 +25,12 @@ from mlrun.utils import logger
 def _normalize_dict_for_v3io_frames(event: dict[str, Any]) -> dict[str, Any]:
     """
-    Normalize user defined keys - input data to a model and its predictions,
-    to a form V3IO frames tolerates.
+    Normalize user-defined keys (e.g., model input data and predictions) to a format V3IO Frames tolerates.
-    The dictionary keys should conform to '^[a-zA-Z_:]([a-zA-Z0-9_:])*$'.
+    - Keys must match regex: '^[a-zA-Z_:]([a-zA-Z0-9_:])*$'
+    - Replace invalid characters (e.g., '-') with '_'.
+    - Prefix keys starting with digits with '_'.
+    - Flatten nested dictionaries using dot notation, while normalizing keys recursively.
     """
     prefix = "_"
@@ -38,7 +40,18 @@ def _normalize_dict_for_v3io_frames(event: dict[str, Any]) -> dict[str, Any]:
             return prefix + key
         return key
-    return {norm_key(k): v for k, v in event.items()}
+    def flatten_dict(d: dict[str, Any], parent_key: str = "") -> dict[str, Any]:
+        items = {}
+        for k, v in d.items():
+            new_key = norm_key(k)
+            full_key = f"{parent_key}:{new_key}" if parent_key else new_key
+            if isinstance(v, dict):
+                items.update(flatten_dict(v, full_key))
+            else:
+                items[full_key] = v
+        return items
+    return flatten_dict(event)
 class ProcessBeforeTSDB(mlrun.feature_store.steps.MapClass):
@@ -138,11 +151,9 @@ class FilterAndUnpackKeys(mlrun.feature_store.steps.MapClass):
 class ErrorExtractor(mlrun.feature_store.steps.MapClass):
-    def __init__(self, **kwargs):
-        """
-        Prepare the event for insertion into the errors TSDB table.
-        """
-        super().__init__(**kwargs)
+    """
+    Prepare the event for insertion into the errors TSDB table.
+    """
     def do(self, event):
         error = event.get("error")

mlrun/model_monitoring/db/tsdb/v3io/v3io_connector.py CHANGED Viewed

@@ -12,9 +12,10 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 import math
+from collections import defaultdict
 from datetime import datetime, timedelta
 from io import StringIO
-from typing import Callable, Literal, Optional, Union
+from typing import Literal, Optional, Union
 import pandas as pd
 import v3io_frames
@@ -25,6 +26,7 @@ import mlrun.common.schemas.model_monitoring as mm_schemas
 import mlrun.feature_store.steps
 import mlrun.utils.v3io_clients
 from mlrun.common.schemas import EventFieldType
+from mlrun.config import config
 from mlrun.model_monitoring.db import TSDBConnector
 from mlrun.model_monitoring.helpers import get_invocations_fqn, get_start_end
 from mlrun.utils import logger
@@ -224,6 +226,24 @@ class V3IOTSDBConnector(TSDBConnector):
         - endpoint_features (Prediction and feature names and values)
         - custom_metrics (user-defined metrics)
         """
+        def apply_list_to_single_dict():
+            graph.add_step(
+                "storey.Map",
+                "MapListToSingleDict",
+                after="FilterNOP",
+                _fn="(event[0] if isinstance(event, list) else event)",
+            )
+            graph.add_step(
+                "mlrun.model_monitoring.stream_processing.MapFeatureNames",
+                name="MapFeatureNamesTSDB",
+                infer_columns_from_data=True,
+                project=self.project,
+                after="MapListToSingleDict",
+            )
+        apply_list_to_single_dict()
         aggregate_windows = aggregate_windows or ["5m", "1h"]
         # Calculate number of predictions and average latency
@@ -241,7 +261,7 @@ class V3IOTSDBConnector(TSDBConnector):
                     }
                 ],
                 name=EventFieldType.LATENCY,
-                after="FilterNOP",
+                after="MapFeatureNamesTSDB",
                 step_name="Aggregates",
                 table=".",
                 key_field=EventFieldType.ENDPOINT_ID,
@@ -262,7 +282,7 @@ class V3IOTSDBConnector(TSDBConnector):
         graph.add_step(
             "storey.TSDBTarget",
             name="tsdb_predictions",
-            after="FilterNOP",
+            after="MapFeatureNamesTSDB",
             path=f"{self.container}/{self.tables[mm_schemas.V3IOTSDBTables.PREDICTIONS]}",
             time_col=mm_schemas.EventFieldType.TIMESTAMP,
             container=self.container,
@@ -369,6 +389,49 @@ class V3IOTSDBConnector(TSDBConnector):
         apply_storey_filter()
         apply_tsdb_target(name="tsdb3", after="FilterNotNone")
+    def apply_writer_steps(self, graph, after, **kwargs) -> None:
+        graph.add_step(
+            "storey.TSDBTarget",
+            name="tsdb_metrics",
+            after=after,
+            path=f"{self.container}/{self.tables[mm_schemas.V3IOTSDBTables.METRICS]}",
+            time_col=mm_schemas.WriterEvent.END_INFER_TIME,
+            container=self.container,
+            v3io_frames=self.v3io_framesd,
+            infer_columns_from_data=True,
+            graph_shape="cylinder",
+            index_cols=[
+                mm_schemas.WriterEvent.APPLICATION_NAME,
+                mm_schemas.WriterEvent.ENDPOINT_NAME,
+                mm_schemas.WriterEvent.ENDPOINT_ID,
+                mm_schemas.MetricData.METRIC_NAME,
+            ],
+            max_events=config.model_endpoint_monitoring.writer_graph.max_events,
+            flush_after_seconds=config.model_endpoint_monitoring.writer_graph.flush_after_seconds,
+            key=mm_schemas.EventFieldType.ENDPOINT_ID,
+        )
+        graph.add_step(
+            "storey.TSDBTarget",
+            name="tsdb_app_results",
+            after=after,
+            path=f"{self.container}/{self.tables[mm_schemas.V3IOTSDBTables.APP_RESULTS]}",
+            time_col=mm_schemas.WriterEvent.END_INFER_TIME,
+            container=self.container,
+            v3io_frames=self.v3io_framesd,
+            infer_columns_from_data=True,
+            graph_shape="cylinder",
+            index_cols=[
+                mm_schemas.WriterEvent.APPLICATION_NAME,
+                mm_schemas.WriterEvent.ENDPOINT_NAME,
+                mm_schemas.WriterEvent.ENDPOINT_ID,
+                mm_schemas.ResultData.RESULT_NAME,
+            ],
+            max_events=config.model_endpoint_monitoring.writer_graph.max_events,
+            flush_after_seconds=config.model_endpoint_monitoring.writer_graph.flush_after_seconds,
+            key=mm_schemas.EventFieldType.ENDPOINT_ID,
+        )
     def handle_model_error(
         self,
         graph,
@@ -492,7 +555,8 @@ class V3IOTSDBConnector(TSDBConnector):
         # Split the endpoint ids into chunks to avoid exceeding the v3io-engine filter-expression limit
         for i in range(0, len(endpoint_ids), V3IO_FRAMESD_MEPS_LIMIT):
             endpoint_id_chunk = endpoint_ids[i : i + V3IO_FRAMESD_MEPS_LIMIT]
-            filter_query = f"endpoint_id IN({str(endpoint_id_chunk)[1:-1]}) "
+            endpoints_list = "', '".join(endpoint_id_chunk)
+            filter_query = f"endpoint_id IN('{endpoints_list}')"
             for table in tables:
                 try:
                     self.frames_client.delete(
@@ -532,6 +596,43 @@ class V3IOTSDBConnector(TSDBConnector):
                     project=self.project,
                 )
+    def delete_application_records(
+        self, application_name: str, endpoint_ids: Optional[list[str]] = None
+    ) -> None:
+        """
+        Delete application records from the TSDB for the given model endpoints or all if ``endpoint_ids`` is ``None``.
+        """
+        base_filter_query = f"application_name=='{application_name}'"
+        filter_queries: list[str] = []
+        if endpoint_ids:
+            for i in range(0, len(endpoint_ids), V3IO_FRAMESD_MEPS_LIMIT):
+                endpoint_id_chunk = endpoint_ids[i : i + V3IO_FRAMESD_MEPS_LIMIT]
+                endpoints_list = "', '".join(endpoint_id_chunk)
+                filter_queries.append(
+                    f"{base_filter_query} AND endpoint_id IN ('{endpoints_list}')"
+                )
+        else:
+            filter_queries = [base_filter_query]
+        for table in [
+            self.tables[mm_schemas.V3IOTSDBTables.APP_RESULTS],
+            self.tables[mm_schemas.V3IOTSDBTables.METRICS],
+        ]:
+            logger.debug(
+                "Deleting application records from TSDB",
+                table=table,
+                filter_queries=filter_queries,
+                project=self.project,
+            )
+            for filter_query in filter_queries:
+                self.frames_client.delete(
+                    backend=_TSDB_BE,
+                    table=table,
+                    filter=filter_query,
+                    start="0",
+                )
     def get_model_endpoint_real_time_metrics(
         self, endpoint_id: str, metrics: list[str], start: str, end: str
     ) -> dict[str, list[tuple[str, float]]]:
@@ -935,6 +1036,9 @@ class V3IOTSDBConnector(TSDBConnector):
         start: Optional[datetime] = None,
         end: Optional[datetime] = None,
     ) -> dict[str, float]:
+        if not endpoint_ids:
+            return {}
         # Get the last request timestamp for each endpoint from the KV table.
         # The result of the query is a list of dictionaries,
         # each dictionary contains the endpoint id and the last request timestamp.
@@ -1145,11 +1249,9 @@ class V3IOTSDBConnector(TSDBConnector):
             )
         return df.reset_index(drop=True)
-    async def add_basic_metrics(
+    def add_basic_metrics(
         self,
         model_endpoint_objects: list[mlrun.common.schemas.ModelEndpoint],
-        project: str,
-        run_in_threadpool: Callable,
         metric_list: Optional[list[str]] = None,
     ) -> list[mlrun.common.schemas.ModelEndpoint]:
         """
@@ -1157,8 +1259,6 @@ class V3IOTSDBConnector(TSDBConnector):
         :param model_endpoint_objects: A list of `ModelEndpoint` objects that will
                                        be filled with the relevant basic metrics.
-        :param project:                The name of the project.
-        :param run_in_threadpool:      A function that runs another function in a thread pool.
         :param metric_list:            List of metrics to include from the time series DB. Defaults to all metrics.
         :return: A list of `ModelEndpointMonitoringMetric` objects.
@@ -1187,8 +1287,7 @@ class V3IOTSDBConnector(TSDBConnector):
             function,
             _,
         ) in metric_name_to_function_and_column_name.items():
-            metric_name_to_result[metric_name] = await run_in_threadpool(
-                function,
+            metric_name_to_result[metric_name] = function(
                 endpoint_ids=uids,
                 get_raw=True,
             )
@@ -1259,7 +1358,7 @@ class V3IOTSDBConnector(TSDBConnector):
             else:
                 filter_query = app_filter_query
-        df = self._get_records(
+        raw_frames: list[v3io_frames.client.RawFrame] = self._get_records(
             table=mm_schemas.V3IOTSDBTables.APP_RESULTS,
             start=start,
             end=end,
@@ -1268,39 +1367,33 @@ class V3IOTSDBConnector(TSDBConnector):
                 mm_schemas.ResultData.RESULT_STATUS,
             ],
             filter_query=filter_query,
+            get_raw=True,
         )
-        # filter result status
-        if result_status_list and not df.empty:
-            df = df[df[mm_schemas.ResultData.RESULT_STATUS].isin(result_status_list)]
-        if df.empty:
+        if not raw_frames:
             return {}
-        else:
-            # convert application name to lower case
-            df[mm_schemas.ApplicationEvent.APPLICATION_NAME] = df[
-                mm_schemas.ApplicationEvent.APPLICATION_NAME
-            ].str.lower()
-            df = (
-                df[
-                    [
-                        mm_schemas.ApplicationEvent.APPLICATION_NAME,
-                        mm_schemas.ResultData.RESULT_STATUS,
-                        mm_schemas.ResultData.RESULT_VALUE,
-                    ]
-                ]
-                .groupby(
-                    [
-                        mm_schemas.ApplicationEvent.APPLICATION_NAME,
-                        mm_schemas.ResultData.RESULT_STATUS,
-                    ],
-                    observed=True,
-                )
-                .count()
-            )
-            return df[mm_schemas.ResultData.RESULT_VALUE].to_dict()
+        # Count occurrences by (application_name, result_status) from RawFrame objects
+        count_dict = {}
+        for frame in raw_frames:
+            # Extract column data from each RawFrame
+            app_name = frame.column_data(mm_schemas.ApplicationEvent.APPLICATION_NAME)[
+                0
+            ]
+            statuses = frame.column_data(mm_schemas.ResultData.RESULT_STATUS)
+            for status in statuses:
+                # Filter by result status if specified
+                if result_status_list and status not in result_status_list:
+                    continue
+                # Convert application name to lower case
+                key = (app_name.lower(), status)
+                # Update the count in the dictionary
+                count_dict[key] = count_dict.get(key, 0) + 1
+        return count_dict
     def count_processed_model_endpoints(
         self,
@@ -1459,19 +1552,110 @@ class V3IOTSDBConnector(TSDBConnector):
         table = mm_schemas.V3IOTSDBTables.APP_RESULTS
         start, end, interval = self._prepare_aligned_start_end(start, end)
-        # get per time-interval x endpoint_id combination the max result status
-        df = self._get_records(
+        raw_frames: list[v3io_frames.client.RawFrame] = self._get_records(
             table=table,
             start=start,
             end=end,
-            interval=interval,
-            sliding_window_step=interval,
             columns=[mm_schemas.ResultData.RESULT_STATUS],
-            agg_funcs=["max"],
-            group_by=mm_schemas.WriterEvent.ENDPOINT_ID,
+            get_raw=True,
         )
-        if df.empty:
+        if not raw_frames:
             return mm_schemas.ModelEndpointDriftValues(values=[])
-        df = df[df[f"max({mm_schemas.ResultData.RESULT_STATUS})"] >= 1]
-        df = df.reset_index(names="_wstart")
-        return self._df_to_drift_data(df)
+        # Combine aggregation, filtering, and conversion in one pass
+        drift_values = self._process_drifted_endpoints_data(
+            raw_frames=raw_frames, start=start, end=end, interval=interval
+        )
+        return drift_values
+    @staticmethod
+    def _process_drifted_endpoints_data(
+        raw_frames: list[v3io_frames.client.RawFrame],
+        start: datetime,
+        end: datetime,
+        interval: str,
+    ) -> mm_schemas.ModelEndpointDriftValues:
+        """
+        Optimized single-pass processing of drift data from RawFrame objects.
+        Combines aggregation, filtering, and conversion into one operation.
+        :param raw_frames: List of RawFrame objects containing drift data.
+        :param start:      Start datetime for filtering data.
+        :param end:        End datetime for filtering data.
+        :param interval:   Time interval string (e.g., '5min') for aggregation
+        :returns: ModelEndpointDriftValues with counts of suspected and detected per timestamp
+        """
+        if not raw_frames:
+            return mm_schemas.ModelEndpointDriftValues(values=[])
+        # Pre-compute constants
+        interval_td = pd.Timedelta(interval)
+        interval_ns = interval_td.value  # nanoseconds for integer arithmetic
+        start_ns = pd.Timestamp(start).value
+        end_ns = pd.Timestamp(end).value
+        suspected_val = mm_schemas.constants.ResultStatusApp.potential_detection.value
+        detected_val = mm_schemas.constants.ResultStatusApp.detected.value
+        # Single dictionary to track: bucket_start_ns -> {endpoint_id -> max_status}
+        # This allows us to calculate max per endpoint per bucket in one pass
+        bucket_endpoint_status = defaultdict(dict)
+        for frame in raw_frames:
+            endpoint_id = frame.column_data(EventFieldType.ENDPOINT_ID)[0]
+            result_statuses = frame.column_data(mm_schemas.ResultData.RESULT_STATUS)
+            timestamps = frame.indices()[0].times
+            for status, timestamp_ns in zip(result_statuses, timestamps):
+                # Early skip: invalid status or outside time range
+                if status is None or math.isnan(status) or status < 1:
+                    continue
+                if not (start_ns <= timestamp_ns < end_ns):
+                    continue
+                # Calculate bucket using integer arithmetic
+                bucket_index = (timestamp_ns - start_ns) // interval_ns
+                bucket_start_ns = start_ns + (bucket_index * interval_ns)
+                # Initialize bucket if needed
+                bucket = bucket_endpoint_status[bucket_start_ns]
+                bucket[endpoint_id] = max(bucket.get(endpoint_id, status), status)
+        if not bucket_endpoint_status:
+            return mm_schemas.ModelEndpointDriftValues(values=[])
+        # Second pass: count suspected/detected per timestamp bucket
+        # Structure: bucket_start_ns -> {count_suspected, count_detected}
+        timestamp_counts = {}
+        for bucket_start_ns, endpoint_statuses in bucket_endpoint_status.items():
+            count_suspected = 0
+            count_detected = 0
+            for status in endpoint_statuses.values():
+                if status == suspected_val:
+                    count_suspected += 1
+                elif status == detected_val:
+                    count_detected += 1
+            # Only store if there are counts
+            if count_suspected > 0 or count_detected > 0:
+                timestamp_counts[bucket_start_ns] = (count_suspected, count_detected)
+        # Convert to final format (sorted by timestamp)
+        values = [
+            (
+                pd.Timestamp(bucket_ns, unit="ns", tz="UTC").to_pydatetime(),
+                count_suspected,
+                count_detected,
+            )
+            for bucket_ns, (count_suspected, count_detected) in sorted(
+                timestamp_counts.items()
+            )
+        ]
+        return mm_schemas.ModelEndpointDriftValues(values=values)

mlrun/model_monitoring/features_drift_table.py CHANGED Viewed

@@ -14,7 +14,8 @@
 import functools
 import sys
-from typing import Callable, Union
+from collections.abc import Callable
+from typing import Union
 import numpy as np
 import plotly.graph_objects as go

mlrun/model_monitoring/helpers.py CHANGED Viewed

@@ -15,8 +15,9 @@
 import datetime
 import functools
 import os
+from collections.abc import Callable
 from fnmatch import fnmatchcase
-from typing import TYPE_CHECKING, Callable, Optional, TypedDict, Union, cast
+from typing import TYPE_CHECKING, Optional, TypedDict, Union, cast
 import numpy as np
 import pandas as pd
@@ -143,7 +144,7 @@ def get_stream_path(
         return stream_uri.replace("v3io://", f"ds://{profile.name}")
     elif isinstance(
-        profile, mlrun.datastore.datastore_profile.DatastoreProfileKafkaSource
+        profile, mlrun.datastore.datastore_profile.DatastoreProfileKafkaStream
     ):
         topic = mlrun.common.model_monitoring.helpers.get_kafka_topic(
             project=project, function_name=function_name
@@ -152,7 +153,7 @@ def get_stream_path(
     else:
         raise mlrun.errors.MLRunValueError(
             f"Received an unexpected stream profile type: {type(profile)}\n"
-            "Expects `DatastoreProfileV3io` or `DatastoreProfileKafkaSource`."
+            "Expects `DatastoreProfileV3io` or `DatastoreProfileKafkaStream`."
         )
@@ -300,7 +301,7 @@ def _get_v3io_output_stream(
 def _get_kafka_output_stream(
     *,
-    kafka_profile: mlrun.datastore.datastore_profile.DatastoreProfileKafkaSource,
+    kafka_profile: mlrun.datastore.datastore_profile.DatastoreProfileKafkaStream,
     project: str,
     function_name: str,
     mock: bool = False,
@@ -356,7 +357,7 @@ def get_output_stream(
         )
     elif isinstance(
-        profile, mlrun.datastore.datastore_profile.DatastoreProfileKafkaSource
+        profile, mlrun.datastore.datastore_profile.DatastoreProfileKafkaStream
     ):
         return _get_kafka_output_stream(
             kafka_profile=profile,
@@ -368,7 +369,7 @@ def get_output_stream(
     else:
         raise mlrun.errors.MLRunValueError(
             f"Received an unexpected stream profile type: {type(profile)}\n"
-            "Expects `DatastoreProfileV3io` or `DatastoreProfileKafkaSource`."
+            "Expects `DatastoreProfileV3io` or `DatastoreProfileKafkaStream`."
         )
@@ -659,3 +660,26 @@ def get_start_end(
         )
     return start, end
+def validate_time_range(
+    start: Optional[datetime.datetime] = None, end: Optional[datetime.datetime] = None
+) -> tuple[datetime.datetime, datetime.datetime]:
+    """
+    validate start and end parameters and set default values if needed.
+    :param start:       Either None or datetime, None is handled as datetime.now(tz=timezone.utc) - timedelta(days=1)
+    :param end:         Either None or datetime, None is handled as datetime.now(tz=timezone.utc)
+    :return:            start datetime, end datetime
+    """
+    end = end or mlrun.utils.helpers.datetime_now()
+    start = start or (end - datetime.timedelta(days=1))
+    if start.tzinfo is None or end.tzinfo is None:
+        raise mlrun.errors.MLRunInvalidArgumentTypeError(
+            "Custom start and end times must contain the timezone."
+        )
+    if start > end:
+        raise mlrun.errors.MLRunInvalidArgumentError(
+            "The start time must be before the end time. Note that if end time is not provided, "
+            "the current time is used by default."
+        )
+    return start, end

mlrun/model_monitoring/stream_processing.py CHANGED Viewed

@@ -200,9 +200,25 @@ class EventStreamProcessor:
                 after="ProcessEndpointEvent",
             )
+            # split the graph between NOP event to regular event
+            graph.add_step(
+                "storey.Filter",
+                "FilterNOP",
+                after="filter_none",
+                _fn="(not (isinstance(event, dict) and event.get('kind', "
+                ") == 'nop_event'))",
+            )
+            graph.add_step(
+                "storey.Filter",
+                "ForwardNOP",
+                after="filter_none",
+                _fn="(isinstance(event, dict) and event.get('kind', "
+                ") == 'nop_event')",
+            )
             # flatten the events
             graph.add_step(
-                "storey.FlatMap", "flatten_events", _fn="(event)", after="filter_none"
+                "storey.FlatMap", "flatten_events", _fn="(event)", after="FilterNOP"
             )
         apply_storey_filter_and_flatmap()
@@ -218,19 +234,6 @@ class EventStreamProcessor:
             )
         apply_map_feature_names()
-        # split the graph between event with error vs valid event
-        graph.add_step(
-            "storey.Filter",
-            "FilterNOP",
-            after="MapFeatureNames",
-            _fn="(event.get('kind', " ") != 'nop_event')",
-        )
-        graph.add_step(
-            "storey.Filter",
-            "ForwardNOP",
-            after="MapFeatureNames",
-            _fn="(event.get('kind', " ") == 'nop_event')",
-        )
         tsdb_connector.apply_monitoring_stream_steps(
             graph=graph,
@@ -244,7 +247,7 @@ class EventStreamProcessor:
             graph.add_step(
                 "ProcessBeforeParquet",
                 name="ProcessBeforeParquet",
-                after="FilterNOP",
+                after="MapFeatureNames",
                 _fn="(event)",
             )
@@ -370,7 +373,6 @@ class ProcessEndpointEvent(mlrun.feature_store.steps.MapClass):
             logger.debug(
                 "Skipped nop event inside of ProcessEndpointEvent", event=event
             )
-            full_event.body = [event]
             return full_event
         # Getting model version and function uri from event
         # and use them for retrieving the endpoint_id
@@ -396,6 +398,8 @@ class ProcessEndpointEvent(mlrun.feature_store.steps.MapClass):
         request_id = event.get("request", {}).get("id") or event.get("resp", {}).get(
             "id"
         )
+        feature_names = event.get("request", {}).get("input_schema")
+        labels_names = event.get("resp", {}).get("output_schema")
         latency = event.get("microsec")
         features = event.get("request", {}).get("inputs")
         predictions = event.get("resp", {}).get("outputs")
@@ -496,6 +500,8 @@ class ProcessEndpointEvent(mlrun.feature_store.steps.MapClass):
                     ),
                     EventFieldType.EFFECTIVE_SAMPLE_COUNT: effective_sample_count,
                     EventFieldType.ESTIMATED_PREDICTION_COUNT: estimated_prediction_count,
+                    EventFieldType.FEATURE_NAMES: feature_names,
+                    EventFieldType.LABEL_NAMES: labels_names,
                 }
             )
@@ -602,19 +608,19 @@ class MapFeatureNames(mlrun.feature_store.steps.MapClass):
         self.endpoint_type = {}
     def _infer_feature_names_from_data(self, event):
-        for endpoint_id in self.feature_names:
-            if len(self.feature_names[endpoint_id]) >= len(
-                event[EventFieldType.FEATURES]
-            ):
-                return self.feature_names[endpoint_id]
+        endpoint_id = event[EventFieldType.ENDPOINT_ID]
+        if endpoint_id in self.feature_names and len(
+            self.feature_names[endpoint_id]
+        ) >= len(event[EventFieldType.FEATURES]):
+            return self.feature_names[endpoint_id]
         return None
     def _infer_label_columns_from_data(self, event):
-        for endpoint_id in self.label_columns:
-            if len(self.label_columns[endpoint_id]) >= len(
-                event[EventFieldType.PREDICTION]
-            ):
-                return self.label_columns[endpoint_id]
+        endpoint_id = event[EventFieldType.ENDPOINT_ID]
+        if endpoint_id in self.label_columns and len(
+            self.label_columns[endpoint_id]
+        ) >= len(event[EventFieldType.PREDICTION]):
+            return self.label_columns[endpoint_id]
         return None
     def do(self, event: dict):
@@ -659,7 +665,7 @@ class MapFeatureNames(mlrun.feature_store.steps.MapClass):
                     "Feature names are not initialized, they will be automatically generated",
                     endpoint_id=endpoint_id,
                 )
-                feature_names = [
+                feature_names = event.get(EventFieldType.FEATURE_NAMES) or [
                     f"f{i}" for i, _ in enumerate(event[EventFieldType.FEATURES])
                 ]
@@ -682,7 +688,7 @@ class MapFeatureNames(mlrun.feature_store.steps.MapClass):
                     "label column names are not initialized, they will be automatically generated",
                     endpoint_id=endpoint_id,
                 )
-                label_columns = [
+                label_columns = event.get(EventFieldType.LABEL_NAMES) or [
                     f"p{i}" for i, _ in enumerate(event[EventFieldType.PREDICTION])
                 ]
                 attributes_to_update[EventFieldType.LABEL_NAMES] = label_columns

mlrun 1.10.0rc18__py3-none-any.whl → 1.11.0rc16__py3-none-any.whl

Potentially problematic release.

mlrun 1.10.0rc18__py3-none-any.whl → 1.11.0rc16__py3-none-any.whl