PyPI - mlrun - Versions diffs - 1.10.0rc16__py3-none-any.whl → 1.10.0rc42__py3-none-any.whl - Mend

mlrun 1.10.0rc16__py3-none-any.whl → 1.10.0rc42__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of mlrun might be problematic. Click here for more details.

Files changed (98) hide show

mlrun/__init__.py +22 -2
mlrun/artifacts/document.py +6 -1
mlrun/artifacts/llm_prompt.py +21 -15
mlrun/artifacts/model.py +3 -3
mlrun/common/constants.py +9 -0
mlrun/common/formatters/artifact.py +1 -0
mlrun/common/model_monitoring/helpers.py +86 -0
mlrun/common/schemas/__init__.py +2 -0
mlrun/common/schemas/auth.py +2 -0
mlrun/common/schemas/function.py +10 -0
mlrun/common/schemas/hub.py +30 -18
mlrun/common/schemas/model_monitoring/__init__.py +2 -0
mlrun/common/schemas/model_monitoring/constants.py +30 -6
mlrun/common/schemas/model_monitoring/functions.py +13 -4
mlrun/common/schemas/model_monitoring/model_endpoints.py +11 -0
mlrun/common/schemas/pipeline.py +1 -1
mlrun/common/schemas/serving.py +3 -0
mlrun/common/schemas/workflow.py +1 -0
mlrun/common/secrets.py +22 -1
mlrun/config.py +32 -10
mlrun/datastore/__init__.py +11 -3
mlrun/datastore/azure_blob.py +162 -47
mlrun/datastore/datastore.py +9 -4
mlrun/datastore/datastore_profile.py +61 -5
mlrun/datastore/model_provider/huggingface_provider.py +363 -0
mlrun/datastore/model_provider/mock_model_provider.py +87 -0
mlrun/datastore/model_provider/model_provider.py +211 -74
mlrun/datastore/model_provider/openai_provider.py +243 -71
mlrun/datastore/s3.py +24 -2
mlrun/datastore/storeytargets.py +2 -3
mlrun/datastore/utils.py +15 -3
mlrun/db/base.py +27 -19
mlrun/db/httpdb.py +57 -48
mlrun/db/nopdb.py +25 -10
mlrun/execution.py +55 -13
mlrun/hub/__init__.py +15 -0
mlrun/hub/module.py +181 -0
mlrun/k8s_utils.py +105 -16
mlrun/launcher/base.py +13 -6
mlrun/launcher/local.py +2 -0
mlrun/model.py +9 -3
mlrun/model_monitoring/api.py +66 -27
mlrun/model_monitoring/applications/__init__.py +1 -1
mlrun/model_monitoring/applications/base.py +372 -136
mlrun/model_monitoring/applications/context.py +2 -4
mlrun/model_monitoring/applications/results.py +4 -7
mlrun/model_monitoring/controller.py +239 -101
mlrun/model_monitoring/db/_schedules.py +36 -13
mlrun/model_monitoring/db/_stats.py +4 -3
mlrun/model_monitoring/db/tsdb/base.py +29 -9
mlrun/model_monitoring/db/tsdb/tdengine/schemas.py +4 -5
mlrun/model_monitoring/db/tsdb/tdengine/tdengine_connector.py +154 -50
mlrun/model_monitoring/db/tsdb/tdengine/writer_graph_steps.py +51 -0
mlrun/model_monitoring/db/tsdb/v3io/stream_graph_steps.py +17 -4
mlrun/model_monitoring/db/tsdb/v3io/v3io_connector.py +245 -51
mlrun/model_monitoring/helpers.py +28 -5
mlrun/model_monitoring/stream_processing.py +45 -14
mlrun/model_monitoring/writer.py +220 -1
mlrun/platforms/__init__.py +3 -2
mlrun/platforms/iguazio.py +7 -3
mlrun/projects/operations.py +6 -1
mlrun/projects/pipelines.py +2 -2
mlrun/projects/project.py +128 -45
mlrun/run.py +94 -17
mlrun/runtimes/__init__.py +18 -0
mlrun/runtimes/base.py +14 -6
mlrun/runtimes/daskjob.py +1 -0
mlrun/runtimes/local.py +5 -2
mlrun/runtimes/mounts.py +20 -2
mlrun/runtimes/nuclio/__init__.py +1 -0
mlrun/runtimes/nuclio/application/application.py +147 -17
mlrun/runtimes/nuclio/function.py +70 -27
mlrun/runtimes/nuclio/serving.py +85 -4
mlrun/runtimes/pod.py +213 -21
mlrun/runtimes/utils.py +49 -9
mlrun/secrets.py +54 -13
mlrun/serving/remote.py +79 -6
mlrun/serving/routers.py +23 -41
mlrun/serving/server.py +211 -40
mlrun/serving/states.py +536 -156
mlrun/serving/steps.py +62 -0
mlrun/serving/system_steps.py +136 -81
mlrun/serving/v2_serving.py +9 -10
mlrun/utils/helpers.py +212 -82
mlrun/utils/logger.py +3 -1
mlrun/utils/notifications/notification/base.py +18 -0
mlrun/utils/notifications/notification/git.py +2 -4
mlrun/utils/notifications/notification/slack.py +2 -4
mlrun/utils/notifications/notification/webhook.py +2 -5
mlrun/utils/notifications/notification_pusher.py +1 -1
mlrun/utils/version/version.json +2 -2
{mlrun-1.10.0rc16.dist-info → mlrun-1.10.0rc42.dist-info}/METADATA +44 -45
{mlrun-1.10.0rc16.dist-info → mlrun-1.10.0rc42.dist-info}/RECORD +97 -92
mlrun/api/schemas/__init__.py +0 -259
{mlrun-1.10.0rc16.dist-info → mlrun-1.10.0rc42.dist-info}/WHEEL +0 -0
{mlrun-1.10.0rc16.dist-info → mlrun-1.10.0rc42.dist-info}/entry_points.txt +0 -0
{mlrun-1.10.0rc16.dist-info → mlrun-1.10.0rc42.dist-info}/licenses/LICENSE +0 -0
{mlrun-1.10.0rc16.dist-info → mlrun-1.10.0rc42.dist-info}/top_level.txt +0 -0

mlrun/serving/steps.py ADDED Viewed

@@ -0,0 +1,62 @@
+# Copyright 2025 Iguazio
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+from typing import Union
+import storey
+import mlrun.errors
+class ChoiceByField(storey.Choice):
+    """
+    Selects downstream outlets to route each event based on a predetermined field.
+    :param field_name: event field name that contains the step name or names of the desired outlet or outlets
+    """
+    def __init__(self, field_name: Union[str, list[str]], **kwargs):
+        self.field_name = field_name
+        super().__init__(**kwargs)
+    def select_outlets(self, event):
+        # Case 1: Missing field
+        if self.field_name not in event:
+            raise mlrun.errors.MLRunRuntimeError(
+                f"Field '{self.field_name}' is not contained in the event keys {list(event.keys())}."
+            )
+        outlet = event[self.field_name]
+        # Case 2: Field exists but is None
+        if outlet is None:
+            raise mlrun.errors.MLRunInvalidArgumentError(
+                f"Field '{self.field_name}' exists but its value is None."
+            )
+        # Case 3: Invalid type
+        if not isinstance(outlet, (str, list, tuple)):
+            raise mlrun.errors.MLRunInvalidArgumentTypeError(
+                f"Field '{self.field_name}' must be a string or list of strings "
+                f"but is instead of type '{type(outlet).__name__}'."
+            )
+        outlets = [outlet] if isinstance(outlet, str) else outlet
+        # Case 4: Empty list or tuple
+        if not outlets:
+            raise mlrun.errors.MLRunRuntimeError(
+                f"The value of the key '{self.field_name}' cannot be an empty {type(outlets).__name__}."
+            )
+        return outlets

mlrun/serving/system_steps.py CHANGED Viewed

@@ -11,8 +11,8 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
 import random
+from copy import copy
 from datetime import timedelta
 from typing import Any, Optional, Union
@@ -22,11 +22,29 @@ import storey
 import mlrun
 import mlrun.artifacts
 import mlrun.common.schemas.model_monitoring as mm_schemas
+import mlrun.feature_store
 import mlrun.serving
+from mlrun.common.model_monitoring.helpers import (
+    get_model_endpoints_creation_task_status,
+)
 from mlrun.common.schemas import MonitoringData
 from mlrun.utils import get_data_from_path, logger
+class MatchingEndpointsState(mlrun.common.types.StrEnum):
+    all_matched = "all_matched"
+    not_all_matched = "not_all_matched"
+    no_check_needed = "no_check_needed"
+    not_yet_checked = "not_yet_matched"
+    @staticmethod
+    def success_states() -> list[str]:
+        return [
+            MatchingEndpointsState.all_matched,
+            MatchingEndpointsState.no_check_needed,
+        ]
 class MonitoringPreProcessor(storey.MapClass):
     """preprocess step, reconstructs the serving output event body to StreamProcessingEvent schema"""
@@ -45,33 +63,20 @@ class MonitoringPreProcessor(storey.MapClass):
         result_path = model_monitoring_data.get(MonitoringData.RESULT_PATH)
         input_path = model_monitoring_data.get(MonitoringData.INPUT_PATH)
-        result = get_data_from_path(result_path, event.body.get(model, event.body))
         output_schema = model_monitoring_data.get(MonitoringData.OUTPUTS)
         input_schema = model_monitoring_data.get(MonitoringData.INPUTS)
-        logger.debug("output schema retrieved", output_schema=output_schema)
-        if isinstance(result, dict):
-            # transpose by key the outputs:
-            outputs = self.transpose_by_key(result, output_schema)
-            if not output_schema:
-                logger.warn(
-                    "Output schema was not provided using Project:log_model or by ModelRunnerStep:add_model order "
-                    "may not preserved"
-                )
-        else:
-            outputs = result
+        logger.debug(
+            "output and input schema retrieved",
+            output_schema=output_schema,
+            input_schema=input_schema,
+        )
-        event_inputs = event._metadata.get("inputs", {})
-        event_inputs = get_data_from_path(input_path, event_inputs)
-        if isinstance(event_inputs, dict):
-            # transpose by key the inputs:
-            inputs = self.transpose_by_key(event_inputs, input_schema)
-            if not input_schema:
-                logger.warn(
-                    "Input schema was not provided using by ModelRunnerStep:add_model, order "
-                    "may not preserved"
-                )
-        else:
-            inputs = event_inputs
+        outputs, new_output_schema = self.get_listed_data(
+            event.body.get(model, event.body), result_path, output_schema
+        )
+        inputs, new_input_schema = self.get_listed_data(
+            event._metadata.get("inputs", {}), input_path, input_schema
+        )
         if outputs and isinstance(outputs[0], list):
             if output_schema and len(output_schema) != len(outputs[0]):
@@ -96,15 +101,43 @@ class MonitoringPreProcessor(storey.MapClass):
                     "outputs and inputs are not in the same length check 'input_path' and "
                     "'output_path' was specified if needed"
                 )
-        request = {"inputs": inputs, "id": getattr(event, "id", None)}
-        resp = {"outputs": outputs}
+        request = {
+            "inputs": inputs,
+            "id": getattr(event, "id", None),
+            "input_schema": new_input_schema,
+        }
+        resp = {"outputs": outputs, "output_schema": new_output_schema}
         return request, resp
+    def get_listed_data(
+        self,
+        raw_data: dict,
+        data_path: Optional[Union[list[str], str]] = None,
+        schema: Optional[list[str]] = None,
+    ):
+        """Get data from a path and transpose it by keys if dict is provided."""
+        new_schema = None
+        data_from_path = get_data_from_path(data_path, raw_data)
+        if isinstance(data_from_path, dict):
+            # transpose by key the inputs:
+            listed_data, new_schema = self.transpose_by_key(data_from_path, schema)
+            new_schema = new_schema or schema
+            if not schema:
+                logger.warn(
+                    f"No schema provided through add_model(); the order of {data_from_path} "
+                    "may not be preserved."
+                )
+        elif not isinstance(data_from_path, list):
+            listed_data = [data_from_path]
+        else:
+            listed_data = data_from_path
+        return listed_data, new_schema
     @staticmethod
     def transpose_by_key(
         data: dict, schema: Optional[Union[str, list[str]]] = None
-    ) -> Union[list[float], list[list[float]]]:
+    ) -> tuple[Union[list[Any], list[list[Any]]], list[str]]:
         """
         Transpose values from a dictionary by keys.
@@ -136,17 +169,28 @@ class MonitoringPreProcessor(storey.MapClass):
                          * If result is a matrix, returns a list of lists.
         :raises ValueError: If the values include a mix of scalars and lists, or if the list lengths do not match.
+                mlrun.MLRunInvalidArgumentError if the schema keys are not contained in the data keys.
         """
+        new_schema = None
+        # Normalize keys in data:
+        normalize_data = {
+            mlrun.feature_store.api.norm_column_name(k): copy(v)
+            for k, v in data.items()
+        }
         # Normalize schema to list
         if not schema:
-            keys = list(data.keys())
+            keys = list(normalize_data.keys())
+            new_schema = keys
         elif isinstance(schema, str):
-            keys = [schema]
+            keys = [mlrun.feature_store.api.norm_column_name(schema)]
         else:
-            keys = schema
+            keys = [mlrun.feature_store.api.norm_column_name(key) for key in schema]
-        values = [data[key] for key in keys]
+        values = [normalize_data[key] for key in keys if key in normalize_data]
+        if len(values) != len(keys):
+            raise mlrun.MLRunInvalidArgumentError(
+                f"Schema keys {keys} are not contained in the data keys {list(data.keys())}."
+            )
         # Detect if all are scalars ie: int,float,str
         all_scalars = all(not isinstance(v, (list, tuple, np.ndarray)) for v in values)
@@ -158,18 +202,18 @@ class MonitoringPreProcessor(storey.MapClass):
             )
         if all_scalars:
-            transposed = np.array([values])
+            transposed = np.array([values], dtype=object)
         elif all_lists and len(keys) > 1:
-            arrays = [np.array(v) for v in values]
+            arrays = [np.array(v, dtype=object) for v in values]
             mat = np.stack(arrays, axis=0)
             transposed = mat.T
         else:
-            return values[0]
+            return values[0], new_schema
         if transposed.shape[1] == 1 and transposed.shape[0] == 1:
             # Transform [[0]] -> [0]:
-            return transposed[:, 0].tolist()
-        return transposed.tolist()
+            return transposed[:, 0].tolist(), new_schema
+        return transposed.tolist(), new_schema
     def do(self, event):
         monitoring_event_list = []
@@ -192,6 +236,12 @@ class MonitoringPreProcessor(storey.MapClass):
                     request, resp = self.reconstruct_request_resp_fields(
                         event, model, monitoring_data[model]
                     )
+                    if hasattr(event, "_original_timestamp"):
+                        when = event._original_timestamp
+                    else:
+                        when = event._metadata.get(model, {}).get(
+                            mm_schemas.StreamProcessingEvent.WHEN
+                        )
                     monitoring_event_list.append(
                         {
                             mm_schemas.StreamProcessingEvent.MODEL: model,
@@ -201,17 +251,16 @@ class MonitoringPreProcessor(storey.MapClass):
                             mm_schemas.StreamProcessingEvent.MICROSEC: event._metadata.get(
                                 model, {}
                             ).get(mm_schemas.StreamProcessingEvent.MICROSEC),
-                            mm_schemas.StreamProcessingEvent.WHEN: event._metadata.get(
-                                model, {}
-                            ).get(mm_schemas.StreamProcessingEvent.WHEN),
+                            mm_schemas.StreamProcessingEvent.WHEN: when,
                             mm_schemas.StreamProcessingEvent.ENDPOINT_ID: monitoring_data[
                                 model
                             ].get(
                                 mlrun.common.schemas.MonitoringData.MODEL_ENDPOINT_UID
                             ),
-                            mm_schemas.StreamProcessingEvent.LABELS: monitoring_data[
+                            mm_schemas.StreamProcessingEvent.LABELS: event.body[
                                 model
-                            ].get(mlrun.common.schemas.MonitoringData.OUTPUTS),
+                            ].get("labels")
+                            or {},
                             mm_schemas.StreamProcessingEvent.FUNCTION_URI: self.server.function_uri
                             if self.server
                             else None,
@@ -236,6 +285,10 @@ class MonitoringPreProcessor(storey.MapClass):
             request, resp = self.reconstruct_request_resp_fields(
                 event, model, monitoring_data[model]
             )
+            if hasattr(event, "_original_timestamp"):
+                when = event._original_timestamp
+            else:
+                when = event._metadata.get(mm_schemas.StreamProcessingEvent.WHEN)
             monitoring_event_list.append(
                 {
                     mm_schemas.StreamProcessingEvent.MODEL: model,
@@ -245,25 +298,20 @@ class MonitoringPreProcessor(storey.MapClass):
                     mm_schemas.StreamProcessingEvent.MICROSEC: event._metadata.get(
                         mm_schemas.StreamProcessingEvent.MICROSEC
                     ),
-                    mm_schemas.StreamProcessingEvent.WHEN: event._metadata.get(
-                        mm_schemas.StreamProcessingEvent.WHEN
-                    ),
+                    mm_schemas.StreamProcessingEvent.WHEN: when,
                     mm_schemas.StreamProcessingEvent.ENDPOINT_ID: monitoring_data[
                         model
                     ].get(mlrun.common.schemas.MonitoringData.MODEL_ENDPOINT_UID),
-                    mm_schemas.StreamProcessingEvent.LABELS: monitoring_data[model].get(
-                        mlrun.common.schemas.MonitoringData.OUTPUTS
-                    ),
+                    mm_schemas.StreamProcessingEvent.LABELS: event.body.get("labels")
+                    or {},
                     mm_schemas.StreamProcessingEvent.FUNCTION_URI: self.server.function_uri
                     if self.server
                     else None,
                     mm_schemas.StreamProcessingEvent.REQUEST: request,
                     mm_schemas.StreamProcessingEvent.RESPONSE: resp,
-                    mm_schemas.StreamProcessingEvent.ERROR: event.body[
+                    mm_schemas.StreamProcessingEvent.ERROR: event.body.get(
                         mm_schemas.StreamProcessingEvent.ERROR
-                    ]
-                    if mm_schemas.StreamProcessingEvent.ERROR in event.body
-                    else None,
+                    ),
                     mm_schemas.StreamProcessingEvent.METRICS: event.body[
                         mm_schemas.StreamProcessingEvent.METRICS
                     ]
@@ -283,6 +331,9 @@ class BackgroundTaskStatus(storey.MapClass):
     def __init__(self, **kwargs):
         super().__init__(**kwargs)
+        self.matching_endpoints = MatchingEndpointsState.not_yet_checked
+        self.graph_model_endpoint_uids: set = set()
+        self.listed_model_endpoint_uids: set = set()
         self.server: mlrun.serving.GraphServer = (
             getattr(self.context, "server", None) if self.context else None
         )
@@ -303,43 +354,47 @@ class BackgroundTaskStatus(storey.MapClass):
                 )
             )
         ):
-            background_task = mlrun.get_run_db().get_project_background_task(
-                self.server.project, self.server.model_endpoint_creation_task_name
-            )
-            self._background_task_check_timestamp = mlrun.utils.now_date()
-            self._log_background_task_state(background_task.status.state)
-            self._background_task_state = background_task.status.state
+            (
+                self._background_task_state,
+                self._background_task_check_timestamp,
+                self.listed_model_endpoint_uids,
+            ) = get_model_endpoints_creation_task_status(self.server)
+        if (
+            self.listed_model_endpoint_uids
+            and self.matching_endpoints == MatchingEndpointsState.not_yet_checked
+        ):
+            if not self.graph_model_endpoint_uids:
+                self.graph_model_endpoint_uids = collect_model_endpoint_uids(
+                    self.server
+                )
+            if self.graph_model_endpoint_uids.issubset(self.listed_model_endpoint_uids):
+                self.matching_endpoints = MatchingEndpointsState.all_matched
+        elif self.listed_model_endpoint_uids is None:
+            self.matching_endpoints = MatchingEndpointsState.no_check_needed
         if (
             self._background_task_state
             == mlrun.common.schemas.BackgroundTaskState.succeeded
+            and self.matching_endpoints in MatchingEndpointsState.success_states()
         ):
             return event
         else:
             return None
-    def _log_background_task_state(
-        self, background_task_state: mlrun.common.schemas.BackgroundTaskState
-    ):
-        logger.info(
-            "Checking model endpoint creation task status",
-            task_name=self.server.model_endpoint_creation_task_name,
-        )
-        if (
-            background_task_state
-            in mlrun.common.schemas.BackgroundTaskState.terminal_states()
-        ):
-            logger.info(
-                f"Model endpoint creation task completed with state {background_task_state}"
-            )
-        else:  # in progress
-            logger.info(
-                f"Model endpoint creation task is still in progress with the current state: "
-                f"{background_task_state}. Events will not be monitored for the next "
-                f"{mlrun.mlconf.model_endpoint_monitoring.model_endpoint_creation_check_period} seconds",
-                name=self.name,
-                background_task_check_timestamp=self._background_task_check_timestamp.isoformat(),
-            )
+def collect_model_endpoint_uids(server: mlrun.serving.GraphServer) -> set[str]:
+    """Collects all model endpoint UIDs from the server's graph steps."""
+    model_endpoint_uids = set()
+    for step in server.graph.steps.values():
+        if hasattr(step, "monitoring_data"):
+            for model in step.monitoring_data.keys():
+                uid = step.monitoring_data[model].get(
+                    mlrun.common.schemas.MonitoringData.MODEL_ENDPOINT_UID
+                )
+                if uid:
+                    model_endpoint_uids.add(uid)
+    return model_endpoint_uids
 class SamplingStep(storey.MapClass):

mlrun/serving/v2_serving.py CHANGED Viewed

@@ -24,6 +24,9 @@ import mlrun.common.schemas.model_monitoring
 import mlrun.model_monitoring
 from mlrun.utils import logger, now_date
+from ..common.model_monitoring.helpers import (
+    get_model_endpoints_creation_task_status,
+)
 from .utils import StepToDict, _extract_input_data, _update_result_body
@@ -474,22 +477,18 @@ class V2ModelServer(StepToDict):
         ) or getattr(self.context, "server", None)
         if not self.context.is_mock or self.context.monitoring_mock:
             if server.model_endpoint_creation_task_name:
-                background_task = mlrun.get_run_db().get_project_background_task(
-                    server.project, server.model_endpoint_creation_task_name
-                )
-                logger.debug(
-                    "Checking model endpoint creation task status",
-                    task_name=server.model_endpoint_creation_task_name,
+                background_task_state, _, _ = get_model_endpoints_creation_task_status(
+                    server
                 )
                 if (
-                    background_task.status.state
+                    background_task_state
                     in mlrun.common.schemas.BackgroundTaskState.terminal_states()
                 ):
                     logger.debug(
-                        f"Model endpoint creation task completed with state {background_task.status.state}"
+                        f"Model endpoint creation task completed with state {background_task_state}"
                     )
                     if (
-                        background_task.status.state
+                        background_task_state
                         == mlrun.common.schemas.BackgroundTaskState.succeeded
                     ):
                         self._model_logger = (
@@ -504,7 +503,7 @@ class V2ModelServer(StepToDict):
                 else:  # in progress
                     logger.debug(
                         f"Model endpoint creation task is still in progress with the current state: "
-                        f"{background_task.status.state}.",
+                        f"{background_task_state}.",
                         name=self.name,
                     )
             else:

mlrun 1.10.0rc16__py3-none-any.whl → 1.10.0rc42__py3-none-any.whl

Potentially problematic release.

mlrun 1.10.0rc16__py3-none-any.whl → 1.10.0rc42__py3-none-any.whl