mlrun 1.10.0rc18__py3-none-any.whl → 1.11.0rc16__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of mlrun might be problematic. Click here for more details.

Files changed (167)
  1. mlrun/__init__.py +24 -3
  2. mlrun/__main__.py +0 -4
  3. mlrun/artifacts/dataset.py +2 -2
  4. mlrun/artifacts/document.py +6 -1
  5. mlrun/artifacts/llm_prompt.py +21 -15
  6. mlrun/artifacts/model.py +3 -3
  7. mlrun/artifacts/plots.py +1 -1
  8. mlrun/{model_monitoring/db/tsdb/tdengine → auth}/__init__.py +2 -3
  9. mlrun/auth/nuclio.py +89 -0
  10. mlrun/auth/providers.py +429 -0
  11. mlrun/auth/utils.py +415 -0
  12. mlrun/common/constants.py +14 -0
  13. mlrun/common/model_monitoring/helpers.py +123 -0
  14. mlrun/common/runtimes/constants.py +28 -0
  15. mlrun/common/schemas/__init__.py +14 -3
  16. mlrun/common/schemas/alert.py +2 -2
  17. mlrun/common/schemas/api_gateway.py +3 -0
  18. mlrun/common/schemas/auth.py +12 -10
  19. mlrun/common/schemas/client_spec.py +4 -0
  20. mlrun/common/schemas/constants.py +25 -0
  21. mlrun/common/schemas/frontend_spec.py +1 -8
  22. mlrun/common/schemas/function.py +34 -0
  23. mlrun/common/schemas/hub.py +33 -20
  24. mlrun/common/schemas/model_monitoring/__init__.py +2 -1
  25. mlrun/common/schemas/model_monitoring/constants.py +12 -15
  26. mlrun/common/schemas/model_monitoring/functions.py +13 -4
  27. mlrun/common/schemas/model_monitoring/model_endpoints.py +11 -0
  28. mlrun/common/schemas/pipeline.py +1 -1
  29. mlrun/common/schemas/secret.py +17 -2
  30. mlrun/common/secrets.py +95 -1
  31. mlrun/common/types.py +10 -10
  32. mlrun/config.py +69 -19
  33. mlrun/data_types/infer.py +2 -2
  34. mlrun/datastore/__init__.py +12 -5
  35. mlrun/datastore/azure_blob.py +162 -47
  36. mlrun/datastore/base.py +274 -10
  37. mlrun/datastore/datastore.py +7 -2
  38. mlrun/datastore/datastore_profile.py +84 -22
  39. mlrun/datastore/model_provider/huggingface_provider.py +225 -41
  40. mlrun/datastore/model_provider/mock_model_provider.py +87 -0
  41. mlrun/datastore/model_provider/model_provider.py +206 -74
  42. mlrun/datastore/model_provider/openai_provider.py +226 -66
  43. mlrun/datastore/s3.py +39 -18
  44. mlrun/datastore/sources.py +1 -1
  45. mlrun/datastore/store_resources.py +4 -4
  46. mlrun/datastore/storeytargets.py +17 -12
  47. mlrun/datastore/targets.py +1 -1
  48. mlrun/datastore/utils.py +25 -6
  49. mlrun/datastore/v3io.py +1 -1
  50. mlrun/db/base.py +63 -32
  51. mlrun/db/httpdb.py +373 -153
  52. mlrun/db/nopdb.py +54 -21
  53. mlrun/errors.py +4 -2
  54. mlrun/execution.py +66 -25
  55. mlrun/feature_store/api.py +1 -1
  56. mlrun/feature_store/common.py +1 -1
  57. mlrun/feature_store/feature_vector_utils.py +1 -1
  58. mlrun/feature_store/steps.py +8 -6
  59. mlrun/frameworks/_common/utils.py +3 -3
  60. mlrun/frameworks/_dl_common/loggers/logger.py +1 -1
  61. mlrun/frameworks/_dl_common/loggers/tensorboard_logger.py +2 -1
  62. mlrun/frameworks/_ml_common/loggers/mlrun_logger.py +1 -1
  63. mlrun/frameworks/_ml_common/utils.py +2 -1
  64. mlrun/frameworks/auto_mlrun/auto_mlrun.py +4 -3
  65. mlrun/frameworks/lgbm/mlrun_interfaces/mlrun_interface.py +2 -1
  66. mlrun/frameworks/onnx/dataset.py +2 -1
  67. mlrun/frameworks/onnx/mlrun_interface.py +2 -1
  68. mlrun/frameworks/pytorch/callbacks/logging_callback.py +5 -4
  69. mlrun/frameworks/pytorch/callbacks/mlrun_logging_callback.py +2 -1
  70. mlrun/frameworks/pytorch/callbacks/tensorboard_logging_callback.py +2 -1
  71. mlrun/frameworks/pytorch/utils.py +2 -1
  72. mlrun/frameworks/sklearn/metric.py +2 -1
  73. mlrun/frameworks/tf_keras/callbacks/logging_callback.py +5 -4
  74. mlrun/frameworks/tf_keras/callbacks/mlrun_logging_callback.py +2 -1
  75. mlrun/frameworks/tf_keras/callbacks/tensorboard_logging_callback.py +2 -1
  76. mlrun/hub/__init__.py +52 -0
  77. mlrun/hub/base.py +142 -0
  78. mlrun/hub/module.py +172 -0
  79. mlrun/hub/step.py +113 -0
  80. mlrun/k8s_utils.py +105 -16
  81. mlrun/launcher/base.py +15 -7
  82. mlrun/launcher/local.py +4 -1
  83. mlrun/model.py +14 -4
  84. mlrun/model_monitoring/__init__.py +0 -1
  85. mlrun/model_monitoring/api.py +65 -28
  86. mlrun/model_monitoring/applications/__init__.py +1 -1
  87. mlrun/model_monitoring/applications/base.py +299 -128
  88. mlrun/model_monitoring/applications/context.py +2 -4
  89. mlrun/model_monitoring/controller.py +132 -58
  90. mlrun/model_monitoring/db/_schedules.py +38 -29
  91. mlrun/model_monitoring/db/_stats.py +6 -16
  92. mlrun/model_monitoring/db/tsdb/__init__.py +9 -7
  93. mlrun/model_monitoring/db/tsdb/base.py +29 -9
  94. mlrun/model_monitoring/db/tsdb/preaggregate.py +234 -0
  95. mlrun/model_monitoring/db/tsdb/stream_graph_steps.py +63 -0
  96. mlrun/model_monitoring/db/tsdb/timescaledb/queries/timescaledb_metrics_queries.py +414 -0
  97. mlrun/model_monitoring/db/tsdb/timescaledb/queries/timescaledb_predictions_queries.py +376 -0
  98. mlrun/model_monitoring/db/tsdb/timescaledb/queries/timescaledb_results_queries.py +590 -0
  99. mlrun/model_monitoring/db/tsdb/timescaledb/timescaledb_connection.py +434 -0
  100. mlrun/model_monitoring/db/tsdb/timescaledb/timescaledb_connector.py +541 -0
  101. mlrun/model_monitoring/db/tsdb/timescaledb/timescaledb_operations.py +808 -0
  102. mlrun/model_monitoring/db/tsdb/timescaledb/timescaledb_schema.py +502 -0
  103. mlrun/model_monitoring/db/tsdb/timescaledb/timescaledb_stream.py +163 -0
  104. mlrun/model_monitoring/db/tsdb/timescaledb/timescaledb_stream_graph_steps.py +60 -0
  105. mlrun/model_monitoring/db/tsdb/timescaledb/utils/timescaledb_dataframe_processor.py +141 -0
  106. mlrun/model_monitoring/db/tsdb/timescaledb/utils/timescaledb_query_builder.py +585 -0
  107. mlrun/model_monitoring/db/tsdb/timescaledb/writer_graph_steps.py +73 -0
  108. mlrun/model_monitoring/db/tsdb/v3io/stream_graph_steps.py +20 -9
  109. mlrun/model_monitoring/db/tsdb/v3io/v3io_connector.py +235 -51
  110. mlrun/model_monitoring/features_drift_table.py +2 -1
  111. mlrun/model_monitoring/helpers.py +30 -6
  112. mlrun/model_monitoring/stream_processing.py +34 -28
  113. mlrun/model_monitoring/writer.py +224 -4
  114. mlrun/package/__init__.py +2 -1
  115. mlrun/platforms/__init__.py +0 -43
  116. mlrun/platforms/iguazio.py +8 -4
  117. mlrun/projects/operations.py +17 -11
  118. mlrun/projects/pipelines.py +2 -2
  119. mlrun/projects/project.py +187 -123
  120. mlrun/run.py +95 -21
  121. mlrun/runtimes/__init__.py +2 -186
  122. mlrun/runtimes/base.py +103 -25
  123. mlrun/runtimes/constants.py +225 -0
  124. mlrun/runtimes/daskjob.py +5 -2
  125. mlrun/runtimes/databricks_job/databricks_runtime.py +2 -1
  126. mlrun/runtimes/local.py +5 -2
  127. mlrun/runtimes/mounts.py +20 -2
  128. mlrun/runtimes/nuclio/__init__.py +12 -7
  129. mlrun/runtimes/nuclio/api_gateway.py +36 -6
  130. mlrun/runtimes/nuclio/application/application.py +339 -40
  131. mlrun/runtimes/nuclio/function.py +222 -72
  132. mlrun/runtimes/nuclio/serving.py +132 -42
  133. mlrun/runtimes/pod.py +213 -21
  134. mlrun/runtimes/utils.py +49 -9
  135. mlrun/secrets.py +99 -14
  136. mlrun/serving/__init__.py +2 -0
  137. mlrun/serving/remote.py +84 -11
  138. mlrun/serving/routers.py +26 -44
  139. mlrun/serving/server.py +138 -51
  140. mlrun/serving/serving_wrapper.py +6 -2
  141. mlrun/serving/states.py +997 -283
  142. mlrun/serving/steps.py +62 -0
  143. mlrun/serving/system_steps.py +149 -95
  144. mlrun/serving/v2_serving.py +9 -10
  145. mlrun/track/trackers/mlflow_tracker.py +29 -31
  146. mlrun/utils/helpers.py +292 -94
  147. mlrun/utils/http.py +9 -2
  148. mlrun/utils/notifications/notification/base.py +18 -0
  149. mlrun/utils/notifications/notification/git.py +3 -5
  150. mlrun/utils/notifications/notification/mail.py +39 -16
  151. mlrun/utils/notifications/notification/slack.py +2 -4
  152. mlrun/utils/notifications/notification/webhook.py +2 -5
  153. mlrun/utils/notifications/notification_pusher.py +3 -3
  154. mlrun/utils/version/version.json +2 -2
  155. mlrun/utils/version/version.py +3 -4
  156. {mlrun-1.10.0rc18.dist-info → mlrun-1.11.0rc16.dist-info}/METADATA +63 -74
  157. {mlrun-1.10.0rc18.dist-info → mlrun-1.11.0rc16.dist-info}/RECORD +161 -143
  158. mlrun/api/schemas/__init__.py +0 -259
  159. mlrun/db/auth_utils.py +0 -152
  160. mlrun/model_monitoring/db/tsdb/tdengine/schemas.py +0 -344
  161. mlrun/model_monitoring/db/tsdb/tdengine/stream_graph_steps.py +0 -75
  162. mlrun/model_monitoring/db/tsdb/tdengine/tdengine_connection.py +0 -281
  163. mlrun/model_monitoring/db/tsdb/tdengine/tdengine_connector.py +0 -1266
  164. {mlrun-1.10.0rc18.dist-info → mlrun-1.11.0rc16.dist-info}/WHEEL +0 -0
  165. {mlrun-1.10.0rc18.dist-info → mlrun-1.11.0rc16.dist-info}/entry_points.txt +0 -0
  166. {mlrun-1.10.0rc18.dist-info → mlrun-1.11.0rc16.dist-info}/licenses/LICENSE +0 -0
  167. {mlrun-1.10.0rc18.dist-info → mlrun-1.11.0rc16.dist-info}/top_level.txt +0 -0
mlrun/serving/steps.py ADDED
@@ -0,0 +1,62 @@
1
+ # Copyright 2025 Iguazio
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ from typing import Union
16
+
17
+ import storey
18
+
19
+ import mlrun.errors
20
+
21
+
22
class ChoiceByField(storey.Choice):
    """
    Route each event to the downstream outlet (or outlets) named by a
    predetermined event field.

    :param field_name: event field name that contains the step name or names of the desired outlet or outlets
    """

    # NOTE(review): the Union[str, list[str]] annotation on field_name looks
    # aspirational — a list-valued field_name would fail the membership test in
    # select_outlets (unhashable key); confirm the intended usage.
    def __init__(self, field_name: Union[str, list[str]], **kwargs):
        self.field_name = field_name
        super().__init__(**kwargs)

    def select_outlets(self, event):
        # Guard 1: the routing field must be present on the event.
        if self.field_name not in event:
            raise mlrun.errors.MLRunRuntimeError(
                f"Field '{self.field_name}' is not contained in the event keys {list(event.keys())}."
            )

        value = event[self.field_name]

        # Guard 2: the field is present but holds None.
        if value is None:
            raise mlrun.errors.MLRunInvalidArgumentError(
                f"Field '{self.field_name}' exists but its value is None."
            )

        # Guard 3: only a string or a list/tuple of strings is routable.
        if not isinstance(value, (str, list, tuple)):
            raise mlrun.errors.MLRunInvalidArgumentTypeError(
                f"Field '{self.field_name}' must be a string or list of strings "
                f"but is instead of type '{type(value).__name__}'."
            )

        # Normalize a single outlet name to a one-element list.
        if isinstance(value, str):
            selected = [value]
        else:
            selected = value

        # Guard 4: an empty list/tuple selects nothing, which is an error.
        if not selected:
            raise mlrun.errors.MLRunRuntimeError(
                f"The value of the key '{self.field_name}' cannot be an empty {type(selected).__name__}."
            )

        return selected
@@ -11,8 +11,9 @@
11
11
  # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
12
  # See the License for the specific language governing permissions and
13
13
  # limitations under the License.
14
-
15
14
  import random
15
+ import typing
16
+ from copy import copy
16
17
  from datetime import timedelta
17
18
  from typing import Any, Optional, Union
18
19
 
@@ -22,11 +23,29 @@ import storey
22
23
  import mlrun
23
24
  import mlrun.artifacts
24
25
  import mlrun.common.schemas.model_monitoring as mm_schemas
26
+ import mlrun.feature_store
25
27
  import mlrun.serving
28
+ from mlrun.common.model_monitoring.helpers import (
29
+ get_model_endpoints_creation_task_status,
30
+ )
26
31
  from mlrun.common.schemas import MonitoringData
27
32
  from mlrun.utils import get_data_from_path, logger
28
33
 
29
34
 
35
+ class MatchingEndpointsState(mlrun.common.types.StrEnum):
36
+ all_matched = "all_matched"
37
+ not_all_matched = "not_all_matched"
38
+ no_check_needed = "no_check_needed"
39
+ not_yet_checked = "not_yet_matched"
40
+
41
+ @staticmethod
42
+ def success_states() -> list[str]:
43
+ return [
44
+ MatchingEndpointsState.all_matched,
45
+ MatchingEndpointsState.no_check_needed,
46
+ ]
47
+
48
+
30
49
  class MonitoringPreProcessor(storey.MapClass):
31
50
  """preprocess step, reconstructs the serving output event body to StreamProcessingEvent schema"""
32
51
 
@@ -45,33 +64,24 @@ class MonitoringPreProcessor(storey.MapClass):
45
64
  result_path = model_monitoring_data.get(MonitoringData.RESULT_PATH)
46
65
  input_path = model_monitoring_data.get(MonitoringData.INPUT_PATH)
47
66
 
48
- result = get_data_from_path(result_path, event.body.get(model, event.body))
49
67
  output_schema = model_monitoring_data.get(MonitoringData.OUTPUTS)
50
68
  input_schema = model_monitoring_data.get(MonitoringData.INPUTS)
51
- logger.debug("output schema retrieved", output_schema=output_schema)
52
- if isinstance(result, dict):
53
- # transpose by key the outputs:
54
- outputs = self.transpose_by_key(result, output_schema)
55
- if not output_schema:
56
- logger.warn(
57
- "Output schema was not provided using Project:log_model or by ModelRunnerStep:add_model order "
58
- "may not preserved"
59
- )
60
- else:
61
- outputs = result
62
-
63
- event_inputs = event._metadata.get("inputs", {})
64
- event_inputs = get_data_from_path(input_path, event_inputs)
65
- if isinstance(event_inputs, dict):
66
- # transpose by key the inputs:
67
- inputs = self.transpose_by_key(event_inputs, input_schema)
68
- if not input_schema:
69
- logger.warn(
70
- "Input schema was not provided using by ModelRunnerStep:add_model, order "
71
- "may not preserved"
72
- )
69
+ logger.debug(
70
+ "output and input schema retrieved",
71
+ output_schema=output_schema,
72
+ input_schema=input_schema,
73
+ )
74
+ if event.body and isinstance(event.body, list):
75
+ outputs, new_output_schema = self.get_listed_data(
76
+ event.body, result_path, output_schema
77
+ )
73
78
  else:
74
- inputs = event_inputs
79
+ outputs, new_output_schema = self.get_listed_data(
80
+ event.body.get(model, event.body), result_path, output_schema
81
+ )
82
+ inputs, new_input_schema = self.get_listed_data(
83
+ event._metadata.get("inputs", {}), input_path, input_schema
84
+ )
75
85
 
76
86
  if outputs and isinstance(outputs[0], list):
77
87
  if output_schema and len(output_schema) != len(outputs[0]):
@@ -96,15 +106,43 @@ class MonitoringPreProcessor(storey.MapClass):
96
106
  "outputs and inputs are not in the same length check 'input_path' and "
97
107
  "'output_path' was specified if needed"
98
108
  )
99
- request = {"inputs": inputs, "id": getattr(event, "id", None)}
100
- resp = {"outputs": outputs}
109
+ request = {
110
+ "inputs": inputs,
111
+ "id": getattr(event, "id", None),
112
+ "input_schema": new_input_schema,
113
+ }
114
+ resp = {"outputs": outputs, "output_schema": new_output_schema}
101
115
 
102
116
  return request, resp
103
117
 
118
+ def get_listed_data(
119
+ self,
120
+ raw_data: typing.Union[dict, list],
121
+ data_path: Optional[Union[list[str], str]] = None,
122
+ schema: Optional[list[str]] = None,
123
+ ):
124
+ """Get data from a path and transpose it by keys if dict is provided."""
125
+ new_schema = None
126
+ data_from_path = get_data_from_path(data_path, raw_data)
127
+ if isinstance(data_from_path, dict):
128
+ # transpose by key the inputs:
129
+ listed_data, new_schema = self.transpose_by_key(data_from_path, schema)
130
+ new_schema = new_schema or schema
131
+ if not schema:
132
+ logger.warn(
133
+ f"No schema provided through add_model(); the order of {data_from_path} "
134
+ "may not be preserved."
135
+ )
136
+ elif not isinstance(data_from_path, list):
137
+ listed_data = [data_from_path]
138
+ else:
139
+ listed_data = data_from_path
140
+ return listed_data, new_schema
141
+
104
142
  @staticmethod
105
143
  def transpose_by_key(
106
144
  data: dict, schema: Optional[Union[str, list[str]]] = None
107
- ) -> Union[list[Any], list[list[Any]]]:
145
+ ) -> tuple[Union[list[Any], list[list[Any]]], list[str]]:
108
146
  """
109
147
  Transpose values from a dictionary by keys.
110
148
 
@@ -136,25 +174,32 @@ class MonitoringPreProcessor(storey.MapClass):
136
174
  * If result is a matrix, returns a list of lists.
137
175
 
138
176
  :raises ValueError: If the values include a mix of scalars and lists, or if the list lengths do not match.
177
+ mlrun.MLRunInvalidArgumentError if the schema keys are not contained in the data keys.
139
178
  """
140
-
179
+ new_schema = None
180
+ # Normalize keys in data:
181
+ normalize_data = {
182
+ mlrun.feature_store.api.norm_column_name(k): copy(v)
183
+ for k, v in data.items()
184
+ }
141
185
  # Normalize schema to list
142
186
  if not schema:
143
- keys = list(data.keys())
187
+ keys = list(normalize_data.keys())
188
+ new_schema = keys
144
189
  elif isinstance(schema, str):
145
- keys = [schema]
190
+ keys = [mlrun.feature_store.api.norm_column_name(schema)]
146
191
  else:
147
- keys = schema
192
+ keys = [mlrun.feature_store.api.norm_column_name(key) for key in schema]
148
193
 
149
- values = [data[key] for key in keys if key in data]
194
+ values = [normalize_data[key] for key in keys if key in normalize_data]
150
195
  if len(values) != len(keys):
151
196
  raise mlrun.MLRunInvalidArgumentError(
152
- f"Schema keys {keys} do not match the data keys {list(data.keys())}."
197
+ f"Schema keys {keys} are not contained in the data keys {list(data.keys())}."
153
198
  )
154
199
 
155
200
  # Detect if all are scalars ie: int,float,str
156
- all_scalars = all(not isinstance(v, (list, tuple, np.ndarray)) for v in values)
157
- all_lists = all(isinstance(v, (list, tuple, np.ndarray)) for v in values)
201
+ all_scalars = all(not isinstance(v, list | tuple | np.ndarray) for v in values)
202
+ all_lists = all(isinstance(v, list | tuple | np.ndarray) for v in values)
158
203
 
159
204
  if not (all_scalars or all_lists):
160
205
  raise ValueError(
@@ -168,12 +213,12 @@ class MonitoringPreProcessor(storey.MapClass):
168
213
  mat = np.stack(arrays, axis=0)
169
214
  transposed = mat.T
170
215
  else:
171
- return values[0]
216
+ return values[0], new_schema
172
217
 
173
218
  if transposed.shape[1] == 1 and transposed.shape[0] == 1:
174
219
  # Transform [[0]] -> [0]:
175
- return transposed[:, 0].tolist()
176
- return transposed.tolist()
220
+ return transposed[:, 0].tolist(), new_schema
221
+ return transposed.tolist(), new_schema
177
222
 
178
223
  def do(self, event):
179
224
  monitoring_event_list = []
@@ -202,6 +247,21 @@ class MonitoringPreProcessor(storey.MapClass):
202
247
  when = event._metadata.get(model, {}).get(
203
248
  mm_schemas.StreamProcessingEvent.WHEN
204
249
  )
250
+ # if the body is not a dict, use empty labels, error and metrics
251
+ if isinstance(event.body[model], dict):
252
+ body_by_model = event.body[model]
253
+ labels = body_by_model.get("labels") or {}
254
+ error = body_by_model.get(
255
+ mm_schemas.StreamProcessingEvent.ERROR
256
+ )
257
+ metrics = body_by_model.get(
258
+ mm_schemas.StreamProcessingEvent.METRICS
259
+ )
260
+ else:
261
+ labels = {}
262
+ error = None
263
+ metrics = None
264
+
205
265
  monitoring_event_list.append(
206
266
  {
207
267
  mm_schemas.StreamProcessingEvent.MODEL: model,
@@ -217,26 +277,14 @@ class MonitoringPreProcessor(storey.MapClass):
217
277
  ].get(
218
278
  mlrun.common.schemas.MonitoringData.MODEL_ENDPOINT_UID
219
279
  ),
220
- mm_schemas.StreamProcessingEvent.LABELS: monitoring_data[
221
- model
222
- ].get(mlrun.common.schemas.MonitoringData.OUTPUTS),
280
+ mm_schemas.StreamProcessingEvent.LABELS: labels,
223
281
  mm_schemas.StreamProcessingEvent.FUNCTION_URI: self.server.function_uri
224
282
  if self.server
225
283
  else None,
226
284
  mm_schemas.StreamProcessingEvent.REQUEST: request,
227
285
  mm_schemas.StreamProcessingEvent.RESPONSE: resp,
228
- mm_schemas.StreamProcessingEvent.ERROR: event.body[model][
229
- mm_schemas.StreamProcessingEvent.ERROR
230
- ]
231
- if mm_schemas.StreamProcessingEvent.ERROR
232
- in event.body[model]
233
- else None,
234
- mm_schemas.StreamProcessingEvent.METRICS: event.body[model][
235
- mm_schemas.StreamProcessingEvent.METRICS
236
- ]
237
- if mm_schemas.StreamProcessingEvent.METRICS
238
- in event.body[model]
239
- else None,
286
+ mm_schemas.StreamProcessingEvent.ERROR: error,
287
+ mm_schemas.StreamProcessingEvent.METRICS: metrics,
240
288
  }
241
289
  )
242
290
  elif monitoring_data:
@@ -248,6 +296,15 @@ class MonitoringPreProcessor(storey.MapClass):
248
296
  when = event._original_timestamp
249
297
  else:
250
298
  when = event._metadata.get(mm_schemas.StreamProcessingEvent.WHEN)
299
+ # if the body is not a dict, use empty labels, error and metrics
300
+ if isinstance(event.body, dict):
301
+ labels = event.body.get("labels") or {}
302
+ error = event.body.get(mm_schemas.StreamProcessingEvent.ERROR)
303
+ metrics = event.body.get(mm_schemas.StreamProcessingEvent.METRICS)
304
+ else:
305
+ labels = {}
306
+ error = None
307
+ metrics = None
251
308
  monitoring_event_list.append(
252
309
  {
253
310
  mm_schemas.StreamProcessingEvent.MODEL: model,
@@ -261,24 +318,14 @@ class MonitoringPreProcessor(storey.MapClass):
261
318
  mm_schemas.StreamProcessingEvent.ENDPOINT_ID: monitoring_data[
262
319
  model
263
320
  ].get(mlrun.common.schemas.MonitoringData.MODEL_ENDPOINT_UID),
264
- mm_schemas.StreamProcessingEvent.LABELS: monitoring_data[model].get(
265
- mlrun.common.schemas.MonitoringData.OUTPUTS
266
- ),
321
+ mm_schemas.StreamProcessingEvent.LABELS: labels,
267
322
  mm_schemas.StreamProcessingEvent.FUNCTION_URI: self.server.function_uri
268
323
  if self.server
269
324
  else None,
270
325
  mm_schemas.StreamProcessingEvent.REQUEST: request,
271
326
  mm_schemas.StreamProcessingEvent.RESPONSE: resp,
272
- mm_schemas.StreamProcessingEvent.ERROR: event.body[
273
- mm_schemas.StreamProcessingEvent.ERROR
274
- ]
275
- if mm_schemas.StreamProcessingEvent.ERROR in event.body
276
- else None,
277
- mm_schemas.StreamProcessingEvent.METRICS: event.body[
278
- mm_schemas.StreamProcessingEvent.METRICS
279
- ]
280
- if mm_schemas.StreamProcessingEvent.METRICS in event.body
281
- else None,
327
+ mm_schemas.StreamProcessingEvent.ERROR: error,
328
+ mm_schemas.StreamProcessingEvent.METRICS: metrics,
282
329
  }
283
330
  )
284
331
  event.body = monitoring_event_list
@@ -293,6 +340,9 @@ class BackgroundTaskStatus(storey.MapClass):
293
340
 
294
341
  def __init__(self, **kwargs):
295
342
  super().__init__(**kwargs)
343
+ self.matching_endpoints = MatchingEndpointsState.not_yet_checked
344
+ self.graph_model_endpoint_uids: set = set()
345
+ self.listed_model_endpoint_uids: set = set()
296
346
  self.server: mlrun.serving.GraphServer = (
297
347
  getattr(self.context, "server", None) if self.context else None
298
348
  )
@@ -313,43 +363,47 @@ class BackgroundTaskStatus(storey.MapClass):
313
363
  )
314
364
  )
315
365
  ):
316
- background_task = mlrun.get_run_db().get_project_background_task(
317
- self.server.project, self.server.model_endpoint_creation_task_name
318
- )
319
- self._background_task_check_timestamp = mlrun.utils.now_date()
320
- self._log_background_task_state(background_task.status.state)
321
- self._background_task_state = background_task.status.state
366
+ (
367
+ self._background_task_state,
368
+ self._background_task_check_timestamp,
369
+ self.listed_model_endpoint_uids,
370
+ ) = get_model_endpoints_creation_task_status(self.server)
371
+ if (
372
+ self.listed_model_endpoint_uids
373
+ and self.matching_endpoints == MatchingEndpointsState.not_yet_checked
374
+ ):
375
+ if not self.graph_model_endpoint_uids:
376
+ self.graph_model_endpoint_uids = collect_model_endpoint_uids(
377
+ self.server
378
+ )
379
+
380
+ if self.graph_model_endpoint_uids.issubset(self.listed_model_endpoint_uids):
381
+ self.matching_endpoints = MatchingEndpointsState.all_matched
382
+ elif self.listed_model_endpoint_uids is None:
383
+ self.matching_endpoints = MatchingEndpointsState.no_check_needed
322
384
 
323
385
  if (
324
386
  self._background_task_state
325
387
  == mlrun.common.schemas.BackgroundTaskState.succeeded
388
+ and self.matching_endpoints in MatchingEndpointsState.success_states()
326
389
  ):
327
390
  return event
328
391
  else:
329
392
  return None
330
393
 
331
- def _log_background_task_state(
332
- self, background_task_state: mlrun.common.schemas.BackgroundTaskState
333
- ):
334
- logger.info(
335
- "Checking model endpoint creation task status",
336
- task_name=self.server.model_endpoint_creation_task_name,
337
- )
338
- if (
339
- background_task_state
340
- in mlrun.common.schemas.BackgroundTaskState.terminal_states()
341
- ):
342
- logger.info(
343
- f"Model endpoint creation task completed with state {background_task_state}"
344
- )
345
- else: # in progress
346
- logger.info(
347
- f"Model endpoint creation task is still in progress with the current state: "
348
- f"{background_task_state}. Events will not be monitored for the next "
349
- f"{mlrun.mlconf.model_endpoint_monitoring.model_endpoint_creation_check_period} seconds",
350
- name=self.name,
351
- background_task_check_timestamp=self._background_task_check_timestamp.isoformat(),
352
- )
394
+
395
+ def collect_model_endpoint_uids(server: mlrun.serving.GraphServer) -> set[str]:
396
+ """Collects all model endpoint UIDs from the server's graph steps."""
397
+ model_endpoint_uids = set()
398
+ for step in server.graph.steps.values():
399
+ if hasattr(step, "monitoring_data"):
400
+ for model in step.monitoring_data.keys():
401
+ uid = step.monitoring_data[model].get(
402
+ mlrun.common.schemas.MonitoringData.MODEL_ENDPOINT_UID
403
+ )
404
+ if uid:
405
+ model_endpoint_uids.add(uid)
406
+ return model_endpoint_uids
353
407
 
354
408
 
355
409
  class SamplingStep(storey.MapClass):
@@ -24,6 +24,9 @@ import mlrun.common.schemas.model_monitoring
24
24
  import mlrun.model_monitoring
25
25
  from mlrun.utils import logger, now_date
26
26
 
27
+ from ..common.model_monitoring.helpers import (
28
+ get_model_endpoints_creation_task_status,
29
+ )
27
30
  from .utils import StepToDict, _extract_input_data, _update_result_body
28
31
 
29
32
 
@@ -474,22 +477,18 @@ class V2ModelServer(StepToDict):
474
477
  ) or getattr(self.context, "server", None)
475
478
  if not self.context.is_mock or self.context.monitoring_mock:
476
479
  if server.model_endpoint_creation_task_name:
477
- background_task = mlrun.get_run_db().get_project_background_task(
478
- server.project, server.model_endpoint_creation_task_name
479
- )
480
- logger.debug(
481
- "Checking model endpoint creation task status",
482
- task_name=server.model_endpoint_creation_task_name,
480
+ background_task_state, _, _ = get_model_endpoints_creation_task_status(
481
+ server
483
482
  )
484
483
  if (
485
- background_task.status.state
484
+ background_task_state
486
485
  in mlrun.common.schemas.BackgroundTaskState.terminal_states()
487
486
  ):
488
487
  logger.debug(
489
- f"Model endpoint creation task completed with state {background_task.status.state}"
488
+ f"Model endpoint creation task completed with state {background_task_state}"
490
489
  )
491
490
  if (
492
- background_task.status.state
491
+ background_task_state
493
492
  == mlrun.common.schemas.BackgroundTaskState.succeeded
494
493
  ):
495
494
  self._model_logger = (
@@ -504,7 +503,7 @@ class V2ModelServer(StepToDict):
504
503
  else: # in progress
505
504
  logger.debug(
506
505
  f"Model endpoint creation task is still in progress with the current state: "
507
- f"{background_task.status.state}.",
506
+ f"{background_task_state}.",
508
507
  name=self.name,
509
508
  )
510
509
  else:
@@ -217,7 +217,7 @@ class MLFlowTracker(Tracker):
217
217
  handler=handler,
218
218
  run_name=run.info.run_name,
219
219
  project_name=project.name,
220
- uid=run.info.run_uuid,
220
+ uid=run.info.run_id,
221
221
  )
222
222
 
223
223
  # Create a context from the run object:
@@ -373,7 +373,7 @@ class MLFlowTracker(Tracker):
373
373
  # Import the MLFlow run's artifacts to MLRun (model are logged after the rest of artifacts
374
374
  # so the artifacts can be registered as extra data in the models):
375
375
  artifacts = {}
376
- model_paths = []
376
+ model_uris = []
377
377
  for artifact in client.list_artifacts(run_id=run.info.run_id):
378
378
  # Get the artifact's local path (MLFlow suggests that if the artifact is already in the local filesystem
379
379
  # its local path will be returned:
@@ -381,29 +381,29 @@ class MLFlowTracker(Tracker):
381
381
  run_id=run.info.run_id,
382
382
  artifact_path=artifact.path,
383
383
  )
384
- # Check if the artifact is a model (will be logged after the artifacts):
385
- if artifact.is_dir and os.path.exists(
386
- os.path.join(
387
- artifact_local_path, "MLmodel"
388
- ) # Add tag to show model dir
389
- ):
390
- model_paths.append(artifact_local_path)
391
- else:
392
- # Log the artifact:
393
- artifact = MLFlowTracker._log_artifact(
394
- context=context,
395
- key=pathlib.Path(artifact.path).name.replace(".", "_"),
396
- # Mlflow has the same name for files but with different extensions, so we add extension to name
397
- local_path=artifact_local_path,
398
- tmp_path=tmp_dir,
399
- )
400
- artifacts[artifact.key] = artifact
384
+ # Log the artifact:
385
+ artifact = MLFlowTracker._log_artifact(
386
+ context=context,
387
+ key=pathlib.Path(artifact.path).name.replace(".", "_"),
388
+ # Mlflow has the same name for files but with different extensions, so we add extension to name
389
+ local_path=artifact_local_path,
390
+ tmp_path=tmp_dir,
391
+ )
392
+ artifacts[artifact.key] = artifact
393
+
394
+ # get all run model's uri's (artifact_location in mlflow 3.0.0).
395
+ logged_models = mlflow.search_logged_models(
396
+ filter_string=f"source_run_id = '{run.info.run_id}'",
397
+ output_format="list",
398
+ )
399
+ for logged_model in logged_models:
400
+ model_uris.append(logged_model.artifact_location)
401
401
 
402
- for model_path in model_paths:
402
+ for model_uri in model_uris:
403
403
  MLFlowTracker._log_model(
404
404
  context=context,
405
- model_uri=model_path,
406
- key=pathlib.Path(model_path).stem,
405
+ model_uri=model_uri,
406
+ key=pathlib.Path(model_uri).stem,
407
407
  metrics=results,
408
408
  extra_data=artifacts,
409
409
  tmp_path=tmp_dir,
@@ -439,20 +439,18 @@ class MLFlowTracker(Tracker):
439
439
 
440
440
  # Get the model info from MLFlow:
441
441
  model_info = mlflow.models.get_model_info(model_uri=model_uri)
442
+ # Download the model and set the path to local path:
443
+ local_model_path = mlflow.artifacts.download_artifacts(
444
+ artifact_uri=str(model_uri)
445
+ )
446
+ model_path = pathlib.Path(local_model_path)
442
447
 
443
448
  # Prepare the archive path:
444
- model_uri = pathlib.Path(model_uri)
445
- archive_path = pathlib.Path(tmp_path) / f"{model_uri.stem}.zip"
446
- if not os.path.exists(model_uri):
447
- local_path = mlflow.artifacts.download_artifacts(
448
- artifact_uri=str(model_uri)
449
- )
450
- model_uri = pathlib.Path(local_path)
451
-
449
+ archive_path = pathlib.Path(tmp_path) / f"{model_path.name}.zip"
452
450
  # TODO add progress bar for the case of large files
453
451
  # Zip the artifact:
454
452
  with zipfile.ZipFile(archive_path, "w") as zip_file:
455
- for path in model_uri.rglob("*"):
453
+ for path in model_path.rglob("*"):
456
454
  zip_file.write(filename=path, arcname=path.relative_to(model_uri))
457
455
 
458
456
  # Get inputs and outputs info: