mlrun 1.8.0rc38__py3-none-any.whl → 1.8.0rc39__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

mlrun/runtimes/pod.py CHANGED
@@ -17,7 +17,6 @@ import os
 import re
 import time
 import typing
-import warnings
 from collections.abc import Iterable
 from enum import Enum
 
@@ -704,7 +703,29 @@ class KubeResourceSpec(FunctionSpec):
                 ),
                 affinity_field_name=affinity_field_name,
             )
+        # purge any affinity / anti-affinity preemption related configuration and enrich with preemptible tolerations
         elif self_preemption_mode == PreemptionModes.allow.value:
+            # remove preemptible anti-affinity
+            self._prune_affinity_node_selector_requirement(
+                generate_preemptible_node_selector_requirements(
+                    NodeSelectorOperator.node_selector_op_not_in.value
+                ),
+                affinity_field_name=affinity_field_name,
+            )
+            # remove preemptible affinity
+            self._prune_affinity_node_selector_requirement(
+                generate_preemptible_node_selector_requirements(
+                    NodeSelectorOperator.node_selector_op_in.value
+                ),
+                affinity_field_name=affinity_field_name,
+            )
+
+            # remove preemptible nodes constrain
+            self._prune_node_selector(
+                mlconf.get_preemptible_node_selector(),
+                node_selector_field_name=node_selector_field_name,
+            )
+
         # enrich with tolerations
         self._merge_tolerations(
             generate_preemptible_tolerations(),
@@ -1180,132 +1201,6 @@ class KubeResource(BaseRuntime):
         """
         self.spec.with_requests(mem, cpu, patch=patch)
 
-    def detect_preemptible_node_selector(
-        self, node_selector: dict[str, str]
-    ) -> list[str]:
-        """
-        Checks if any provided node selector matches the preemptible node selectors.
-        Issues a warning if a selector may be pruned at runtime depending on preemption mode.
-
-        :param node_selector: The user-provided node selector dictionary.
-        """
-        preemptible_node_selector = mlconf.get_preemptible_node_selector()
-
-        return [
-            f"'{key}': '{val}'"
-            for key, val in node_selector.items()
-            if preemptible_node_selector.get(key) == val
-        ]
-
-    def detect_preemptible_tolerations(
-        self, tolerations: list[k8s_client.V1Toleration]
-    ) -> list[str]:
-        """
-        Checks if any provided toleration matches preemptible tolerations.
-        Issues a warning if a toleration may be pruned at runtime depending on preemption mode.
-
-        :param tolerations: The user-provided list of tolerations.
-        """
-        preemptible_tolerations = [
-            k8s_client.V1Toleration(
-                key=toleration.get("key"),
-                value=toleration.get("value"),
-                effect=toleration.get("effect"),
-            )
-            for toleration in mlconf.get_preemptible_tolerations()
-        ]
-
-        def _format_toleration(toleration):
-            return f"'{toleration.key}'='{toleration.value}' (effect: '{toleration.effect}')"
-
-        return [
-            _format_toleration(toleration)
-            for toleration in tolerations
-            if toleration in preemptible_tolerations
-        ]
-
-    def detect_preemptible_affinity(self, affinity: k8s_client.V1Affinity) -> list[str]:
-        """
-        Checks if any provided affinity rules match preemptible affinity configurations.
-        Issues a warning if an affinity rule may be pruned at runtime depending on preemption mode.
-
-        :param affinity: The user-provided affinity object.
-        """
-
-        preemptible_affinity_terms = generate_preemptible_nodes_affinity_terms()
-        conflicting_affinities = []
-
-        if (
-            affinity
-            and affinity.node_affinity
-            and affinity.node_affinity.required_during_scheduling_ignored_during_execution
-        ):
-            user_terms = affinity.node_affinity.required_during_scheduling_ignored_during_execution.node_selector_terms
-            for user_term in user_terms:
-                user_expressions = {
-                    (expr.key, expr.operator, tuple(expr.values or []))
-                    for expr in user_term.match_expressions or []
-                }
-
-                for preemptible_term in preemptible_affinity_terms:
-                    preemptible_expressions = {
-                        (expr.key, expr.operator, tuple(expr.values or []))
-                        for expr in preemptible_term.match_expressions or []
-                    }
-
-                    # Ensure operators match and preemptible expressions are present
-                    common_exprs = user_expressions & preemptible_expressions
-                    if common_exprs:
-                        formatted = ", ".join(
-                            f"'{key} {operator} {list(values)}'"
-                            for key, operator, values in common_exprs
-                        )
-                        conflicting_affinities.append(formatted)
-        return conflicting_affinities
-
-    def raise_preemptible_warning(
-        self,
-        node_selector: typing.Optional[dict[str, str]],
-        tolerations: typing.Optional[list[k8s_client.V1Toleration]],
-        affinity: typing.Optional[k8s_client.V1Affinity],
-    ) -> None:
-        """
-        Detects conflicts and issues a single warning if necessary.
-
-        :param node_selector: The user-provided node selector dictionary.
-        :param tolerations: The user-provided list of tolerations.
-        :param affinity: The user-provided affinity object.
-        """
-        conflict_messages = []
-
-        if node_selector:
-            ns_conflicts = ", ".join(
-                self.detect_preemptible_node_selector(node_selector)
-            )
-            if ns_conflicts:
-                conflict_messages.append(f"Node selectors: {ns_conflicts}")
-
-        if tolerations:
-            tol_conflicts = ", ".join(self.detect_preemptible_tolerations(tolerations))
-            if tol_conflicts:
-                conflict_messages.append(f"Tolerations: {tol_conflicts}")
-
-        if affinity:
-            affinity_conflicts = ", ".join(self.detect_preemptible_affinity(affinity))
-            if affinity_conflicts:
-                conflict_messages.append(f"Affinity: {affinity_conflicts}")
-
-        if conflict_messages:
-            warning_componentes = "; \n".join(conflict_messages)
-            warnings.warn(
-                f"Warning: based on the preemptible node settings configured in your MLRun configuration,\n"
-                f"{warning_componentes}\n"
-                f" may be removed or adjusted at runtime.\n"
-                "This adjustment depends on the function's preemption mode. \n"
-                "The list of potential adjusted preemptible selectors can be viewed here: "
-                "mlrun.mlconf.get_preemptible_node_selector() and mlrun.mlconf.get_preemptible_tolerations()."
-            )
-
     def with_node_selection(
         self,
         node_name: typing.Optional[str] = None,
@@ -1314,14 +1209,19 @@ class KubeResource(BaseRuntime):
         tolerations: typing.Optional[list[k8s_client.V1Toleration]] = None,
     ):
         """
-        Enables control over which Kubernetes node the job will run on.
+        Enables to control on which k8s node the job will run
+
+        :param node_name:     The name of the k8s node
+        :param node_selector: Label selector, only nodes with matching labels will be eligible to be picked
+        :param affinity:      Expands the types of constraints you can express - see
+                              https://kubernetes.io/docs/concepts/scheduling-eviction/assign-pod-node/#affinity-and-anti-affinity
+                              for details
+        :param tolerations:   Tolerations are applied to pods, and allow (but do not require) the pods to schedule
+                              onto nodes with matching taints - see
+                              https://kubernetes.io/docs/concepts/scheduling-eviction/taint-and-toleration
+                              for details
 
-        :param node_name: The name of the Kubernetes node.
-        :param node_selector: Label selector, only nodes with matching labels will be eligible.
-        :param affinity: Defines scheduling constraints.
-        :param tolerations: Allows scheduling onto nodes with matching taints.
         """
-        # Apply values as before
         if node_name:
             self.spec.node_name = node_name
         if node_selector is not None:
@@ -1332,12 +1232,6 @@ class KubeResource(BaseRuntime):
         if tolerations is not None:
             self.spec.tolerations = tolerations
 
-        self.raise_preemptible_warning(
-            node_selector=self.spec.node_selector,
-            tolerations=self.spec.tolerations,
-            affinity=self.spec.affinity,
-        )
-
     def with_priority_class(self, name: typing.Optional[str] = None):
         """
         Enables to control the priority of the pod
@@ -1578,15 +1472,13 @@ class KubeResource(BaseRuntime):
                 f"Started building image: {data.get('data', {}).get('spec', {}).get('build', {}).get('image')}"
             )
         if watch and not ready:
-            state = self._build_watch(
+            self.status.state = self._build_watch(
                 watch=watch,
                 show_on_failure=show_on_failure,
            )
-            ready = state == "ready"
-            self.status.state = state
-
-        if watch and not ready:
-            raise mlrun.errors.MLRunRuntimeError("Deploy failed")
+            ready = self.status.state == "ready"
+            if not ready:
+                raise mlrun.errors.MLRunRuntimeError("Deploy failed")
         return ready
 
     def _build_watch(
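
For orientation, a minimal usage sketch of the scheduling APIs touched above; the function name, image, and node-selector label are illustrative only, and the actual preemptible selectors and tolerations come from the MLRun service configuration:

import mlrun

# Hypothetical job function (names and labels are examples, not part of this diff)
fn = mlrun.new_function("trainer", kind="job", image="mlrun/mlrun")

# Constrain scheduling with a label selector
fn.with_node_selection(node_selector={"node-group": "workers"})

# With the "allow" mode, the branch added above prunes preemptible
# affinity/anti-affinity and node-selector constraints and merges in the
# preemptible tolerations when the pod spec is enriched
fn.with_preemption_mode("allow")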
mlrun/serving/routers.py CHANGED
@@ -18,6 +18,7 @@ import copy
 import json
 import traceback
 import typing
+from datetime import timedelta
 from enum import Enum
 from io import BytesIO
 from typing import Union
@@ -78,6 +79,9 @@ class BaseModelRouter(RouterToDict):
         self.inputs_key = "instances" if self.protocol == "v1" else "inputs"
         self._input_path = input_path
         self._result_path = result_path
+        self._background_task_check_timestamp = None
+        self._background_task_terminate = False
+        self._background_task_current_state = None
         self.kwargs = kwargs
 
     def parse_event(self, event):
@@ -135,6 +139,7 @@ class BaseModelRouter(RouterToDict):
             raise ValueError(
                 f"illegal path prefix {urlpath}, must start with {self.url_prefix}"
             )
+        self._update_background_task_state(event)
         return event
 
     def do_event(self, event, *args, **kwargs):
@@ -160,6 +165,63 @@ class BaseModelRouter(RouterToDict):
         """run tasks after processing the event"""
         return event
 
+    def _get_background_task_status(
+        self,
+    ) -> mlrun.common.schemas.BackgroundTaskState:
+        self._background_task_check_timestamp = now_date()
+        server: mlrun.serving.GraphServer = getattr(
+            self.context, "_server", None
+        ) or getattr(self.context, "server", None)
+        if not self.context.is_mock:
+            if server.model_endpoint_creation_task_name:
+                background_task = mlrun.get_run_db().get_project_background_task(
+                    server.project, server.model_endpoint_creation_task_name
+                )
+                logger.debug(
+                    "Checking model endpoint creation task status",
+                    task_name=server.model_endpoint_creation_task_name,
+                )
+                if (
+                    background_task.status.state
+                    in mlrun.common.schemas.BackgroundTaskState.terminal_states()
+                ):
+                    logger.debug(
+                        f"Model endpoint creation task completed with state {background_task.status.state}"
+                    )
+                    self._background_task_terminate = True
+                else:  # in progress
+                    logger.debug(
+                        f"Model endpoint creation task is still in progress with the current state: "
+                        f"{background_task.status.state}. Events will not be monitored for the next 15 seconds",
+                        name=self.name,
+                        background_task_check_timestamp=self._background_task_check_timestamp.isoformat(),
+                    )
+                return background_task.status.state
+            else:
+                logger.debug(
+                    "Model endpoint creation task name not provided",
+                )
+        elif self.context.monitoring_mock:
+            self._background_task_terminate = (
+                True  # If mock monitoring we return success and terminate task check.
+            )
+            return mlrun.common.schemas.BackgroundTaskState.succeeded
+        self._background_task_terminate = True  # If mock without monitoring we return failed and terminate task check.
+        return mlrun.common.schemas.BackgroundTaskState.failed
+
+    def _update_background_task_state(self, event):
+        if not self._background_task_terminate and (
+            self._background_task_check_timestamp is None
+            or now_date() - self._background_task_check_timestamp
+            >= timedelta(seconds=15)
+        ):
+            self._background_task_current_state = self._get_background_task_status()
+        if event.body:
+            event.body["background_task_state"] = (
+                self._background_task_current_state
+                or mlrun.common.schemas.BackgroundTaskState.running
+            )
+
 
 class ModelRouter(BaseModelRouter):
     def _resolve_route(self, body, urlpath):
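
A small sketch of the hand-off introduced here, assuming BackgroundTaskState is importable from mlrun.common.schemas as referenced in this diff (the event body below is illustrative): the router stamps background_task_state into the body, re-checking the task at most every 15 seconds, and downstream steps read that key during lazy initialization:

from mlrun.common.schemas import BackgroundTaskState

# What a stamped event body might look like after preprocess()
event_body = {"inputs": [[1.0, 2.0, 3.0]]}
event_body["background_task_state"] = BackgroundTaskState.running

# Downstream consumption mirrors the _lazy_init logic shown later in this diff:
# only a succeeded state enables the model logger / monitoring for the step
if event_body.get("background_task_state") == BackgroundTaskState.succeeded:
    enable_monitoring = True  # placeholder for creating the step's _ModelLogPusher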
@@ -599,75 +661,29 @@ class VotingEnsemble(ParallelRun):
         self.log_router = True
         self.prediction_col_name = prediction_col_name or "prediction"
         self.format_response_with_col_name_flag = format_response_with_col_name_flag
-        self.model_endpoint_uid = None
-        self.model_endpoint = None
+        self.model_endpoint_uid = kwargs.get("model_endpoint_uid", None)
         self.shard_by_endpoint = shard_by_endpoint
+        self._model_logger = None
         self.initialized = False
 
     def post_init(self, mode="sync", **kwargs):
         self._update_weights(self.weights)
 
-    def _lazy_init(self, event_id):
-        server: mlrun.serving.GraphServer = getattr(
-            self.context, "_server", None
-        ) or getattr(self.context, "server", None)
-        if not server:
-            logger.warn("GraphServer not initialized for VotingEnsemble instance")
-            return
-        if not self.context.is_mock or self.context.monitoring_mock:
-            if server.model_endpoint_creation_task_name:
-                background_task = mlrun.get_run_db().get_project_background_task(
-                    server.project, server.model_endpoint_creation_task_name
-                )
-                logger.info(
-                    "Checking model endpoint creation task status",
-                    task_name=server.model_endpoint_creation_task_name,
-                )
-                if (
-                    background_task.status.state
-                    in mlrun.common.schemas.BackgroundTaskState.terminal_states()
-                ):
-                    logger.info(
-                        f"Model endpoint creation task completed with state {background_task.status.state}"
-                    )
-                else:  # in progress
-                    logger.debug(
-                        f"Model endpoint creation task is still in progress with the current state: "
-                        f"{background_task.status.state}. This event will not be monitored.",
-                        name=self.name,
-                        event_id=event_id,
-                    )
-                    self.initialized = False
-                    return
-            else:
-                logger.info(
-                    "Model endpoint creation task name not provided",
-                )
-            try:
-                self.model_endpoint_uid = (
-                    mlrun.get_run_db()
-                    .get_model_endpoint(
-                        project=server.project,
-                        name=self.name,
-                        function_name=server.function_name,
-                        function_tag=server.function_tag or "latest",
-                        tsdb_metrics=False,
-                    )
-                    .metadata.uid
-                )
-            except mlrun.errors.MLRunNotFoundError:
-                logger.info(
-                    "Model endpoint not found for this step; monitoring for this model will not be performed",
-                    function_name=server.function_name,
-                    name=self.name,
+    def _lazy_init(self, event):
+        if event and isinstance(event, dict):
+            background_task_state = event.get("background_task_state", None)
+            if (
+                background_task_state
+                == mlrun.common.schemas.BackgroundTaskState.succeeded
+            ):
+                self._model_logger = (
+                    _ModelLogPusher(self, self.context)
+                    if self.context
+                    and self.context.stream.enabled
+                    and self.model_endpoint_uid
+                    else None
                 )
-                self.model_endpoint_uid = None
-        self._model_logger = (
-            _ModelLogPusher(self, self.context)
-            if self.context and self.context.stream.enabled and self.model_endpoint_uid
-            else None
-        )
-        self.initialized = True
+        self.initialized = True
 
     def _resolve_route(self, body, urlpath):
         """Resolves the appropriate model to send the event to.
@@ -872,14 +888,14 @@
         Response
             Event response after running the requested logic
         """
-        if not self.initialized:
-            self._lazy_init(event.id)
         start = now_date()
         # Handle and verify the request
         original_body = event.body
         event.body = _extract_input_data(self._input_path, event.body)
         event = self.preprocess(event)
         event = self._pre_handle_event(event)
+        if not self.initialized:
+            self._lazy_init(event.body)
 
         # Should we terminate the event?
         if hasattr(event, "terminated") and event.terminated:
@@ -111,11 +111,11 @@ class V2ModelServer(StepToDict):
         if model:
             self.model = model
             self.ready = True
-        self.model_endpoint_uid = None
+        self.model_endpoint_uid = kwargs.get("model_endpoint_uid", None)
         self.shard_by_endpoint = shard_by_endpoint
         self._model_logger = None
         self.initialized = False
-        self.output_schema = []
+        self.output_schema = kwargs.get("outputs", [])
 
     def _load_and_update_state(self):
         try:
@@ -137,67 +137,29 @@ class V2ModelServer(StepToDict):
         else:
             self._load_and_update_state()
 
-    def _lazy_init(self, event_id):
-        server: mlrun.serving.GraphServer = getattr(
-            self.context, "_server", None
-        ) or getattr(self.context, "server", None)
-        if not server:
-            logger.warn("GraphServer not initialized for VotingEnsemble instance")
-            return
-        if not self.context.is_mock and not self.model_spec:
+        if self.ready and not self.context.is_mock and not self.model_spec:
             self.get_model()
-        if not self.context.is_mock or self.context.monitoring_mock:
-            if server.model_endpoint_creation_task_name:
-                background_task = mlrun.get_run_db().get_project_background_task(
-                    server.project, server.model_endpoint_creation_task_name
-                )
-                logger.debug(
-                    "Checking model endpoint creation task status",
-                    task_name=server.model_endpoint_creation_task_name,
-                )
-                if (
-                    background_task.status.state
-                    in mlrun.common.schemas.BackgroundTaskState.terminal_states()
-                ):
-                    logger.debug(
-                        f"Model endpoint creation task completed with state {background_task.status.state}"
-                    )
-                else:  # in progress
-                    logger.debug(
-                        f"Model endpoint creation task is still in progress with the current state: "
-                        f"{background_task.status.state}. This event will not be monitored.",
-                        name=self.name,
-                        event_id=event_id,
-                    )
-                    self.initialized = False
-                    return
-            else:
-                logger.debug(
-                    "Model endpoint creation task name not provided",
-                )
-            try:
-                model_endpoint = mlrun.get_run_db().get_model_endpoint(
-                    project=server.project,
-                    name=self.name,
-                    function_name=server.function_name,
-                    function_tag=server.function_tag or "latest",
-                    tsdb_metrics=False,
-                )
-                self.model_endpoint_uid = model_endpoint.metadata.uid
-                self.output_schema = model_endpoint.spec.label_names
-            except mlrun.errors.MLRunNotFoundError:
-                logger.info(
-                    "Model endpoint not found for this step; monitoring for this model will not be performed",
-                    function_name=server.function_name,
-                    name=self.name,
+
+        if self.model_spec:
+            self.output_schema = self.output_schema or [
+                feature.name for feature in self.model_spec.outputs
+            ]
+
+    def _lazy_init(self, event):
+        if event and isinstance(event, dict):
+            background_task_state = event.get("background_task_state", None)
+            if (
+                background_task_state
+                == mlrun.common.schemas.BackgroundTaskState.succeeded
+            ):
+                self._model_logger = (
+                    _ModelLogPusher(self, self.context)
+                    if self.context
+                    and self.context.stream.enabled
+                    and self.model_endpoint_uid
+                    else None
                 )
-                self.model_endpoint_uid = None
-        self._model_logger = (
-            _ModelLogPusher(self, self.context)
-            if self.context and self.context.stream.enabled and self.model_endpoint_uid
-            else None
-        )
-        self.initialized = True
+        self.initialized = True
 
     def get_param(self, key: str, default=None):
         """get param by key (specified in the model or the function)"""
@@ -276,7 +238,7 @@ class V2ModelServer(StepToDict):
     def do_event(self, event, *args, **kwargs):
         """main model event handler method"""
         if not self.initialized:
-            self._lazy_init(event.id)
+            self._lazy_init(event.body)
         start = now_date()
         original_body = event.body
         event_body = _extract_input_data(self._input_path, event.body)
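
Both V2ModelServer and VotingEnsemble now take model_endpoint_uid and outputs from their constructor kwargs instead of resolving a model endpoint from the DB during lazy init. A minimal sketch of how such kwargs reach a step when composing a serving graph; the model name, path, and output label are hypothetical, and model_endpoint_uid is presumably injected by the platform rather than set by users:

import mlrun

serving_fn = mlrun.new_function("serving", kind="serving", image="mlrun/mlrun")
serving_fn.add_model(
    "my-model",
    model_path="store://models/my-project/my-model:latest",
    class_name="mlrun.serving.V2ModelServer",
    outputs=["prediction"],  # forwarded as class kwargs, read via kwargs.get("outputs", [])
)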
mlrun/utils/async_http.py CHANGED
@@ -26,8 +26,7 @@ from aiohttp_retry.client import _RequestContext
 from mlrun.config import config
 from mlrun.errors import err_to_str
 from mlrun.errors import raise_for_status as ml_raise_for_status
-
-from .helpers import logger as mlrun_logger
+from mlrun.utils.helpers import logger as mlrun_logger
 
 DEFAULT_BLACKLISTED_METHODS = [
     "POST",
mlrun/utils/helpers.py CHANGED
@@ -146,7 +146,7 @@ def get_artifact_target(item: dict, project=None):
     return item["spec"].get("target_path")
 
 
-# TODO: left for migrations testing purposes. Remove in 1.8.0.
+# TODO: Remove once data migration v5 is obsolete
 def is_legacy_artifact(artifact):
     if isinstance(artifact, dict):
         return "metadata" not in artifact
@@ -498,7 +498,6 @@ def get_in(obj, keys, default=None):
     """
    if isinstance(keys, str):
        keys = keys.split(".")
-
    for key in keys:
        if not obj or key not in obj:
            return default
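
The get_in change above only drops a blank line; the dotted-key lookup behavior is unchanged. For reference, a small usage sketch (values are illustrative):

from mlrun.utils.helpers import get_in

cfg = {"spec": {"image": "mlrun/mlrun"}}
print(get_in(cfg, "spec.image"))        # -> "mlrun/mlrun"
print(get_in(cfg, "spec.replicas", 1))  # -> 1 (missing key falls back to the default)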
@@ -1,4 +1,4 @@
 {
-  "git_commit": "239429702facc9fa9c6667d4a9db952f1940e598",
-  "version": "1.8.0-rc38"
+  "git_commit": "c4d1cedcb732b6108ad1b9a2e33df82ba9114fa1",
+  "version": "1.8.0-rc39"
 }
@@ -1,6 +1,6 @@
 Metadata-Version: 2.2
 Name: mlrun
-Version: 1.8.0rc38
+Version: 1.8.0rc39
 Summary: Tracking and config of machine learning runs
 Home-page: https://github.com/mlrun/mlrun
 Author: Yaron Haviv