PyPI - mlrun - Versions diffs - 1.10.0rc25__py3-none-any.whl → 1.10.0rc27__py3-none-any.whl - Mend

mlrun 1.10.0rc25__py3-none-any.whl → 1.10.0rc27__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of mlrun might be problematic. Click here for more details.

Files changed (44) hide show

mlrun/artifacts/llm_prompt.py +8 -1
mlrun/common/model_monitoring/helpers.py +86 -0
mlrun/common/schemas/hub.py +11 -18
mlrun/config.py +2 -3
mlrun/datastore/__init__.py +2 -2
mlrun/datastore/datastore_profile.py +27 -3
mlrun/datastore/model_provider/huggingface_provider.py +5 -1
mlrun/datastore/model_provider/model_provider.py +1 -1
mlrun/datastore/s3.py +24 -2
mlrun/datastore/storeytargets.py +2 -3
mlrun/db/base.py +14 -0
mlrun/db/httpdb.py +11 -2
mlrun/db/nopdb.py +13 -0
mlrun/k8s_utils.py +0 -14
mlrun/model_monitoring/applications/base.py +20 -3
mlrun/model_monitoring/controller.py +5 -3
mlrun/model_monitoring/db/tsdb/tdengine/tdengine_connector.py +3 -1
mlrun/model_monitoring/db/tsdb/v3io/stream_graph_steps.py +17 -4
mlrun/model_monitoring/db/tsdb/v3io/v3io_connector.py +3 -0
mlrun/model_monitoring/helpers.py +5 -5
mlrun/projects/pipelines.py +2 -2
mlrun/projects/project.py +5 -5
mlrun/run.py +12 -1
mlrun/runtimes/base.py +0 -3
mlrun/runtimes/mounts.py +15 -2
mlrun/runtimes/nuclio/function.py +35 -26
mlrun/runtimes/pod.py +153 -11
mlrun/serving/routers.py +23 -41
mlrun/serving/server.py +1 -0
mlrun/serving/states.py +3 -3
mlrun/serving/system_steps.py +52 -29
mlrun/serving/v2_serving.py +9 -10
mlrun/utils/helpers.py +10 -13
mlrun/utils/notifications/notification/base.py +18 -0
mlrun/utils/notifications/notification/git.py +2 -4
mlrun/utils/notifications/notification/slack.py +2 -4
mlrun/utils/notifications/notification/webhook.py +2 -5
mlrun/utils/version/version.json +2 -2
{mlrun-1.10.0rc25.dist-info → mlrun-1.10.0rc27.dist-info}/METADATA +22 -26
{mlrun-1.10.0rc25.dist-info → mlrun-1.10.0rc27.dist-info}/RECORD +44 -44
{mlrun-1.10.0rc25.dist-info → mlrun-1.10.0rc27.dist-info}/WHEEL +0 -0
{mlrun-1.10.0rc25.dist-info → mlrun-1.10.0rc27.dist-info}/entry_points.txt +0 -0
{mlrun-1.10.0rc25.dist-info → mlrun-1.10.0rc27.dist-info}/licenses/LICENSE +0 -0
{mlrun-1.10.0rc25.dist-info → mlrun-1.10.0rc27.dist-info}/top_level.txt +0 -0

mlrun/model_monitoring/db/tsdb/v3io/stream_graph_steps.py CHANGED Viewed

@@ -25,10 +25,12 @@ from mlrun.utils import logger
 def _normalize_dict_for_v3io_frames(event: dict[str, Any]) -> dict[str, Any]:
     """
-    Normalize user defined keys - input data to a model and its predictions,
-    to a form V3IO frames tolerates.
+    Normalize user-defined keys (e.g., model input data and predictions) to a format V3IO Frames tolerates.
-    The dictionary keys should conform to '^[a-zA-Z_:]([a-zA-Z0-9_:])*$'.
+    - Keys must match regex: '^[a-zA-Z_:]([a-zA-Z0-9_:])*$'
+    - Replace invalid characters (e.g., '-') with '_'.
+    - Prefix keys starting with digits with '_'.
+    - Flatten nested dictionaries using dot notation, while normalizing keys recursively.
     """
     prefix = "_"
@@ -38,7 +40,18 @@ def _normalize_dict_for_v3io_frames(event: dict[str, Any]) -> dict[str, Any]:
             return prefix + key
         return key
-    return {norm_key(k): v for k, v in event.items()}
+    def flatten_dict(d: dict[str, Any], parent_key: str = "") -> dict[str, Any]:
+        items = {}
+        for k, v in d.items():
+            new_key = norm_key(k)
+            full_key = f"{parent_key}.{new_key}" if parent_key else new_key
+            if isinstance(v, dict):
+                items.update(flatten_dict(v, full_key))
+            else:
+                items[full_key] = v
+        return items
+    return flatten_dict(event)
 class ProcessBeforeTSDB(mlrun.feature_store.steps.MapClass):

mlrun/model_monitoring/db/tsdb/v3io/v3io_connector.py CHANGED Viewed

@@ -973,6 +973,9 @@ class V3IOTSDBConnector(TSDBConnector):
         start: Optional[datetime] = None,
         end: Optional[datetime] = None,
     ) -> dict[str, float]:
+        if not endpoint_ids:
+            return {}
         # Get the last request timestamp for each endpoint from the KV table.
         # The result of the query is a list of dictionaries,
         # each dictionary contains the endpoint id and the last request timestamp.

mlrun/model_monitoring/helpers.py CHANGED Viewed

@@ -143,7 +143,7 @@ def get_stream_path(
         return stream_uri.replace("v3io://", f"ds://{profile.name}")
     elif isinstance(
-        profile, mlrun.datastore.datastore_profile.DatastoreProfileKafkaSource
+        profile, mlrun.datastore.datastore_profile.DatastoreProfileKafkaStream
     ):
         topic = mlrun.common.model_monitoring.helpers.get_kafka_topic(
             project=project, function_name=function_name
@@ -152,7 +152,7 @@ def get_stream_path(
     else:
         raise mlrun.errors.MLRunValueError(
             f"Received an unexpected stream profile type: {type(profile)}\n"
-            "Expects `DatastoreProfileV3io` or `DatastoreProfileKafkaSource`."
+            "Expects `DatastoreProfileV3io` or `DatastoreProfileKafkaStream`."
         )
@@ -300,7 +300,7 @@ def _get_v3io_output_stream(
 def _get_kafka_output_stream(
     *,
-    kafka_profile: mlrun.datastore.datastore_profile.DatastoreProfileKafkaSource,
+    kafka_profile: mlrun.datastore.datastore_profile.DatastoreProfileKafkaStream,
     project: str,
     function_name: str,
     mock: bool = False,
@@ -356,7 +356,7 @@ def get_output_stream(
         )
     elif isinstance(
-        profile, mlrun.datastore.datastore_profile.DatastoreProfileKafkaSource
+        profile, mlrun.datastore.datastore_profile.DatastoreProfileKafkaStream
     ):
         return _get_kafka_output_stream(
             kafka_profile=profile,
@@ -368,7 +368,7 @@ def get_output_stream(
     else:
         raise mlrun.errors.MLRunValueError(
             f"Received an unexpected stream profile type: {type(profile)}\n"
-            "Expects `DatastoreProfileV3io` or `DatastoreProfileKafkaSource`."
+            "Expects `DatastoreProfileV3io` or `DatastoreProfileKafkaStream`."
         )

mlrun/projects/pipelines.py CHANGED Viewed

@@ -228,11 +228,11 @@ class _PipelineContext:
         force_run_local = mlrun.mlconf.force_run_local
         if force_run_local is None or force_run_local == "auto":
             force_run_local = not mlrun.mlconf.is_api_running_on_k8s()
+        if self.workflow:
             if not mlrun.mlconf.kfp_url:
                 logger.debug("Kubeflow pipeline URL is not set, running locally")
                 force_run_local = True
-        if self.workflow:
             force_run_local = force_run_local or self.workflow.run_local
         return force_run_local

mlrun/projects/project.py CHANGED Viewed

@@ -3816,7 +3816,7 @@ class MlrunProject(ModelObj):
             import mlrun
             from mlrun.datastore.datastore_profile import (
-                DatastoreProfileKafkaSource,
+                DatastoreProfileKafkaStream,
                 DatastoreProfileTDEngine,
             )
@@ -3833,7 +3833,7 @@ class MlrunProject(ModelObj):
             project.register_datastore_profile(tsdb_profile)
             # Create and register stream profile
-            stream_profile = DatastoreProfileKafkaSource(
+            stream_profile = DatastoreProfileKafkaStream(
                 name="my-kafka",
                 brokers=["<kafka-broker-ip-address>:9094"],
                 topics=[],  # Keep the topics list empty
@@ -3875,9 +3875,9 @@ class MlrunProject(ModelObj):
         .. code-block:: python
-            from mlrun.datastore.datastore_profile import DatastoreProfileKafkaSource
+            from mlrun.datastore.datastore_profile import DatastoreProfileKafkaStream
-            stream_profile = DatastoreProfileKafkaSource(
+            stream_profile = DatastoreProfileKafkaStream(
                 name="confluent-kafka",
                 brokers=["<server-domain-start>.confluent.cloud:9092"],
                 topics=[],
@@ -3906,7 +3906,7 @@ class MlrunProject(ModelObj):
                                           The supported profiles are:
                                           * :py:class:`~mlrun.datastore.datastore_profile.DatastoreProfileV3io`
-                                          * :py:class:`~mlrun.datastore.datastore_profile.DatastoreProfileKafkaSource`
+                                          * :py:class:`~mlrun.datastore.datastore_profile.DatastoreProfileKafkaStream`
                                           You need to register one of them, and pass the profile's name.
         :param replace_creds:             If ``True`` - override the existing credentials.

mlrun/run.py CHANGED Viewed

@@ -222,7 +222,8 @@ def get_or_create_ctx(
     :param spec:     dictionary holding run spec
     :param with_env: look for context in environment vars, default True
     :param rundb:    path/url to the metadata and artifact database
-    :param project:  project to initiate the context in (by default `mlrun.mlconf.active_project`)
+    :param project:  project to initiate the context in (by default `mlrun.mlconf.active_project`).
+                              If not set, an active project must exist.
     :param upload_artifacts:  when using local context (not as part of a job/run), upload artifacts to the
                               system default artifact path location
     :return: execution context
@@ -277,6 +278,16 @@ def get_or_create_ctx(
     if newspec and not isinstance(newspec, dict):
         newspec = json.loads(newspec)
+    if (
+        not newspec.get("metadata", {}).get("project")
+        and not project
+        and not mlconf.active_project
+    ):
+        raise mlrun.errors.MLRunMissingProjectError(
+            """No active project found. Make sure to set an active project using: mlrun.get_or_create_project()
+            You can verify the active project with: mlrun.mlconf.active_project"""
+        )
     if not newspec:
         newspec = {}
         if upload_artifacts:

mlrun/runtimes/base.py CHANGED Viewed

@@ -142,9 +142,6 @@ class FunctionSpec(ModelObj):
     def build(self, build):
         self._build = self._verify_dict(build, "build", ImageBuilder)
-    def enrich_function_preemption_spec(self):
-        pass
     def validate_service_account(self, allowed_service_accounts):
         pass

mlrun/runtimes/mounts.py CHANGED Viewed

@@ -14,6 +14,7 @@
 import os
 import typing
+import warnings
 from collections import namedtuple
 from mlrun.config import config
@@ -247,10 +248,22 @@ def mount_s3(
     def _use_s3_cred(runtime: "KubeResource"):
         _access_key = aws_access_key or os.environ.get(prefix + "AWS_ACCESS_KEY_ID")
         _secret_key = aws_secret_key or os.environ.get(prefix + "AWS_SECRET_ACCESS_KEY")
-        _endpoint_url = endpoint_url or os.environ.get(prefix + "S3_ENDPOINT_URL")
+        # Check for endpoint URL with backward compatibility
+        _endpoint_url = endpoint_url or os.environ.get(prefix + "AWS_ENDPOINT_URL_S3")
+        if not _endpoint_url:
+            # Check for deprecated environment variable
+            _endpoint_url = os.environ.get(prefix + "S3_ENDPOINT_URL")
+            if _endpoint_url:
+                warnings.warn(
+                    "S3_ENDPOINT_URL is deprecated in 1.10.0 and will be removed in 1.12.0, "
+                    "use AWS_ENDPOINT_URL_S3 instead.",
+                    # TODO: Remove this in 1.12.0
+                    FutureWarning,
+                )
         if _endpoint_url:
-            runtime.set_env(prefix + "S3_ENDPOINT_URL", _endpoint_url)
+            runtime.set_env(prefix + "AWS_ENDPOINT_URL_S3", _endpoint_url)
         if aws_region:
             runtime.set_env(prefix + "AWS_REGION", aws_region)
         if non_anonymous:

mlrun/runtimes/nuclio/function.py CHANGED Viewed

@@ -968,24 +968,6 @@ class RemoteRuntime(KubeResource):
         self._mock_server = None
         if "://" not in path:
-            if not self.status.address:
-                # here we check that if default http trigger is disabled, function contains a custom http trigger
-                # Otherwise, the function is not invokable, so we raise an error
-                if (
-                    not self._trigger_of_kind_exists(kind="http")
-                    and self.spec.disable_default_http_trigger
-                ):
-                    raise mlrun.errors.MLRunPreconditionFailedError(
-                        "Default http trigger creation is disabled and there is no any other custom http trigger, "
-                        "so function can not be invoked via http. Either enable default http trigger creation or "
-                        "create custom http trigger"
-                    )
-                state, _, _ = self._get_state()
-                if state not in ["ready", "scaledToZero"]:
-                    logger.warning(f"Function is in the {state} state")
-                if not self.status.address:
-                    raise ValueError("no function address first run .deploy()")
             path = self._resolve_invocation_url(path, force_external_address)
         if headers is None:
@@ -1228,19 +1210,47 @@ class RemoteRuntime(KubeResource):
         # internal / external invocation urls is a nuclio >= 1.6.x feature
         # try to infer the invocation url from the internal and if not exists, use external.
         # $$$$ we do not want to use the external invocation url (e.g.: ingress, nodePort, etc.)
+        # check function state before invocation
+        state, _, _ = self._get_state()
+        if state not in ["ready", "scaledToZero"]:
+            logger.warning(f"Function is in the {state} state")
+        # prefer internal invocation url if running inside k8s cluster
         if (
             not force_external_address
             and self.status.internal_invocation_urls
             and mlrun.k8s_utils.is_running_inside_kubernetes_cluster()
         ):
-            return mlrun.utils.helpers.join_urls(
+            url = mlrun.utils.helpers.join_urls(
                 f"http://{self.status.internal_invocation_urls[0]}", path
             )
+            logger.debug(
+                f"Using internal invocation url {url}. Make sure you have network access to the k8s cluster. "
+                f"Otherwise, set force_external_address to True"
+            )
+            return url
         if self.status.external_invocation_urls:
             return mlrun.utils.helpers.join_urls(
                 f"http://{self.status.external_invocation_urls[0]}", path
             )
+        if not self.status.address:
+            # if there is no address
+            # here we check that if default http trigger is disabled, function contains a custom http trigger
+            # Otherwise, the function is not invokable, so we raise an error
+            if (
+                not self._trigger_of_kind_exists(kind="http")
+                and self.spec.disable_default_http_trigger
+            ):
+                raise mlrun.errors.MLRunPreconditionFailedError(
+                    "Default http trigger creation is disabled and there is no any other custom http trigger, "
+                    "so function can not be invoked via http. Either enable default http trigger creation or "
+                    "create custom http trigger"
+                )
+            else:
+                raise ValueError("no function address first run .deploy()")
         else:
             return mlrun.utils.helpers.join_urls(f"http://{self.status.address}", path)
@@ -1294,6 +1304,8 @@ class RemoteRuntime(KubeResource):
     def get_url(
         self,
         force_external_address: bool = False,
+        # leaving auth_info for BC
+        # TODO: remove in 1.12.0
         auth_info: AuthInfo = None,
     ):
         """
@@ -1304,13 +1316,10 @@ class RemoteRuntime(KubeResource):
         :return: returns function's url
         """
-        if not self.status.address:
-            state, _, _ = self._get_state(auth_info=auth_info)
-            if state != "ready" or not self.status.address:
-                raise ValueError(
-                    "no function address or not ready, first run .deploy()"
-                )
+        if auth_info:
+            logger.warning(
+                "Deprecated parameter 'auth_info' was provided, but will be ignored. Will be removed in 1.12.0."
+            )
         return self._resolve_invocation_url("", force_external_address)
     @staticmethod

mlrun/runtimes/pod.py CHANGED Viewed

@@ -17,6 +17,7 @@ import os
 import re
 import time
 import typing
+import warnings
 from collections.abc import Iterable
 from enum import Enum
@@ -35,6 +36,7 @@ from mlrun.common.schemas import (
 from ..config import config as mlconf
 from ..k8s_utils import (
+    generate_preemptible_nodes_affinity_terms,
     validate_node_selectors,
 )
 from ..utils import logger, update_in
@@ -874,6 +876,133 @@ class KubeResource(BaseRuntime):
         """
         self.spec.with_requests(mem, cpu, patch=patch)
+    @staticmethod
+    def detect_preemptible_node_selector(node_selector: dict[str, str]) -> list[str]:
+        """
+        Check whether any provided node selector matches preemptible selectors.
+        :param node_selector: User-provided node selector mapping.
+        :return: List of `"key='value'"` strings that match a preemptible selector.
+        """
+        preemptible_node_selector = mlconf.get_preemptible_node_selector()
+        return [
+            f"'{key}': '{val}'"
+            for key, val in node_selector.items()
+            if preemptible_node_selector.get(key) == val
+        ]
+    def detect_preemptible_tolerations(
+        self, tolerations: list[k8s_client.V1Toleration]
+    ) -> list[str]:
+        """
+        Check whether any provided toleration matches preemptible tolerations.
+        :param tolerations: User-provided tolerations.
+        :return: List of formatted toleration strings that are considered preemptible.
+        """
+        preemptible_tolerations = [
+            k8s_client.V1Toleration(
+                key=toleration.get("key"),
+                value=toleration.get("value"),
+                effect=toleration.get("effect"),
+            )
+            for toleration in mlconf.get_preemptible_tolerations()
+        ]
+        def _format_toleration(toleration):
+            return f"'{toleration.key}'='{toleration.value}' (effect: '{toleration.effect}')"
+        return [
+            _format_toleration(toleration)
+            for toleration in tolerations
+            if toleration in preemptible_tolerations
+        ]
+    def detect_preemptible_affinity(self, affinity: k8s_client.V1Affinity) -> list[str]:
+        """
+        Check whether any provided affinity rules match preemptible affinity configs.
+        :param affinity: User-provided affinity object.
+        :return: List of formatted expressions that overlap with preemptible terms.
+        """
+        preemptible_affinity_terms = generate_preemptible_nodes_affinity_terms()
+        conflicting_affinities = []
+        if (
+            affinity
+            and affinity.node_affinity
+            and affinity.node_affinity.required_during_scheduling_ignored_during_execution
+        ):
+            user_terms = affinity.node_affinity.required_during_scheduling_ignored_during_execution.node_selector_terms
+            for user_term in user_terms:
+                user_expressions = {
+                    (expr.key, expr.operator, tuple(expr.values or []))
+                    for expr in user_term.match_expressions or []
+                }
+                for preemptible_term in preemptible_affinity_terms:
+                    preemptible_expressions = {
+                        (expr.key, expr.operator, tuple(expr.values or []))
+                        for expr in preemptible_term.match_expressions or []
+                    }
+                    # Ensure operators match and preemptible expressions are present
+                    common_exprs = user_expressions & preemptible_expressions
+                    if common_exprs:
+                        formatted = ", ".join(
+                            f"'{key}  {operator}  {list(values)}'"
+                            for key, operator, values in common_exprs
+                        )
+                        conflicting_affinities.append(formatted)
+        return conflicting_affinities
+    def raise_preemptible_warning(
+        self,
+        node_selector: typing.Optional[dict[str, str]],
+        tolerations: typing.Optional[list[k8s_client.V1Toleration]],
+        affinity: typing.Optional[k8s_client.V1Affinity],
+    ) -> None:
+        """
+        Detect conflicts and emit a single consolidated warning if needed.
+        :param node_selector: User-provided node selector.
+        :param tolerations: User-provided tolerations.
+        :param affinity: User-provided affinity.
+        :warns: PreemptionWarning - Emitted when any of the provided selectors,
+                tolerations, or affinity terms match the configured preemptible
+                settings. The message lists the conflicting items.
+        """
+        conflict_messages = []
+        if node_selector:
+            ns_conflicts = ", ".join(
+                self.detect_preemptible_node_selector(node_selector)
+            )
+            if ns_conflicts:
+                conflict_messages.append(f"Node selectors: {ns_conflicts}")
+        if tolerations:
+            tol_conflicts = ", ".join(self.detect_preemptible_tolerations(tolerations))
+            if tol_conflicts:
+                conflict_messages.append(f"Tolerations: {tol_conflicts}")
+        if affinity:
+            affinity_conflicts = ", ".join(self.detect_preemptible_affinity(affinity))
+            if affinity_conflicts:
+                conflict_messages.append(f"Affinity: {affinity_conflicts}")
+        if conflict_messages:
+            warning_componentes = "; \n".join(conflict_messages)
+            warnings.warn(
+                f"Warning: based on MLRun's preemptible node configuration, the following components \n"
+                f"may be removed or adjusted at runtime:\n"
+                f"{warning_componentes}.\n"
+                "This adjustment depends on the function's preemption mode. \n"
+                "The list of potential adjusted preemptible selectors can be viewed here: "
+                "mlrun.mlconf.get_preemptible_node_selector() and mlrun.mlconf.get_preemptible_tolerations()."
+            )
     def with_node_selection(
         self,
         node_name: typing.Optional[str] = None,
@@ -882,18 +1011,26 @@ class KubeResource(BaseRuntime):
         tolerations: typing.Optional[list[k8s_client.V1Toleration]] = None,
     ):
         """
-        Enables to control on which k8s node the job will run
-        :param node_name:       The name of the k8s node
-        :param node_selector:   Label selector, only nodes with matching labels will be eligible to be picked
-        :param affinity:        Expands the types of constraints you can express - see
-                                https://kubernetes.io/docs/concepts/scheduling-eviction/assign-pod-node/#affinity-and-anti-affinity
-                                for details
-        :param tolerations:     Tolerations are applied to pods, and allow (but do not require) the pods to schedule
-                                onto nodes with matching taints - see
-                                https://kubernetes.io/docs/concepts/scheduling-eviction/taint-and-toleration
-                                for details
+        Configure Kubernetes node scheduling for this function.
+        Updates one or more scheduling hints: exact node pinning, label-based selection,
+        affinity/anti-affinity rules, and taint tolerations. Passing ``None`` leaves the
+        current value unchanged; pass an empty dict/list (e.g., ``{}``, ``[]``) to clear.
+        :param node_name: Exact Kubernetes node name to pin the pod to.
+        :param node_selector: Mapping of label selectors. Use ``{}`` to clear.
+        :param affinity: :class:`kubernetes.client.V1Affinity` constraints.
+        :param tolerations: List of :class:`kubernetes.client.V1Toleration`. Use ``[]`` to clear.
+        :warns: PreemptionWarning - Emitted if provided selectors/tolerations/affinity
+                conflict with the function's preemption mode.
+        Example usage:
+            Prefer a GPU pool and allow scheduling on spot nodes::
+                job.with_node_selection(
+                    node_selector={"nodepool": "gpu"},
+                    tolerations=[k8s_client.V1Toleration(key="spot", operator="Exists")],
+                )
         """
         if node_name:
             self.spec.node_name = node_name
@@ -904,6 +1041,11 @@ class KubeResource(BaseRuntime):
             self.spec.affinity = affinity
         if tolerations is not None:
             self.spec.tolerations = tolerations
+        self.raise_preemptible_warning(
+            node_selector=self.spec.node_selector,
+            tolerations=self.spec.tolerations,
+            affinity=self.spec.affinity,
+        )
     def with_priority_class(self, name: typing.Optional[str] = None):
         """

mlrun/serving/routers.py CHANGED Viewed

@@ -31,6 +31,9 @@ import mlrun.common.model_monitoring
 import mlrun.common.schemas.model_monitoring
 from mlrun.utils import logger, now_date
+from ..common.model_monitoring.helpers import (
+    get_model_endpoints_creation_task_status,
+)
 from .utils import RouterToDict, _extract_input_data, _update_result_body
 from .v2_serving import _ModelLogPusher
@@ -171,46 +174,6 @@ class BaseModelRouter(RouterToDict):
         """run tasks after processing the event"""
         return event
-    def _get_background_task_status(
-        self,
-    ) -> mlrun.common.schemas.BackgroundTaskState:
-        self._background_task_check_timestamp = now_date()
-        server: mlrun.serving.GraphServer = getattr(
-            self.context, "_server", None
-        ) or getattr(self.context, "server", None)
-        if not self.context.is_mock:
-            if server.model_endpoint_creation_task_name:
-                background_task = mlrun.get_run_db().get_project_background_task(
-                    server.project, server.model_endpoint_creation_task_name
-                )
-                logger.debug(
-                    "Checking model endpoint creation task status",
-                    task_name=server.model_endpoint_creation_task_name,
-                )
-                if (
-                    background_task.status.state
-                    in mlrun.common.schemas.BackgroundTaskState.terminal_states()
-                ):
-                    logger.info(
-                        f"Model endpoint creation task completed with state {background_task.status.state}"
-                    )
-                else:  # in progress
-                    logger.info(
-                        f"Model endpoint creation task is still in progress with the current state: "
-                        f"{background_task.status.state}. Events will not be monitored for the next "
-                        f"{mlrun.mlconf.model_endpoint_monitoring.model_endpoint_creation_check_period} seconds",
-                        name=self.name,
-                        background_task_check_timestamp=self._background_task_check_timestamp.isoformat(),
-                    )
-                return background_task.status.state
-            else:
-                logger.error(
-                    "Model endpoint creation task name not provided. This function is not being monitored.",
-                )
-        elif self.context.monitoring_mock:
-            return mlrun.common.schemas.BackgroundTaskState.succeeded
-        return mlrun.common.schemas.BackgroundTaskState.failed
     def _update_background_task_state(self, event):
         if not self.background_task_reached_terminal_state and (
             self._background_task_check_timestamp is None
@@ -219,7 +182,26 @@ class BaseModelRouter(RouterToDict):
                 seconds=mlrun.mlconf.model_endpoint_monitoring.model_endpoint_creation_check_period
             )
         ):
-            self._background_task_current_state = self._get_background_task_status()
+            server: mlrun.serving.GraphServer = getattr(
+                self.context, "_server", None
+            ) or getattr(self.context, "server", None)
+            if not self.context.is_mock:
+                (
+                    self._background_task_current_state,
+                    self._background_task_check_timestamp,
+                    _,
+                ) = get_model_endpoints_creation_task_status(server)
+            elif self.context.monitoring_mock:
+                self._background_task_current_state = (
+                    mlrun.common.schemas.BackgroundTaskState.succeeded
+                )
+                self._background_task_check_timestamp = mlrun.utils.now_date()
+            else:
+                self._background_task_current_state = (
+                    mlrun.common.schemas.BackgroundTaskState.failed
+                )
+                self._background_task_check_timestamp = mlrun.utils.now_date()
         if event.body:
             event.body["background_task_state"] = (
                 self._background_task_current_state

mlrun/serving/server.py CHANGED Viewed

@@ -417,6 +417,7 @@ def add_monitoring_general_steps(
             "mlrun.serving.system_steps.BackgroundTaskStatus",
             "background_task_status_step",
             model_endpoint_creation_strategy=mlrun.common.schemas.ModelEndpointCreationStrategy.SKIP,
+            full_event=True,
         )
     monitor_flow_step = graph.add_step(
         "storey.Filter",

mlrun/serving/states.py CHANGED Viewed

@@ -39,7 +39,7 @@ import mlrun.common.schemas as schemas
 from mlrun.artifacts.llm_prompt import LLMPromptArtifact, PlaceholderDefaultDict
 from mlrun.artifacts.model import ModelArtifact
 from mlrun.datastore.datastore_profile import (
-    DatastoreProfileKafkaSource,
+    DatastoreProfileKafkaStream,
     DatastoreProfileKafkaTarget,
     DatastoreProfileV3io,
     datastore_profile_read,
@@ -3398,7 +3398,7 @@ def _init_async_objects(context, steps):
                         datastore_profile = datastore_profile_read(stream_path)
                         if isinstance(
                             datastore_profile,
-                            (DatastoreProfileKafkaTarget, DatastoreProfileKafkaSource),
+                            (DatastoreProfileKafkaTarget, DatastoreProfileKafkaStream),
                         ):
                             step._async_object = KafkaStoreyTarget(
                                 path=stream_path,
@@ -3414,7 +3414,7 @@ def _init_async_objects(context, steps):
                         else:
                             raise mlrun.errors.MLRunValueError(
                                 f"Received an unexpected stream profile type: {type(datastore_profile)}\n"
-                                "Expects `DatastoreProfileV3io` or `DatastoreProfileKafkaSource`."
+                                "Expects `DatastoreProfileV3io` or `DatastoreProfileKafkaStream`."
                             )
                     elif stream_path.startswith("kafka://") or kafka_brokers:
                         topic, brokers = parse_kafka_url(stream_path, kafka_brokers)

mlrun 1.10.0rc25__py3-none-any.whl → 1.10.0rc27__py3-none-any.whl

Potentially problematic release.

mlrun 1.10.0rc25__py3-none-any.whl → 1.10.0rc27__py3-none-any.whl