PyPI - mlrun - Versions diffs - 1.10.0rc2__py3-none-any.whl → 1.10.0rc4__py3-none-any.whl - Mend

mlrun 1.10.0rc2-py3-none-any.whl → 1.10.0rc4-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of mlrun might be problematic. Click here for more details.

Files changed (67)

mlrun/__init__.py +2 -2
mlrun/__main__.py +2 -2
mlrun/artifacts/__init__.py +1 -0
mlrun/artifacts/base.py +20 -8
mlrun/artifacts/dataset.py +1 -1
mlrun/artifacts/document.py +1 -1
mlrun/artifacts/helpers.py +40 -0
mlrun/artifacts/llm_prompt.py +165 -0
mlrun/artifacts/manager.py +13 -1
mlrun/artifacts/model.py +92 -12
mlrun/artifacts/plots.py +2 -2
mlrun/common/formatters/artifact.py +1 -0
mlrun/common/runtimes/constants.py +0 -21
mlrun/common/schemas/artifact.py +12 -12
mlrun/common/schemas/pipeline.py +0 -16
mlrun/common/schemas/project.py +0 -17
mlrun/common/schemas/runs.py +0 -17
mlrun/config.py +3 -3
mlrun/datastore/base.py +2 -2
mlrun/datastore/datastore.py +1 -1
mlrun/datastore/datastore_profile.py +3 -11
mlrun/datastore/redis.py +2 -3
mlrun/datastore/sources.py +0 -9
mlrun/datastore/store_resources.py +3 -3
mlrun/datastore/storeytargets.py +2 -5
mlrun/datastore/targets.py +7 -57
mlrun/datastore/utils.py +1 -11
mlrun/db/base.py +7 -6
mlrun/db/httpdb.py +72 -66
mlrun/db/nopdb.py +1 -0
mlrun/errors.py +22 -1
mlrun/execution.py +87 -1
mlrun/feature_store/common.py +5 -5
mlrun/feature_store/feature_set.py +10 -6
mlrun/feature_store/feature_vector.py +8 -6
mlrun/launcher/base.py +1 -1
mlrun/lists.py +1 -1
mlrun/model.py +0 -5
mlrun/model_monitoring/__init__.py +0 -1
mlrun/model_monitoring/api.py +0 -44
mlrun/model_monitoring/applications/evidently/base.py +3 -41
mlrun/model_monitoring/controller.py +1 -1
mlrun/model_monitoring/writer.py +1 -4
mlrun/projects/operations.py +3 -3
mlrun/projects/project.py +260 -23
mlrun/run.py +9 -27
mlrun/runtimes/base.py +6 -6
mlrun/runtimes/kubejob.py +2 -2
mlrun/runtimes/nuclio/function.py +3 -3
mlrun/runtimes/nuclio/serving.py +13 -23
mlrun/runtimes/remotesparkjob.py +6 -0
mlrun/runtimes/sparkjob/spark3job.py +6 -0
mlrun/serving/__init__.py +5 -1
mlrun/serving/server.py +39 -3
mlrun/serving/states.py +101 -4
mlrun/serving/v2_serving.py +1 -1
mlrun/utils/helpers.py +66 -9
mlrun/utils/notifications/notification/slack.py +5 -1
mlrun/utils/notifications/notification_pusher.py +2 -1
mlrun/utils/version/version.json +2 -2
{mlrun-1.10.0rc2.dist-info → mlrun-1.10.0rc4.dist-info}/METADATA +22 -10
{mlrun-1.10.0rc2.dist-info → mlrun-1.10.0rc4.dist-info}/RECORD +66 -65
{mlrun-1.10.0rc2.dist-info → mlrun-1.10.0rc4.dist-info}/WHEEL +1 -1
mlrun/model_monitoring/tracking_policy.py +0 -124
{mlrun-1.10.0rc2.dist-info → mlrun-1.10.0rc4.dist-info}/entry_points.txt +0 -0
{mlrun-1.10.0rc2.dist-info → mlrun-1.10.0rc4.dist-info}/licenses/LICENSE +0 -0
{mlrun-1.10.0rc2.dist-info → mlrun-1.10.0rc4.dist-info}/top_level.txt +0 -0

mlrun/runtimes/nuclio/serving.py CHANGED Viewed

@@ -11,12 +11,12 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
+import copy
 import json
 import os
 import warnings
 from copy import deepcopy
-from typing import TYPE_CHECKING, Optional, Union
+from typing import Optional, Union
 import nuclio
 from nuclio import KafkaTrigger
@@ -27,7 +27,11 @@ from mlrun.datastore import get_kafka_brokers_from_dict, parse_kafka_url
 from mlrun.model import ObjectList
 from mlrun.runtimes.function_reference import FunctionReference
 from mlrun.secrets import SecretsStore
-from mlrun.serving.server import GraphServer, create_graph_server
+from mlrun.serving.server import (
+    GraphServer,
+    add_system_steps_to_graph,
+    create_graph_server,
+)
 from mlrun.serving.states import (
     RootFlowStep,
     RouterStep,
@@ -43,10 +47,6 @@ from .function import NuclioSpec, RemoteRuntime, min_nuclio_versions
 serving_subkind = "serving_v2"
-if TYPE_CHECKING:
-    # remove this block in 1.9.0
-    from mlrun.model_monitoring import TrackingPolicy
 def new_v2_model_server(
     name,
@@ -95,7 +95,6 @@ class ServingSpec(NuclioSpec):
         "default_class",
         "secret_sources",
         "track_models",
-        "tracking_policy",
     ]
     def __init__(
@@ -132,7 +131,6 @@ class ServingSpec(NuclioSpec):
         graph_initializer=None,
         error_stream=None,
         track_models=None,
-        tracking_policy=None,
         secret_sources=None,
         default_content_type=None,
         node_name=None,
@@ -207,7 +205,6 @@ class ServingSpec(NuclioSpec):
         self.graph_initializer = graph_initializer
         self.error_stream = error_stream
         self.track_models = track_models
-        self.tracking_policy = tracking_policy
         self.secret_sources = secret_sources or []
         self.default_content_type = default_content_type
         self.model_endpoint_creation_task_name = model_endpoint_creation_task_name
@@ -314,7 +311,6 @@ class ServingRuntime(RemoteRuntime):
         batch: Optional[int] = None,
         sampling_percentage: float = 100,
         stream_args: Optional[dict] = None,
-        tracking_policy: Optional[Union["TrackingPolicy", dict]] = None,
         enable_tracking: bool = True,
     ) -> None:
         """Apply on your serving function to monitor a deployed model, including real-time dashboards to detect drift
@@ -361,20 +357,12 @@ class ServingRuntime(RemoteRuntime):
         if batch:
             warnings.warn(
                 "The `batch` size parameter was deprecated in version 1.8.0 and is no longer used. "
-                "It will be removed in 1.10.",
-                # TODO: Remove this in 1.10
+                "It will be removed in 1.11.",
+                # TODO: Remove this in 1.11
                 FutureWarning,
             )
         if stream_args:
             self.spec.parameters["stream_args"] = stream_args
-        if tracking_policy is not None:
-            warnings.warn(
-                "The `tracking_policy` argument is deprecated from version 1.7.0 "
-                "and has no effect. It will be removed in 1.9.0.\n"
-                "To set the desired model monitoring time window and schedule, use "
-                "the `base_period` argument in `project.enable_model_monitoring()`.",
-                FutureWarning,
-            )
     def add_model(
         self,
@@ -719,7 +707,6 @@ class ServingRuntime(RemoteRuntime):
             "graph_initializer": self.spec.graph_initializer,
             "error_stream": self.spec.error_stream,
             "track_models": self.spec.track_models,
-            "tracking_policy": None,
             "default_content_type": self.spec.default_content_type,
             "model_endpoint_creation_task_name": self.spec.model_endpoint_creation_task_name,
         }
@@ -761,10 +748,13 @@ class ServingRuntime(RemoteRuntime):
             set_paths(workdir)
             os.chdir(workdir)
+        system_graph = None
+        if isinstance(self.spec.graph, RootFlowStep):
+            system_graph = add_system_steps_to_graph(copy.deepcopy(self.spec.graph))
         server = create_graph_server(
             parameters=self.spec.parameters,
             load_mode=self.spec.load_mode,
-            graph=self.spec.graph,
+            graph=system_graph or self.spec.graph,
             verbose=self.verbose,
             current_function=current_function,
             graph_initializer=self.spec.graph_initializer,

mlrun/runtimes/remotesparkjob.py CHANGED Viewed

@@ -103,6 +103,12 @@ class RemoteSparkRuntime(KubejobRuntime):
     @classmethod
     def deploy_default_image(cls):
+        if not mlrun.get_current_project(silent=True):
+            raise mlrun.errors.MLRunMissingProjectError(
+                "An active project is required to run deploy_default_image(). "
+                "This can be set by calling get_or_create_project(), load_project(), or new_project()."
+            )
         sj = mlrun.new_function(
             kind="remote-spark", name="remote-spark-default-image-deploy-temp"
         )

mlrun/runtimes/sparkjob/spark3job.py CHANGED Viewed

@@ -804,6 +804,12 @@ class Spark3Runtime(KubejobRuntime):
     @classmethod
     def deploy_default_image(cls, with_gpu=False):
+        if not mlrun.get_current_project(silent=True):
+            raise mlrun.errors.MLRunMissingProjectError(
+                "An active project is required to run deploy_default_image(). "
+                "This can be set by calling get_or_create_project()."
+            )
         sj = mlrun.new_function(kind=cls.kind, name="spark-default-image-deploy-temp")
         sj.spec.build.image = cls._get_default_deployed_mlrun_image_name(with_gpu)

mlrun/serving/__init__.py CHANGED Viewed

@@ -30,7 +30,11 @@ __all__ = [
 ]
 from .routers import ModelRouter, VotingEnsemble  # noqa
-from .server import GraphContext, GraphServer, create_graph_server  # noqa
+from .server import (
+    GraphContext,
+    GraphServer,
+    create_graph_server,
+)  # noqa
 from .states import (
     ErrorStep,
     QueueStep,

mlrun/serving/server.py CHANGED Viewed

@@ -15,6 +15,7 @@
 __all__ = ["GraphServer", "create_graph_server", "GraphContext", "MockEvent"]
 import asyncio
+import copy
 import json
 import os
 import socket
@@ -71,7 +72,7 @@ class _StreamContext:
         if (enabled or log_stream) and function_uri:
             self.enabled = True
             project, _, _, _ = parse_versioned_object_uri(
-                function_uri, config.default_project
+                function_uri, config.active_project
             )
             stream_args = parameters.get("stream_args", {})
@@ -108,7 +109,6 @@ class GraphServer(ModelObj):
         graph_initializer=None,
         error_stream=None,
         track_models=None,
-        tracking_policy=None,
         secret_sources=None,
         default_content_type=None,
         function_name=None,
@@ -129,7 +129,6 @@ class GraphServer(ModelObj):
         self.graph_initializer = graph_initializer
         self.error_stream = error_stream
         self.track_models = track_models
-        self.tracking_policy = tracking_policy
         self._error_stream_object = None
         self.secret_sources = secret_sources
         self._secrets = SecretsStore.from_list(secret_sources)
@@ -330,12 +329,49 @@ class GraphServer(ModelObj):
         return self.graph.wait_for_completion()
+def add_system_steps_to_graph(graph: RootFlowStep):
+    model_runner_raisers = {}
+    steps = list(graph.steps.values())
+    for step in steps:
+        if (
+            isinstance(step, mlrun.serving.states.ModelRunnerStep)
+            and step.raise_exception
+        ):
+            error_step = graph.add_step(
+                class_name="mlrun.serving.states.ModelRunnerErrorRaiser",
+                name=f"{step.name}_error_raise",
+                after=step.name,
+                full_event=True,
+                raise_exception=step.raise_exception,
+                models_names=list(step.class_args["models"].keys()),
+            )
+            if step.responder:
+                step.responder = False
+                error_step.respond()
+            model_runner_raisers[step.name] = error_step.name
+            error_step.on_error = step.on_error
+        if isinstance(step.after, list):
+            for i in range(len(step.after)):
+                if step.after[i] in model_runner_raisers:
+                    step.after[i] = model_runner_raisers[step.after[i]]
+        else:
+            if step.after in model_runner_raisers:
+                step.after = model_runner_raisers[step.after]
+    return graph
 def v2_serving_init(context, namespace=None):
     """hook for nuclio init_context()"""
     context.logger.info("Initializing server from spec")
     spec = mlrun.utils.get_serving_spec()
     server = GraphServer.from_dict(spec)
+    if isinstance(server.graph, RootFlowStep):
+        server.graph = add_system_steps_to_graph(copy.deepcopy(server.graph))
+        context.logger.info_with(
+            "Server graph after adding system steps",
+            graph=str(server.graph.steps),
+        )
     if config.log_level.lower() == "debug":
         server.verbose = True

mlrun/serving/states.py CHANGED Viewed

@@ -32,12 +32,14 @@ import storey.utils
 import mlrun
 import mlrun.artifacts
 import mlrun.common.schemas as schemas
+from mlrun.artifacts.model import ModelArtifact
 from mlrun.datastore.datastore_profile import (
     DatastoreProfileKafkaSource,
     DatastoreProfileKafkaTarget,
     DatastoreProfileV3io,
     datastore_profile_read,
 )
+from mlrun.datastore.store_resources import get_store_resource
 from mlrun.datastore.storeytargets import KafkaStoreyTarget, StreamStoreyTarget
 from mlrun.utils import logger
@@ -47,7 +49,7 @@ from ..datastore.utils import (
     get_kafka_brokers_from_dict,
     parse_kafka_url,
 )
-from ..errors import MLRunInvalidArgumentError, err_to_str
+from ..errors import MLRunInvalidArgumentError, ModelRunnerError, err_to_str
 from ..model import ModelObj, ObjectDict
 from ..platforms.iguazio import parse_path
 from ..utils import get_class, get_function, is_explicit_ack_supported
@@ -955,10 +957,33 @@ class RouterStep(TaskStep):
 class Model(storey.ParallelExecutionRunnable):
+    def __init__(
+        self,
+        name: str,
+        raise_exception: bool = True,
+        artifact_uri: Optional[str] = None,
+        **kwargs,
+    ):
+        super().__init__(name=name, raise_exception=raise_exception, **kwargs)
+        if artifact_uri is not None and not isinstance(artifact_uri, str):
+            raise MLRunInvalidArgumentError("artifact_uri argument must be a string")
+        self.artifact_uri = artifact_uri
     def load(self) -> None:
         """Override to load model if needed."""
         pass
+    def _get_artifact_object(self) -> Union[ModelArtifact, None]:
+        if self.artifact_uri:
+            if mlrun.datastore.is_store_uri(self.artifact_uri):
+                return get_store_resource(self.artifact_uri)
+            else:
+                raise ValueError(
+                    "Could not get artifact, artifact_uri must be a valid artifact store URI"
+                )
+        else:
+            return None
     def init(self):
         self.load()
@@ -976,6 +1001,39 @@ class Model(storey.ParallelExecutionRunnable):
     async def run_async(self, body: Any, path: str) -> Any:
         return self.predict(body)
+    def get_local_model_path(self, suffix="") -> (str, dict):
+        """get local model file(s) and extra data items by using artifact
+        If the model file is stored in remote cloud storage, download it to the local file system
+        Examples
+        --------
+        ::
+            def load(self):
+                model_file, extra_data = self.get_local_model_path(suffix=".pkl")
+                self.model = load(open(model_file, "rb"))
+                categories = extra_data["categories"].as_df()
+        Parameters
+        ----------
+        suffix : str
+            optional, model file suffix (when the model_path is a directory)
+        Returns
+        -------
+        str
+            (local) model file
+        dict
+            extra dataitems dictionary
+        """
+        artifact = self._get_artifact_object()
+        if artifact:
+            model_file, _, extra_dataitems = mlrun.artifacts.get_model(
+                suffix=suffix, model_dir=artifact
+            )
+            return model_file, extra_dataitems
+        return None, None
 class ModelSelector:
     """Used to select which models to run on each event."""
@@ -1022,14 +1080,18 @@ class ModelRunnerStep(TaskStep, StepToDict):
     :param model_selector: ModelSelector instance whose select() method will be used to select models to run on each
       event. Optional. If not passed, all models will be run.
+    :param raise_exception:  If True, an error will be raised when model selection fails or if one of the models raised
+      an error. If False, the error will appear in the output event.
     """
     kind = "model_runner"
+    _dict_fields = TaskStep._dict_fields + ["raise_exception"]
     def __init__(
         self,
         *args,
         model_selector: Optional[Union[str, ModelSelector]] = None,
+        raise_exception: bool = True,
         **kwargs,
     ):
         super().__init__(
@@ -1038,6 +1100,7 @@ class ModelRunnerStep(TaskStep, StepToDict):
             class_args=dict(model_selector=model_selector),
             **kwargs,
         )
+        self.raise_exception = raise_exception
     def add_model(
         self,
@@ -1084,6 +1147,14 @@ class ModelRunnerStep(TaskStep, StepToDict):
         """
         # TODO allow model_class as Model object as part of ML-9924
         model_parameters = model_parameters or {}
+        model_artifact = (
+            model_artifact.uri
+            if isinstance(model_artifact, mlrun.artifacts.Artifact)
+            else model_artifact
+        )
+        model_parameters["artifact_uri"] = model_parameters.get(
+            "artifact_uri", model_artifact
+        )
         if model_parameters.get("name", endpoint_name) != endpoint_name:
             raise mlrun.errors.MLRunInvalidArgumentError(
                 "Inconsistent name for model added to ModelRunnerStep."
@@ -1106,9 +1177,7 @@ class ModelRunnerStep(TaskStep, StepToDict):
             schemas.MonitoringData.INPUT_PATH: input_path,
             schemas.MonitoringData.CREATION_STRATEGY: creation_strategy,
             schemas.MonitoringData.LABELS: labels,
-            schemas.MonitoringData.MODEL_PATH: model_artifact.uri
-            if isinstance(model_artifact, mlrun.artifacts.Artifact)
-            else model_artifact,
+            schemas.MonitoringData.MODEL_PATH: model_artifact,
         }
         self.class_args[schemas.ModelRunnerStepData.MODELS] = models
         self.class_args[schemas.ModelRunnerStepData.MONITORING_DATA] = monitoring_data
@@ -1121,7 +1190,12 @@ class ModelRunnerStep(TaskStep, StepToDict):
         model_objects = []
         for model, model_params in models.values():
             if not isinstance(model, Model):
+                # prevent model predict from raising error
+                model_params["raise_exception"] = False
                 model = get_class(model, namespace)(**model_params)
+            else:
+                # prevent model predict from raising error
+                model._raise_exception = False
             model_objects.append(model)
         self._async_object = ModelRunner(
             model_selector=model_selector,
@@ -1129,6 +1203,29 @@ class ModelRunnerStep(TaskStep, StepToDict):
         )
+class ModelRunnerErrorRaiser(storey.MapClass):
+    def __init__(self, raise_exception: bool, models_names: list[str], **kwargs):
+        super().__init__(**kwargs)
+        self._raise_exception = raise_exception
+        self._models_names = models_names
+    def do(self, event):
+        if self._raise_exception:
+            errors = {}
+            should_raise = False
+            if len(self._models_names) == 1:
+                should_raise = event.body.get("error") is not None
+                errors[self._models_names[0]] = event.body.get("error")
+            else:
+                for model in event.body:
+                    errors[model] = event.body.get(model).get("error")
+                    if errors[model] is not None:
+                        should_raise = True
+            if should_raise:
+                raise ModelRunnerError(models_errors=errors)
+        return event
 class QueueStep(BaseStep, StepToDict):
     """queue step, implement an async queue or represent a stream"""

mlrun/serving/v2_serving.py CHANGED Viewed

@@ -177,7 +177,7 @@ class V2ModelServer(StepToDict):
         """set real time metric (for model monitoring)"""
         self.metrics[name] = value
-    def get_model(self, suffix=""):
+    def get_model(self, suffix="") -> (str, dict):
         """get the model file(s) and metadata from model store
         the method returns a path to the model file and the extra data (dict of dataitem objects)

mlrun/utils/helpers.py CHANGED Viewed

@@ -60,6 +60,7 @@ import mlrun_pipelines.common.constants
 import mlrun_pipelines.models
 import mlrun_pipelines.utils
 from mlrun.common.constants import MYSQL_MEDIUMBLOB_SIZE_BYTES
+from mlrun.common.schemas import ArtifactCategories
 from mlrun.config import config
 from mlrun_pipelines.models import PipelineRun
@@ -96,6 +97,7 @@ class StorePrefix:
     Model = "models"
     Dataset = "datasets"
     Document = "documents"
+    LLMPrompt = "llm-prompts"
     @classmethod
     def is_artifact(cls, prefix):
@@ -107,6 +109,7 @@ class StorePrefix:
             "model": cls.Model,
             "dataset": cls.Dataset,
             "document": cls.Document,
+            "llm-prompt": cls.LLMPrompt,
         }
         return kind_map.get(kind, cls.Artifact)
@@ -119,6 +122,7 @@ class StorePrefix:
             cls.FeatureSet,
             cls.FeatureVector,
             cls.Document,
+            cls.LLMPrompt,
         ]
@@ -131,7 +135,16 @@ def get_artifact_target(item: dict, project=None):
     kind = item.get("kind")
     uid = item["metadata"].get("uid")
-    if kind in {"dataset", "model", "artifact"} and db_key:
+    if (
+        kind
+        in {
+            ArtifactCategories.dataset,
+            ArtifactCategories.model,
+            ArtifactCategories.llm_prompt,
+            "artifact",
+        }
+        and db_key
+    ):
         target = (
             f"{DB_SCHEMA}://{StorePrefix.kind_to_prefix(kind)}/{project_str}/{db_key}"
         )
@@ -876,13 +889,18 @@ def enrich_image_url(
     client_version: Optional[str] = None,
     client_python_version: Optional[str] = None,
 ) -> str:
+    image_url = image_url.strip()
+    # Add python version tag if needed
+    if image_url == "python" and client_python_version:
+        image_url = f"python:{client_python_version}"
     client_version = _convert_python_package_version_to_image_tag(client_version)
     server_version = _convert_python_package_version_to_image_tag(
         mlrun.utils.version.Version().get()["version"]
     )
-    image_url = image_url.strip()
     mlrun_version = config.images_tag or client_version or server_version
-    tag = mlrun_version
+    tag = mlrun_version or ""
     # TODO: Remove condition when mlrun/mlrun-kfp image is also supported
     if "mlrun-kfp" not in image_url:
@@ -2093,22 +2111,60 @@ def join_urls(base_url: Optional[str], path: Optional[str]) -> str:
 class Workflow:
     @staticmethod
-    def get_workflow_steps(workflow_id: str, project: str) -> list:
+    def get_workflow_steps(
+        db: "mlrun.db.RunDBInterface", workflow_id: str, project: str
+    ) -> list:
         steps = []
-        db = mlrun.get_run_db()
         def _add_run_step(_step: mlrun_pipelines.models.PipelineStep):
+            # on kfp 1.8 argo sets the pod hostname differently than what we have with kfp 2.5
+            # therefore, the heuristic needs to change. what we do here is first trying against 1.8 conventions
+            # and if we can't find it then falling back to 2.5
             try:
-                _run = db.list_runs(
+                # runner_pod = x-y-N
+                _runs = db.list_runs(
                     project=project,
                     labels=f"{mlrun_constants.MLRunInternalLabels.runner_pod}={_step.node_name}",
-                )[0]
+                )
+                if not _runs:
+                    try:
+                        # x-y-N -> x-y, N
+                        node_name_initials, node_name_generated_id = (
+                            _step.node_name.rsplit("-", 1)
+                        )
+                    except ValueError:
+                        # defensive programming, if the node name is not in the expected format
+                        node_name_initials = _step.node_name
+                        node_name_generated_id = ""
+                    # compile the expected runner pod hostname as per kfp >= 2.4
+                    # x-y, Z, N -> runner_pod = x-y-Z-N
+                    runner_pod_value = "-".join(
+                        [
+                            node_name_initials,
+                            _step.display_name,
+                            node_name_generated_id,
+                        ]
+                    ).rstrip("-")
+                    logger.debug(
+                        "No run found for step, trying with different node name",
+                        step_node_name=runner_pod_value,
+                    )
+                    _runs = db.list_runs(
+                        project=project,
+                        labels=f"{mlrun_constants.MLRunInternalLabels.runner_pod}={runner_pod_value}",
+                    )
+                _run = _runs[0]
             except IndexError:
+                logger.warning("No run found for step", step=_step.to_dict())
                 _run = {
                     "metadata": {
                         "name": _step.display_name,
                         "project": project,
                     },
+                    "status": {},
                 }
             _run["step_kind"] = _step.step_type
             if _step.skipped:
@@ -2226,8 +2282,9 @@ class Workflow:
             namespace=mlrun.mlconf.namespace,
         )
-        # arbitrary timeout of 5 seconds, the workflow should be done by now
-        kfp_run = kfp_client.wait_for_run_completion(workflow_id, 5)
+        # arbitrary timeout of 30 seconds, the workflow should be done by now, however sometimes kfp takes a few
+        # seconds to update the workflow status
+        kfp_run = kfp_client.wait_for_run_completion(workflow_id, 30)
         if not kfp_run:
             return None

mlrun/utils/notifications/notification/slack.py CHANGED Viewed

@@ -16,6 +16,7 @@ import typing
 import aiohttp
+import mlrun.common.runtimes.constants as runtimes_constants
 import mlrun.common.schemas
 import mlrun.lists
 import mlrun.utils.helpers
@@ -177,7 +178,10 @@ class SlackNotification(NotificationBase):
         # Only show the URL if the run is not a function (serving or mlrun function)
         kind = run.get("step_kind")
         state = run["status"].get("state", "")
-        if state != "skipped" and (url and not kind or kind == "run"):
+        if state != runtimes_constants.RunStates.skipped and (
+            url and not kind or kind == "run"
+        ):
             line = f'<{url}|*{meta.get("name")}*>'
         else:
             line = meta.get("name")

mlrun/utils/notifications/notification_pusher.py CHANGED Viewed

@@ -287,7 +287,8 @@ class NotificationPusher(_NotificationPusherBase):
             )
             project = run.metadata.project
             workflow_id = run.status.results.get("workflow_id", None)
-            runs.extend(Workflow.get_workflow_steps(workflow_id, project))
+            db = mlrun.get_run_db()
+            runs.extend(Workflow.get_workflow_steps(db, workflow_id, project))
         message = (
             self.messages.get(run.state(), "").format(resource=resource)

mlrun/utils/version/version.json CHANGED Viewed

@@ -1,4 +1,4 @@
 {
-  "git_commit": "104d0f2f30c2896ede9efe0344f8cbaed8b5616c",
-  "version": "1.10.0-rc2"
+  "git_commit": "aca543927ff594b8db166e423cb47001dfdf7bcc",
+  "version": "1.10.0-rc4"
 }

mlrun 1.10.0rc2__py3-none-any.whl → 1.10.0rc4__py3-none-any.whl

Potentially problematic release.

mlrun 1.10.0rc2-py3-none-any.whl → 1.10.0rc4-py3-none-any.whl