mlrun 1.10.0rc3__py3-none-any.whl → 1.10.0rc4__py3-none-any.whl

This diff shows the content of publicly available package versions released to a supported registry. It is provided for informational purposes only and reflects the changes between the two versions as they appear in their public registries.

Potentially problematic release.

Files changed (42)
  1. mlrun/artifacts/__init__.py +1 -0
  2. mlrun/artifacts/base.py +14 -2
  3. mlrun/artifacts/helpers.py +40 -0
  4. mlrun/artifacts/llm_prompt.py +165 -0
  5. mlrun/artifacts/manager.py +13 -1
  6. mlrun/artifacts/model.py +91 -11
  7. mlrun/common/formatters/artifact.py +1 -0
  8. mlrun/common/runtimes/constants.py +0 -14
  9. mlrun/common/schemas/artifact.py +12 -12
  10. mlrun/common/schemas/pipeline.py +0 -16
  11. mlrun/common/schemas/project.py +0 -17
  12. mlrun/common/schemas/runs.py +0 -17
  13. mlrun/config.py +1 -1
  14. mlrun/datastore/base.py +2 -2
  15. mlrun/datastore/datastore.py +1 -1
  16. mlrun/datastore/datastore_profile.py +1 -9
  17. mlrun/datastore/redis.py +2 -3
  18. mlrun/datastore/sources.py +0 -9
  19. mlrun/datastore/storeytargets.py +2 -5
  20. mlrun/datastore/targets.py +6 -56
  21. mlrun/datastore/utils.py +1 -11
  22. mlrun/db/base.py +1 -0
  23. mlrun/db/httpdb.py +6 -0
  24. mlrun/db/nopdb.py +1 -0
  25. mlrun/execution.py +87 -1
  26. mlrun/model.py +0 -5
  27. mlrun/projects/project.py +241 -4
  28. mlrun/run.py +0 -18
  29. mlrun/runtimes/remotesparkjob.py +6 -0
  30. mlrun/runtimes/sparkjob/spark3job.py +6 -0
  31. mlrun/serving/states.py +67 -3
  32. mlrun/serving/v2_serving.py +1 -1
  33. mlrun/utils/helpers.py +58 -7
  34. mlrun/utils/notifications/notification/slack.py +5 -1
  35. mlrun/utils/notifications/notification_pusher.py +2 -1
  36. mlrun/utils/version/version.json +2 -2
  37. {mlrun-1.10.0rc3.dist-info → mlrun-1.10.0rc4.dist-info}/METADATA +5 -5
  38. {mlrun-1.10.0rc3.dist-info → mlrun-1.10.0rc4.dist-info}/RECORD +42 -40
  39. {mlrun-1.10.0rc3.dist-info → mlrun-1.10.0rc4.dist-info}/WHEEL +1 -1
  40. {mlrun-1.10.0rc3.dist-info → mlrun-1.10.0rc4.dist-info}/entry_points.txt +0 -0
  41. {mlrun-1.10.0rc3.dist-info → mlrun-1.10.0rc4.dist-info}/licenses/LICENSE +0 -0
  42. {mlrun-1.10.0rc3.dist-info → mlrun-1.10.0rc4.dist-info}/top_level.txt +0 -0
mlrun/projects/project.py CHANGED
@@ -82,6 +82,7 @@ from ..artifacts import (
     DatasetArtifact,
     DocumentArtifact,
     DocumentLoaderSpec,
+    LLMPromptArtifact,
     ModelArtifact,
 )
 from ..artifacts.manager import ArtifactManager, dict_to_artifact, extend_artifact_path
@@ -1799,6 +1800,8 @@ class MlrunProject(ModelObj):
         training_set=None,
         label_column=None,
         extra_data=None,
+        model_url: Optional[str] = None,
+        default_config=None,
         **kwargs,
     ) -> ModelArtifact:
         """Log a model artifact and optionally upload it to datastore
@@ -1841,7 +1844,9 @@ class MlrunProject(ModelObj):
         :param label_column: which columns in the training set are the label (target) columns
         :param extra_data: key/value list of extra files/charts to link with this dataset
                            value can be absolute path | relative path (to model dir) | bytes | artifact object
-
+        :param model_url: Remote model url.
+        :param default_config: Default configuration for client building.
+                               Saved as a sub-dictionary under the parameter.
         :returns: model artifact object
         """
 
@@ -1864,6 +1869,8 @@ class MlrunProject(ModelObj):
             feature_vector=feature_vector,
             feature_weights=feature_weights,
             extra_data=extra_data,
+            model_url=model_url,
+            default_config=default_config,
             **kwargs,
         )
         if training_set is not None:
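
A minimal usage sketch for the new ``log_model`` parameters; the URL and configuration values are illustrative assumptions, not values taken from this release::

    import mlrun

    project = mlrun.get_or_create_project("demo")
    # Log a model that lives behind a remote endpoint; default_config is stored
    # with the artifact as a sub-dictionary of client-building defaults.
    model = project.log_model(
        "remote-llm",
        model_url="https://models.example.com/my-model",  # hypothetical URL
        default_config={"temperature": 0.2, "max_tokens": 512},  # hypothetical defaults
    )
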
@@ -1881,6 +1888,87 @@ class MlrunProject(ModelObj):
             )
         return item
 
+    def log_llm_prompt(
+        self,
+        key,
+        prompt_string: Optional[str] = None,
+        prompt_path: Optional[str] = None,
+        prompt_legend: Optional[dict] = None,
+        model_artifact: Union[ModelArtifact, str] = None,
+        model_configuration: Optional[dict] = None,
+        description: Optional[str] = None,
+        target_path: Optional[str] = None,
+        artifact_path: Optional[str] = None,
+        tag: Optional[str] = None,
+        labels: Optional[Union[list[str], str]] = None,
+        upload: Optional[bool] = None,
+        **kwargs,
+    ) -> LLMPromptArtifact:
+        """
+        Log an LLM prompt artifact to the project.
+
+        This method creates and logs an `LLMPromptArtifact` which captures a prompt definition for large language model
+        (LLM) interactions. The prompt can be provided as a string or a file, and may include metadata like generation
+        parameters, a legend for variable injection, and references to a parent model artifact.
+
+        If the prompt content exceeds a certain length, it may be stored in a temporary file and logged accordingly.
+
+        Examples::
+
+            # Log a prompt from file
+            project.log_llm_prompt(
+                key="qa-prompt",
+                prompt_path="prompts/qa_template.txt",
+                prompt_legend={"question": "user_question"},
+                model_artifact=model,
+                tag="v2",
+            )
+
+        :param key: Unique key for the prompt artifact.
+        :param prompt_string: Raw prompt text. Mutually exclusive with `prompt_path`.
+        :param prompt_path: Path to a file containing the prompt. Mutually exclusive with `prompt_string`.
+        :param prompt_legend: A dictionary where each key is a placeholder in the prompt (e.g., ``{user_name}``)
+            and the value is a description or explanation of what that placeholder represents.
+            Useful for documenting and clarifying dynamic parts of the prompt.
+        :param model_artifact: Reference to the parent model (either `ModelArtifact` or model URI string).
+        :param model_configuration: Configuration dictionary for model generation parameters
+            (e.g., temperature, max tokens).
+        :param description: Optional description of the prompt.
+        :param target_path: Optional local target path for saving prompt content.
+        :param artifact_path: Storage path for the logged artifact.
+        :param tag: Version tag for the artifact (e.g., "v1", "latest").
+        :param labels: Labels to tag the artifact for filtering and organization.
+        :param upload: Whether to upload the artifact to a remote datastore. Defaults to True.
+        :param kwargs: Additional attributes to pass into the `LLMPromptArtifact`.
+
+        :returns: The logged `LLMPromptArtifact` object.
+        """
+
+        llm_prompt = LLMPromptArtifact(
+            key=key,
+            project=self.name,
+            prompt_string=prompt_string,
+            prompt_path=prompt_path,
+            prompt_legend=prompt_legend,
+            model_artifact=model_artifact,
+            model_configuration=model_configuration,
+            target_path=target_path,
+            description=description,
+            **kwargs,
+        )
+
+        item = cast(
+            LLMPromptArtifact,
+            self.log_artifact(
+                llm_prompt,
+                artifact_path=artifact_path,
+                tag=tag,
+                upload=upload,
+                labels=labels,
+            ),
+        )
+        return item
+
     def get_vector_store_collection(
         self,
         vector_store: "VectorStore",  # noqa: F821
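
Complementing the file-based example in the docstring above, a sketch of logging an inline prompt string (key, placeholder, and configuration values are illustrative)::

    # Assumes `project` is an existing MlrunProject
    prompt = project.log_llm_prompt(
        key="summarize",
        prompt_string="Summarize the following text: {text}",
        prompt_legend={"text": "document body to summarize"},
        model_configuration={"temperature": 0.1},  # hypothetical generation parameters
        tag="v1",
    )
    print(prompt.uri)
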
@@ -4474,8 +4562,8 @@ class MlrunProject(ModelObj):
 
     def list_models(
         self,
-        name=None,
-        tag=None,
+        name: Optional[str] = None,
+        tag: Optional[str] = None,
         labels: Optional[Union[str, dict[str, Optional[str]], list[str]]] = None,
         since=None,
         until=None,
@@ -4486,7 +4574,7 @@ class MlrunProject(ModelObj):
         format_: Optional[
             mlrun.common.formatters.ArtifactFormat
         ] = mlrun.common.formatters.ArtifactFormat.full,
-    ):
+    ) -> list[ModelArtifact]:
         """List models in project, filtered by various parameters.
 
         Examples::
@@ -4595,6 +4683,155 @@ class MlrunProject(ModelObj):
             **kwargs,
         )
 
+    def list_llm_prompts(
+        self,
+        name: Optional[str] = None,
+        tag: Optional[str] = None,
+        labels: Optional[Union[str, dict[str, Optional[str]], list[str]]] = None,
+        since: Optional[datetime.datetime] = None,
+        until: Optional[datetime.datetime] = None,
+        iter: Optional[int] = None,
+        best_iteration: bool = False,
+        tree: Optional[str] = None,
+        model: Optional[Union[str, Artifact]] = None,
+        format_: Optional[
+            mlrun.common.formatters.ArtifactFormat
+        ] = mlrun.common.formatters.ArtifactFormat.full,
+        partition_by: Optional[
+            Union[mlrun.common.schemas.ArtifactPartitionByField, str]
+        ] = None,
+        rows_per_partition: int = 1,
+        partition_sort_by: Optional[
+            Union[mlrun.common.schemas.SortField, str]
+        ] = mlrun.common.schemas.SortField.updated,
+        partition_order: Union[
+            mlrun.common.schemas.OrderType, str
+        ] = mlrun.common.schemas.OrderType.desc,
+    ) -> list[mlrun.artifacts.llm_prompt.LLMPromptArtifact]:
+        """List LLM prompt artifacts in the project with support for filtering.
+
+        This method returns a list of LLM prompt artifacts, filtered by parameters such as name, tag, labels,
+        model association, iteration, and more. It can be used to retrieve the latest, best, or specific versions
+        of prompts tied to a model or general project context.
+
+        Examples::
+
+            # Get all latest tagged prompts
+            prompts = project.list_llm_prompts(tag="latest")
+
+            # Get prompts associated with a specific model
+            prompts = project.list_llm_prompts(model=ModelArtifact("m1"))
+
+            # Get prompts filtered by label
+            prompts = project.list_llm_prompts(labels={"use_case": "chatbot"})
+
+            # Get prompts using a name wildcard
+            prompts = project.list_llm_prompts(name="~chat")
+
+        :param name: Name of the prompt artifact. Prefix with '~' for wildcard search (case-insensitive).
+        :param tag: Filter artifacts by this tag (e.g., 'latest', 'prod').
+        :param labels: Filter llm-prompt artifacts by label key-value pairs or key existence. This can be provided as:
+
+            - A dictionary in the format `{"label": "value"}` to match specific label key-value pairs,
+              or `{"label": None}` to check for key existence.
+            - A list of strings formatted as `"label=value"` to match specific label key-value pairs,
+              or just `"label"` for key existence.
+            - A comma-separated string formatted as `"label1=value1,label2"` to match entities with
+              the specified key-value pairs or key existence.
+
+        :param since: Return artifacts updated after this date (as datetime object).
+        :param until: Return artifacts updated before this date (as datetime object).
+        :param iter: Retrieve a specific iteration. Use `0` for root; `None` for all.
+        :param best_iteration: Returns the llm-prompt artifact which belongs to the best iteration of a given run,
+            in the case of artifacts generated from a hyper-param run. If only a single iteration exists, will return
+            the artifact from that iteration. If using ``best_iter``, the ``iter`` parameter must not be used.
+        :param tree: Filter by artifact tree ID (e.g., for lineage filtering).
+        :param model: Return prompts associated with this model (can be `Artifact` URI or `Artifact` object).
+        :param format_: The format in which to return the artifacts. Default is 'full'.
+        :param partition_by: Field to group results by. When `partition_by` is specified, the `partition_sort_by`
+            parameter must be provided as well.
+        :param rows_per_partition: How many top rows (per sorting defined by `partition_sort_by` and `partition_order`)
+            to return per group. Default value is 1.
+        :param partition_sort_by: What field to sort the results by, within each partition defined by `partition_by`.
+            Currently the only allowed values are `created` and `updated`.
+        :param partition_order: Order of sorting within partitions - `asc` or `desc`. Default is `desc`.
+
+        :returns: A list of filtered `LLMPromptArtifact` objects matching the given parameters.
+        """
+        db = mlrun.db.get_run_db(secrets=self._secrets)
+        return db.list_artifacts(
+            name=name,
+            project=self.metadata.name,
+            tag=tag,
+            labels=labels,
+            since=since,
+            until=until,
+            iter=iter,
+            best_iteration=best_iteration,
+            kind=mlrun.artifacts.llm_prompt.LLMPromptArtifact.kind,
+            tree=tree,
+            parent=model.uri if isinstance(model, Artifact) else model,
+            format_=format_,
+            partition_by=partition_by,
+            rows_per_partition=rows_per_partition,
+            partition_sort_by=partition_sort_by,
+            partition_order=partition_order,
+        ).to_objects()
+
+    def paginated_list_llm_prompts(
+        self,
+        *args,
+        page: Optional[int] = None,
+        page_size: Optional[int] = None,
+        page_token: Optional[str] = None,
+        **kwargs,
+    ) -> tuple[mlrun.lists.ArtifactList, Optional[str]]:
+        """Retrieve a paginated list of LLM prompt artifacts for the current project.
+
+        This method returns a list of LLM prompt artifacts, supporting both token-based and page-number-based
+        pagination. You can filter and navigate through the results using the optional `page`, `page_size`, and
+        `page_token` parameters.
+
+        Examples::
+
+            # Fetch the first page with up to 5 prompt artifacts
+            prompts, token = project.paginated_list_llm_prompts(page_size=5)
+
+            # Fetch the next page using the page token
+            prompts, token = project.paginated_list_llm_prompts(page_token=token)
+
+            # Fetch a specific page (e.g., page 3)
+            prompts, token = project.paginated_list_llm_prompts(page=3, page_size=5)
+
+            # Retrieve all prompt artifacts across pages
+            all_prompts = []
+            token = None
+            while True:
+                page_prompts, token = project.paginated_list_llm_prompts(
+                    page_token=token, page_size=5
+                )
+                all_prompts.extend(page_prompts)
+                if not token:
+                    break
+            print(f"Total retrieved prompts: {len(all_prompts)}")
+
+        :param page: Page number to retrieve (alternative to page_token).
+        :param page_size: Number of items per page. Defaults to `mlrun.mlconf.httpdb.pagination.default_page_size`.
+        :param page_token: Token for retrieving the next page of results (used for continuous iteration).
+
+        :returns: A tuple of (ArtifactList of LLM prompts, next page_token or None if no more pages).
+        """
+        db = mlrun.db.get_run_db(secrets=self._secrets)
+        return db.paginated_list_artifacts(
+            *args,
+            project=self.metadata.name,
+            kind=mlrun.artifacts.llm_prompt.LLMPromptArtifact.kind,
+            page=page,
+            page_size=page_size,
+            page_token=page_token,
+            **kwargs,
+        )
+
     def list_functions(
         self,
         name: Optional[str] = None,
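
Since ``model`` may be given as a URI string rather than an artifact object, a hedged sketch of that variant (the URI is illustrative)::

    prompts = project.list_llm_prompts(
        model="store://models/demo/my-model:latest",  # hypothetical model store URI
        tag="latest",
    )
    for prompt in prompts:
        print(prompt.metadata.key)
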
mlrun/run.py CHANGED
@@ -21,7 +21,6 @@ import tempfile
 import time
 import typing
 import uuid
-import warnings
 from base64 import b64decode
 from copy import deepcopy
 from os import environ, makedirs, path
@@ -206,7 +205,6 @@ def get_or_create_ctx(
     rundb: Union[str, "mlrun.db.RunDBInterface"] = "",
     project: str = "",
     upload_artifacts: bool = False,
-    labels: Optional[dict] = None,
 ) -> MLClientCtx:
     """
     Called from within the user program to obtain a run context.
@@ -226,7 +224,6 @@ def get_or_create_ctx(
     :param project:          project to initiate the context in (by default `mlrun.mlconf.active_project`)
     :param upload_artifacts: when using local context (not as part of a job/run), upload artifacts to the
                              system default artifact path location
-    :param labels:           (deprecated - use spec instead) dict of the context labels.
     :return: execution context
 
     Examples::
@@ -259,21 +256,6 @@ def get_or_create_ctx(
         context.log_artifact("results.html", body=b"<b> Some HTML <b>", viewer="web-app")
 
     """
-    if labels:
-        warnings.warn(
-            "The `labels` argument is deprecated in 1.7.0 and will be removed in 1.10.0. "
-            "Please use `spec` instead, e.g.:\n"
-            "spec={'metadata': {'labels': {'key': 'value'}}}",
-            FutureWarning,
-        )
-        if spec is None:
-            spec = {}
-        if "metadata" not in spec:
-            spec["metadata"] = {}
-        if "labels" not in spec["metadata"]:
-            spec["metadata"]["labels"] = {}
-        spec["metadata"]["labels"].update(labels)
-
     if global_context.get() and not spec and not event:
         return global_context.get()
 
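
The removed deprecation shim itself spelled out the replacement pattern; labels now travel through ``spec``, e.g.::

    import mlrun

    ctx = mlrun.get_or_create_ctx(
        "my-run",
        spec={"metadata": {"labels": {"team": "ml"}}},  # replaces the removed labels= argument
    )
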
mlrun/runtimes/remotesparkjob.py CHANGED
@@ -103,6 +103,12 @@ class RemoteSparkRuntime(KubejobRuntime):
 
     @classmethod
     def deploy_default_image(cls):
+        if not mlrun.get_current_project(silent=True):
+            raise mlrun.errors.MLRunMissingProjectError(
+                "An active project is required to run deploy_default_image(). "
+                "This can be set by calling get_or_create_project(), load_project(), or new_project()."
+            )
+
         sj = mlrun.new_function(
             kind="remote-spark", name="remote-spark-default-image-deploy-temp"
         )
mlrun/runtimes/sparkjob/spark3job.py CHANGED
@@ -804,6 +804,12 @@ class Spark3Runtime(KubejobRuntime):
 
     @classmethod
     def deploy_default_image(cls, with_gpu=False):
+        if not mlrun.get_current_project(silent=True):
+            raise mlrun.errors.MLRunMissingProjectError(
+                "An active project is required to run deploy_default_image(). "
+                "This can be set by calling get_or_create_project()."
+            )
+
         sj = mlrun.new_function(kind=cls.kind, name="spark-default-image-deploy-temp")
         sj.spec.build.image = cls._get_default_deployed_mlrun_image_name(with_gpu)
 
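
Both runtimes now fail fast when no project is active; a sketch of the required call order (the project name is illustrative)::

    import mlrun
    from mlrun.runtimes.sparkjob.spark3job import Spark3Runtime

    mlrun.get_or_create_project("spark-demo")  # sets the active project
    Spark3Runtime.deploy_default_image(with_gpu=False)
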
mlrun/serving/states.py CHANGED
@@ -32,12 +32,14 @@ import storey.utils
 import mlrun
 import mlrun.artifacts
 import mlrun.common.schemas as schemas
+from mlrun.artifacts.model import ModelArtifact
 from mlrun.datastore.datastore_profile import (
     DatastoreProfileKafkaSource,
     DatastoreProfileKafkaTarget,
     DatastoreProfileV3io,
     datastore_profile_read,
 )
+from mlrun.datastore.store_resources import get_store_resource
 from mlrun.datastore.storeytargets import KafkaStoreyTarget, StreamStoreyTarget
 from mlrun.utils import logger
 
@@ -955,10 +957,33 @@ class RouterStep(TaskStep):
 
 
 class Model(storey.ParallelExecutionRunnable):
+    def __init__(
+        self,
+        name: str,
+        raise_exception: bool = True,
+        artifact_uri: Optional[str] = None,
+        **kwargs,
+    ):
+        super().__init__(name=name, raise_exception=raise_exception, **kwargs)
+        if artifact_uri is not None and not isinstance(artifact_uri, str):
+            raise MLRunInvalidArgumentError("artifact_uri argument must be a string")
+        self.artifact_uri = artifact_uri
+
     def load(self) -> None:
         """Override to load model if needed."""
         pass
 
+    def _get_artifact_object(self) -> Union[ModelArtifact, None]:
+        if self.artifact_uri:
+            if mlrun.datastore.is_store_uri(self.artifact_uri):
+                return get_store_resource(self.artifact_uri)
+            else:
+                raise ValueError(
+                    "Could not get artifact, artifact_uri must be a valid artifact store URI"
+                )
+        else:
+            return None
+
     def init(self):
         self.load()
 
@@ -976,6 +1001,39 @@
     async def run_async(self, body: Any, path: str) -> Any:
         return self.predict(body)
 
+    def get_local_model_path(self, suffix="") -> (str, dict):
+        """get local model file(s) and extra data items by using artifact
+        If the model file is stored in remote cloud storage, download it to the local file system
+
+        Examples
+        --------
+        ::
+
+            def load(self):
+                model_file, extra_data = self.get_local_model_path(suffix=".pkl")
+                self.model = load(open(model_file, "rb"))
+                categories = extra_data["categories"].as_df()
+
+        Parameters
+        ----------
+        suffix : str
+            optional, model file suffix (when the model_path is a directory)
+
+        Returns
+        -------
+        str
+            (local) model file
+        dict
+            extra dataitems dictionary
+        """
+        artifact = self._get_artifact_object()
+        if artifact:
+            model_file, _, extra_dataitems = mlrun.artifacts.get_model(
+                suffix=suffix, model_dir=artifact
+            )
+            return model_file, extra_dataitems
+        return None, None
+
 
 class ModelSelector:
     """Used to select which models to run on each event."""
@@ -1089,6 +1147,14 @@ class ModelRunnerStep(TaskStep, StepToDict):
         """
         # TODO allow model_class as Model object as part of ML-9924
         model_parameters = model_parameters or {}
+        model_artifact = (
+            model_artifact.uri
+            if isinstance(model_artifact, mlrun.artifacts.Artifact)
+            else model_artifact
+        )
+        model_parameters["artifact_uri"] = model_parameters.get(
+            "artifact_uri", model_artifact
+        )
         if model_parameters.get("name", endpoint_name) != endpoint_name:
             raise mlrun.errors.MLRunInvalidArgumentError(
                 "Inconsistent name for model added to ModelRunnerStep."
@@ -1111,9 +1177,7 @@ class ModelRunnerStep(TaskStep, StepToDict):
             schemas.MonitoringData.INPUT_PATH: input_path,
             schemas.MonitoringData.CREATION_STRATEGY: creation_strategy,
             schemas.MonitoringData.LABELS: labels,
-            schemas.MonitoringData.MODEL_PATH: model_artifact.uri
-            if isinstance(model_artifact, mlrun.artifacts.Artifact)
-            else model_artifact,
+            schemas.MonitoringData.MODEL_PATH: model_artifact,
         }
         self.class_args[schemas.ModelRunnerStepData.MODELS] = models
         self.class_args[schemas.ModelRunnerStepData.MONITORING_DATA] = monitoring_data
mlrun/serving/v2_serving.py CHANGED
@@ -177,7 +177,7 @@ class V2ModelServer(StepToDict):
         """set real time metric (for model monitoring)"""
         self.metrics[name] = value
 
-    def get_model(self, suffix=""):
+    def get_model(self, suffix="") -> (str, dict):
         """get the model file(s) and metadata from model store
 
         the method returns a path to the model file and the extra data (dict of dataitem objects)
mlrun/utils/helpers.py CHANGED
@@ -60,6 +60,7 @@ import mlrun_pipelines.common.constants
 import mlrun_pipelines.models
 import mlrun_pipelines.utils
 from mlrun.common.constants import MYSQL_MEDIUMBLOB_SIZE_BYTES
+from mlrun.common.schemas import ArtifactCategories
 from mlrun.config import config
 from mlrun_pipelines.models import PipelineRun
 
@@ -96,6 +97,7 @@ class StorePrefix:
     Model = "models"
     Dataset = "datasets"
     Document = "documents"
+    LLMPrompt = "llm-prompts"
 
     @classmethod
     def is_artifact(cls, prefix):
@@ -107,6 +109,7 @@ class StorePrefix:
             "model": cls.Model,
             "dataset": cls.Dataset,
             "document": cls.Document,
+            "llm-prompt": cls.LLMPrompt,
         }
         return kind_map.get(kind, cls.Artifact)
 
@@ -119,6 +122,7 @@ class StorePrefix:
             cls.FeatureSet,
             cls.FeatureVector,
             cls.Document,
+            cls.LLMPrompt,
         ]
 
 
@@ -131,7 +135,16 @@ def get_artifact_target(item: dict, project=None):
     kind = item.get("kind")
     uid = item["metadata"].get("uid")
 
-    if kind in {"dataset", "model", "artifact"} and db_key:
+    if (
+        kind
+        in {
+            ArtifactCategories.dataset,
+            ArtifactCategories.model,
+            ArtifactCategories.llm_prompt,
+            "artifact",
+        }
+        and db_key
+    ):
         target = (
             f"{DB_SCHEMA}://{StorePrefix.kind_to_prefix(kind)}/{project_str}/{db_key}"
         )
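
A small sketch of the store-target shape now produced for the new kind, assuming ``DB_SCHEMA`` resolves to the usual ``store`` scheme (project and key values are illustrative)::

    from mlrun.utils.helpers import StorePrefix

    prefix = StorePrefix.kind_to_prefix("llm-prompt")
    print(prefix)  # llm-prompts
    print(f"store://{prefix}/demo/qa-prompt")  # shape of the generated target URI
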
@@ -2098,22 +2111,60 @@ def join_urls(base_url: Optional[str], path: Optional[str]) -> str:
 
 class Workflow:
     @staticmethod
-    def get_workflow_steps(workflow_id: str, project: str) -> list:
+    def get_workflow_steps(
+        db: "mlrun.db.RunDBInterface", workflow_id: str, project: str
+    ) -> list:
         steps = []
-        db = mlrun.get_run_db()
 
         def _add_run_step(_step: mlrun_pipelines.models.PipelineStep):
+            # on kfp 1.8 argo sets the pod hostname differently than what we have with kfp 2.5
+            # therefore, the heuristic needs to change. what we do here is first trying against 1.8 conventions
+            # and if we can't find it then falling back to 2.5
             try:
-                _run = db.list_runs(
+                # runner_pod = x-y-N
+                _runs = db.list_runs(
                     project=project,
                     labels=f"{mlrun_constants.MLRunInternalLabels.runner_pod}={_step.node_name}",
-                )[0]
+                )
+                if not _runs:
+                    try:
+                        # x-y-N -> x-y, N
+                        node_name_initials, node_name_generated_id = (
+                            _step.node_name.rsplit("-", 1)
+                        )
+
+                    except ValueError:
+                        # defensive programming, if the node name is not in the expected format
+                        node_name_initials = _step.node_name
+                        node_name_generated_id = ""
+
+                    # compile the expected runner pod hostname as per kfp >= 2.4
+                    # x-y, Z, N -> runner_pod = x-y-Z-N
+                    runner_pod_value = "-".join(
+                        [
+                            node_name_initials,
+                            _step.display_name,
+                            node_name_generated_id,
+                        ]
+                    ).rstrip("-")
+                    logger.debug(
+                        "No run found for step, trying with different node name",
+                        step_node_name=runner_pod_value,
+                    )
+                    _runs = db.list_runs(
+                        project=project,
+                        labels=f"{mlrun_constants.MLRunInternalLabels.runner_pod}={runner_pod_value}",
+                    )
+
+                _run = _runs[0]
             except IndexError:
+                logger.warning("No run found for step", step=_step.to_dict())
                 _run = {
                     "metadata": {
                         "name": _step.display_name,
                         "project": project,
                     },
+                    "status": {},
                 }
             _run["step_kind"] = _step.step_type
             if _step.skipped:
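
Callers must now pass the DB handle explicitly, as the notification-pusher change below does; a standalone sketch with placeholder values::

    import mlrun
    from mlrun.utils.helpers import Workflow

    db = mlrun.get_run_db()
    steps = Workflow.get_workflow_steps(db, "<workflow-id>", "demo")
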
@@ -2231,9 +2282,9 @@ class Workflow:
             namespace=mlrun.mlconf.namespace,
         )
 
-        # arbitrary timeout of 60 seconds, the workflow should be done by now, however sometimes kfp takes a few
+        # arbitrary timeout of 30 seconds, the workflow should be done by now, however sometimes kfp takes a few
         # seconds to update the workflow status
-        kfp_run = kfp_client.wait_for_run_completion(workflow_id, 60)
+        kfp_run = kfp_client.wait_for_run_completion(workflow_id, 30)
         if not kfp_run:
             return None
 
mlrun/utils/notifications/notification/slack.py CHANGED
@@ -16,6 +16,7 @@ import typing
 
 import aiohttp
 
+import mlrun.common.runtimes.constants as runtimes_constants
 import mlrun.common.schemas
 import mlrun.lists
 import mlrun.utils.helpers
@@ -177,7 +178,10 @@ class SlackNotification(NotificationBase):
         # Only show the URL if the run is not a function (serving or mlrun function)
         kind = run.get("step_kind")
         state = run["status"].get("state", "")
-        if state != "skipped" and (url and not kind or kind == "run"):
+
+        if state != runtimes_constants.RunStates.skipped and (
+            url and not kind or kind == "run"
+        ):
             line = f'<{url}|*{meta.get("name")}*>'
         else:
             line = meta.get("name")
mlrun/utils/notifications/notification_pusher.py CHANGED
@@ -287,7 +287,8 @@ class NotificationPusher(_NotificationPusherBase):
             )
             project = run.metadata.project
             workflow_id = run.status.results.get("workflow_id", None)
-            runs.extend(Workflow.get_workflow_steps(workflow_id, project))
+            db = mlrun.get_run_db()
+            runs.extend(Workflow.get_workflow_steps(db, workflow_id, project))
 
             message = (
                 self.messages.get(run.state(), "").format(resource=resource)
mlrun/utils/version/version.json CHANGED
@@ -1,4 +1,4 @@
 {
-  "git_commit": "210c516a3ed5c2f2c7223f31fdfd9e99b73d56b6",
-  "version": "1.10.0-rc3"
+  "git_commit": "aca543927ff594b8db166e423cb47001dfdf7bcc",
+  "version": "1.10.0-rc4"
 }