PyPI - mlrun - Versions diffs - 1.10.0rc11__py3-none-any.whl → 1.10.0rc13__py3-none-any.whl - Mend

mlrun 1.10.0rc11__py3-none-any.whl → 1.10.0rc13__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of mlrun might be problematic. Click here for more details.

Files changed (59) hide show

mlrun/__init__.py +2 -1
mlrun/__main__.py +7 -1
mlrun/artifacts/base.py +9 -3
mlrun/artifacts/dataset.py +2 -1
mlrun/artifacts/llm_prompt.py +6 -2
mlrun/artifacts/model.py +2 -2
mlrun/common/constants.py +1 -0
mlrun/common/runtimes/constants.py +10 -1
mlrun/common/schemas/__init__.py +1 -1
mlrun/common/schemas/model_monitoring/model_endpoints.py +1 -1
mlrun/common/schemas/serving.py +7 -0
mlrun/config.py +21 -2
mlrun/datastore/__init__.py +3 -1
mlrun/datastore/alibaba_oss.py +1 -1
mlrun/datastore/azure_blob.py +1 -1
mlrun/datastore/base.py +6 -31
mlrun/datastore/datastore.py +109 -33
mlrun/datastore/datastore_profile.py +31 -0
mlrun/datastore/dbfs_store.py +1 -1
mlrun/datastore/google_cloud_storage.py +2 -2
mlrun/datastore/model_provider/__init__.py +13 -0
mlrun/datastore/model_provider/model_provider.py +160 -0
mlrun/datastore/model_provider/openai_provider.py +144 -0
mlrun/datastore/remote_client.py +65 -0
mlrun/datastore/s3.py +1 -1
mlrun/datastore/storeytargets.py +1 -1
mlrun/datastore/utils.py +22 -0
mlrun/datastore/v3io.py +1 -1
mlrun/db/base.py +1 -1
mlrun/db/httpdb.py +9 -4
mlrun/db/nopdb.py +1 -1
mlrun/execution.py +28 -7
mlrun/launcher/base.py +23 -13
mlrun/launcher/local.py +3 -1
mlrun/launcher/remote.py +4 -2
mlrun/model.py +65 -0
mlrun/model_monitoring/db/tsdb/v3io/v3io_connector.py +175 -8
mlrun/package/packagers_manager.py +2 -0
mlrun/projects/operations.py +8 -1
mlrun/projects/pipelines.py +40 -18
mlrun/projects/project.py +28 -5
mlrun/run.py +42 -2
mlrun/runtimes/__init__.py +6 -0
mlrun/runtimes/base.py +24 -6
mlrun/runtimes/daskjob.py +1 -0
mlrun/runtimes/databricks_job/databricks_runtime.py +1 -0
mlrun/runtimes/local.py +1 -6
mlrun/serving/server.py +1 -2
mlrun/serving/states.py +438 -23
mlrun/serving/system_steps.py +27 -29
mlrun/utils/helpers.py +13 -2
mlrun/utils/notifications/notification_pusher.py +15 -0
mlrun/utils/version/version.json +2 -2
{mlrun-1.10.0rc11.dist-info → mlrun-1.10.0rc13.dist-info}/METADATA +2 -2
{mlrun-1.10.0rc11.dist-info → mlrun-1.10.0rc13.dist-info}/RECORD +59 -55
{mlrun-1.10.0rc11.dist-info → mlrun-1.10.0rc13.dist-info}/WHEEL +0 -0
{mlrun-1.10.0rc11.dist-info → mlrun-1.10.0rc13.dist-info}/entry_points.txt +0 -0
{mlrun-1.10.0rc11.dist-info → mlrun-1.10.0rc13.dist-info}/licenses/LICENSE +0 -0
{mlrun-1.10.0rc11.dist-info → mlrun-1.10.0rc13.dist-info}/top_level.txt +0 -0

mlrun/execution.py CHANGED Viewed

@@ -26,6 +26,7 @@ from dateutil import parser
 import mlrun
 import mlrun.common.constants as mlrun_constants
 import mlrun.common.formatters
+import mlrun.common.runtimes.constants
 from mlrun.artifacts import (
     Artifact,
     DatasetArtifact,
@@ -91,6 +92,8 @@ class MLClientCtx:
         self._autocommit = autocommit
         self._notifications = []
         self._state_thresholds = {}
+        self._retry_spec = {}
+        self._retry_count = None
         self._labels = {}
         self._annotations = {}
@@ -432,6 +435,7 @@ class MLClientCtx:
             self._tolerations = spec.get("tolerations", self._tolerations)
             self._affinity = spec.get("affinity", self._affinity)
             self._reset_on_run = spec.get("reset_on_run", self._reset_on_run)
+            self._retry_spec = spec.get("retry", self._retry_spec)
         self._init_dbs(rundb)
@@ -450,10 +454,11 @@ class MLClientCtx:
         if start:
             start = parser.parse(start) if isinstance(start, str) else start
             self._start_time = start
-        self._state = "running"
+        self._state = mlrun.common.runtimes.constants.RunStates.running
         status = attrs.get("status")
-        if include_status and status:
+        retry_configured = self._retry_spec and self._retry_spec.get("count")
+        if (include_status or retry_configured) and status:
             self._results = status.get("results", self._results)
             for artifact in status.get("artifacts", []):
                 artifact_obj = dict_to_artifact(artifact)
@@ -462,7 +467,10 @@ class MLClientCtx:
                 )
             for key, uri in status.get("artifact_uris", {}).items():
                 self._artifacts_manager.artifact_uris[key] = uri
-            self._state = status.get("state", self._state)
+            self._retry_count = status.get("retry_count", self._retry_count)
+            # if run is a retry, the state needs to move to running
+            if include_status:
+                self._state = status.get("state", self._state)
         # No need to store the run for every worker
         if store_run and self.is_logging_worker():
@@ -953,6 +961,11 @@ class MLClientCtx:
         :returns: The logged `LLMPromptArtifact` object.
         """
+        if not prompt_string and not prompt_path:
+            raise mlrun.errors.MLRunInvalidArgumentError(
+                "Either 'prompt_string' or 'prompt_path' must be provided"
+            )
         llm_prompt = LLMPromptArtifact(
             key=key,
             project=self.project or "",
@@ -1107,13 +1120,13 @@ class MLClientCtx:
         :param completed: Mark run as completed
         """
         # Changing state to completed is allowed only when the execution is in running state
-        if self._state != "running":
+        if self._state != mlrun.common.runtimes.constants.RunStates.running:
             completed = False
         if message:
             self._annotations["message"] = message
         if completed:
-            self._state = "completed"
+            self._state = mlrun.common.runtimes.constants.RunStates.completed
         if self._parent:
             self._parent.update_child_iterations()
@@ -1147,9 +1160,15 @@ class MLClientCtx:
         updates = {"status.last_update": now_date().isoformat()}
         if error is not None:
-            self._state = "error"
+            state = mlrun.common.runtimes.constants.RunStates.error
+            max_retries = self._retry_spec.get("count", 0)
+            self._retry_count = self._retry_count or 0
+            if max_retries and self._retry_count < max_retries:
+                state = mlrun.common.runtimes.constants.RunStates.pending_retry
+            self._state = state
             self._error = str(error)
-            updates["status.state"] = "error"
+            updates["status.state"] = state
             updates["status.error"] = error
         elif (
             execution_state
@@ -1241,11 +1260,13 @@ class MLClientCtx:
                 "node_selector": self._node_selector,
                 "tolerations": self._tolerations,
                 "affinity": self._affinity,
+                "retry": self._retry_spec,
             },
             "status": {
                 "results": self._results,
                 "start_time": to_date_str(self._start_time),
                 "last_update": to_date_str(self._last_update),
+                "retry_count": self._retry_count,
             },
         }

mlrun/launcher/base.py CHANGED Viewed

@@ -18,6 +18,8 @@ import os
 import uuid
 from typing import Any, Callable, Optional, Union
+import mlrun.common.constants
+import mlrun.common.runtimes.constants
 import mlrun.common.schemas
 import mlrun.config
 import mlrun.errors
@@ -72,6 +74,7 @@ class BaseLauncher(abc.ABC):
         notifications: Optional[list[mlrun.model.Notification]] = None,
         returns: Optional[list[Union[str, dict[str, str]]]] = None,
         state_thresholds: Optional[dict[str, int]] = None,
+        retry: Optional[Union[mlrun.model.Retry, dict]] = None,
     ) -> "mlrun.run.RunObject":
         """run the function from the server/client[local/remote]"""
         pass
@@ -133,7 +136,7 @@ class BaseLauncher(abc.ABC):
         """Check if the runtime requires to build the image and updates the spec accordingly"""
         pass
-    def _validate_runtime(
+    def _validate_run(
         self,
         runtime: "mlrun.runtimes.BaseRuntime",
         run: "mlrun.run.RunObject",
@@ -194,7 +197,7 @@ class BaseLauncher(abc.ABC):
             )
     @classmethod
-    def _validate_run_single_param(cls, param_name, param_value):
+    def _validate_run_single_param(cls, param_name: str, param_value: int):
         # verify that integer parameters don't exceed a int64
         if isinstance(param_value, int) and abs(param_value) >= 2**63:
             raise mlrun.errors.MLRunInvalidArgumentError(
@@ -203,8 +206,6 @@ class BaseLauncher(abc.ABC):
     @staticmethod
     def _create_run_object(task):
-        valid_task_types = (dict, mlrun.run.RunTemplate, mlrun.run.RunObject)
         if not task:
             # if task passed generate default RunObject
             return mlrun.run.RunObject.from_dict(task)
@@ -215,18 +216,18 @@ class BaseLauncher(abc.ABC):
         if isinstance(task, str):
             task = ast.literal_eval(task)
-        if not isinstance(task, valid_task_types):
-            raise mlrun.errors.MLRunInvalidArgumentError(
-                f"Task is not a valid object, type={type(task)}, expected types={valid_task_types}"
-            )
+        valid_task_types = (dict, mlrun.run.RunTemplate, mlrun.run.RunObject)
+        if isinstance(task, mlrun.run.RunObject):
+            # if task is already a RunObject, we can return it as is
+            return task
         if isinstance(task, mlrun.run.RunTemplate):
             return mlrun.run.RunObject.from_template(task)
         elif isinstance(task, dict):
             return mlrun.run.RunObject.from_dict(task)
-        # task is already a RunObject
-        return task
+        raise mlrun.errors.MLRunInvalidArgumentError(
+            f"Task is not a valid object, type={type(task)}, expected types={valid_task_types}"
+        )
     @staticmethod
     def _enrich_run(
@@ -246,6 +247,7 @@ class BaseLauncher(abc.ABC):
         workdir=None,
         notifications: Optional[list[mlrun.model.Notification]] = None,
         state_thresholds: Optional[dict[str, int]] = None,
+        retry: Optional[Union[mlrun.model.Retry, dict]] = None,
     ):
         run.spec.handler = (
             handler or run.spec.handler or runtime.spec.default_handler or ""
@@ -364,6 +366,7 @@ class BaseLauncher(abc.ABC):
             | state_thresholds
         )
         run.spec.state_thresholds = state_thresholds or run.spec.state_thresholds
+        run.spec.retry = retry or run.spec.retry
         return run
     @staticmethod
@@ -410,7 +413,7 @@ class BaseLauncher(abc.ABC):
             )
             if (
                 run.status.state
-                in mlrun.common.runtimes.constants.RunStates.error_and_abortion_states()
+                in mlrun.common.runtimes.constants.RunStates.error_states()
             ):
                 if runtime._is_remote and not runtime.is_child:
                     logger.error(
@@ -418,7 +421,14 @@ class BaseLauncher(abc.ABC):
                         state=run.status.state,
                         status=run.status.to_dict(),
                     )
-                raise mlrun.runtimes.utils.RunError(run.error)
+                error = run.error
+                if (
+                    run.status.state
+                    == mlrun.common.runtimes.constants.RunStates.pending_retry
+                ):
+                    error = f"Run is pending retry, error: {run.error}"
+                raise mlrun.runtimes.utils.RunError(error)
             return run
         return None

mlrun/launcher/local.py CHANGED Viewed

@@ -72,6 +72,7 @@ class ClientLocalLauncher(launcher.ClientBaseLauncher):
         returns: Optional[list[Union[str, dict[str, str]]]] = None,
         state_thresholds: Optional[dict[str, int]] = None,
         reset_on_run: Optional[bool] = None,
+        retry: Optional[Union[mlrun.model.Retry, dict]] = None,
     ) -> "mlrun.run.RunObject":
         # do not allow local function to be scheduled
         if schedule is not None:
@@ -122,8 +123,9 @@ class ClientLocalLauncher(launcher.ClientBaseLauncher):
             workdir=workdir,
             notifications=notifications,
             state_thresholds=state_thresholds,
+            retry=retry,
         )
-        self._validate_runtime(runtime, run)
+        self._validate_run(runtime, run)
         result = self._execute(
             runtime=runtime,
             run=run,

mlrun/launcher/remote.py CHANGED Viewed

@@ -61,6 +61,7 @@ class ClientRemoteLauncher(launcher.ClientBaseLauncher):
         returns: Optional[list[Union[str, dict[str, str]]]] = None,
         state_thresholds: Optional[dict[str, int]] = None,
         reset_on_run: Optional[bool] = None,
+        retry: Optional[Union[mlrun.model.Retry, dict]] = None,
     ) -> "mlrun.run.RunObject":
         self.enrich_runtime(runtime, project)
         run = self._create_run_object(task)
@@ -82,8 +83,9 @@ class ClientRemoteLauncher(launcher.ClientBaseLauncher):
             workdir=workdir,
             notifications=notifications,
             state_thresholds=state_thresholds,
+            retry=retry,
         )
-        self._validate_runtime(runtime, run)
+        self._validate_run(runtime, run)
         if not runtime.is_deployed():
             if runtime.spec.build.auto_build or auto_build:
@@ -190,7 +192,7 @@ class ClientRemoteLauncher(launcher.ClientBaseLauncher):
         return self._wrap_run_result(runtime, resp, run, schedule=schedule)
     @classmethod
-    def _validate_run_single_param(cls, param_name, param_value):
+    def _validate_run_single_param(cls, param_name: str, param_value: int):
         if isinstance(param_value, pd.DataFrame):
             raise mlrun.errors.MLRunInvalidArgumentTypeError(
                 f"Parameter '{param_name}' has an unsupported value of type"

mlrun/model.py CHANGED Viewed

@@ -935,6 +935,41 @@ class HyperParamOptions(ModelObj):
             )
+class RetryBackoff(ModelObj):
+    """Backoff strategy for retries."""
+    def __init__(self, base_delay: Optional[str] = None):
+        # The base_delay time string must conform to timelength python package standards and be at least
+        # mlrun.mlconf.function.spec.retry.backoff.min_base_delay (e.g. 1000s, 1 hour 30m, 1h etc.).
+        self.base_delay = (
+            base_delay or mlrun.mlconf.function.spec.retry.backoff.default_base_delay
+        )
+class Retry(ModelObj):
+    """Retry configuration"""
+    def __init__(
+        self,
+        count: int = 0,
+        backoff: typing.Union[RetryBackoff, dict] = None,
+    ):
+        # Set to None if count is 0 to eliminate the retry configuration from the dictionary representation.
+        self.count = count or None
+        self.backoff = backoff
+    @property
+    def backoff(self) -> Optional[RetryBackoff]:
+        if not self.count:
+            # Retry is not configured, return None
+            return None
+        return self._backoff
+    @backoff.setter
+    def backoff(self, backoff):
+        self._backoff = self._verify_dict(backoff, "backoff", RetryBackoff)
 class RunSpec(ModelObj):
     """Run specification"""
@@ -971,6 +1006,7 @@ class RunSpec(ModelObj):
         node_selector=None,
         tolerations=None,
         affinity=None,
+        retry=None,
     ):
         # A dictionary of parsing configurations that will be read from the inputs the user set. The keys are the inputs
         # keys (parameter names) and the values are the type hint given in the input keys after the colon.
@@ -1011,6 +1047,7 @@ class RunSpec(ModelObj):
         self.node_selector = node_selector or {}
         self.tolerations = tolerations or {}
         self.affinity = affinity or {}
+        self.retry = retry or {}
     def _serialize_field(
         self, struct: dict, field_name: Optional[str] = None, strip: bool = False
@@ -1212,6 +1249,14 @@ class RunSpec(ModelObj):
         self._verify_dict(state_thresholds, "state_thresholds")
         self._state_thresholds = state_thresholds
+    @property
+    def retry(self) -> Retry:
+        return self._retry
+    @retry.setter
+    def retry(self, retry: typing.Union[Retry, dict]):
+        self._retry = self._verify_dict(retry, "retry", Retry)
     def extract_type_hints_from_inputs(self):
         """
         This method extracts the type hints from the input keys in the input dictionary.
@@ -1329,6 +1374,7 @@ class RunStatus(ModelObj):
         reason: Optional[str] = None,
         notifications: Optional[dict[str, Notification]] = None,
         artifact_uris: Optional[dict[str, str]] = None,
+        retry_count: Optional[int] = None,
     ):
         self.state = state or "created"
         self.status_text = status_text
@@ -1346,6 +1392,7 @@ class RunStatus(ModelObj):
         self.notifications = notifications or {}
         # Artifact key -> URI mapping, since the full artifacts are not stored in the runs DB table
         self._artifact_uris = artifact_uris or {}
+        self._retry_count = retry_count or None
     @classmethod
     def from_dict(
@@ -1399,6 +1446,21 @@ class RunStatus(ModelObj):
         self._artifact_uris = resolved_artifact_uris
+    @property
+    def retry_count(self) -> Optional[int]:
+        """
+        The number of retries that were made for this run.
+        """
+        return self._retry_count
+    @retry_count.setter
+    def retry_count(self, retry_count: int):
+        """
+        Set the number of retries that were made for this run.
+        :param retry_count: The number of retries.
+        """
+        self._retry_count = retry_count
     def is_failed(self) -> Optional[bool]:
         """
         This method returns whether a run has failed.
@@ -2026,6 +2088,7 @@ def new_task(
     secrets=None,
     base=None,
     returns=None,
+    retry=None,
 ) -> RunTemplate:
     """Creates a new task
@@ -2061,6 +2124,7 @@ def new_task(
                             * A dictionary of configurations to use when logging. Further info per object type and
                               artifact type can be given there. The artifact key must appear in the dictionary as
                               "key": "the_key".
+    :param retry:           Retry configuration for the run, can be a dict or an instance of mlrun.model.Retry.
     """
     if base:
@@ -2086,6 +2150,7 @@ def new_task(
     run.spec.hyper_param_options.selector = (
         selector or run.spec.hyper_param_options.selector
     )
+    run.spec.retry = retry or run.spec.retry
     return run

mlrun/model_monitoring/db/tsdb/v3io/v3io_connector.py CHANGED Viewed

@@ -804,25 +804,45 @@ class V3IOTSDBConnector(TSDBConnector):
     @staticmethod
     def _get_sql_query(
         *,
-        endpoint_id: str,
         table_path: str,
+        endpoint_id: Optional[str] = None,
+        application_names: Optional[list[str]] = None,
         name: str = mm_schemas.ResultData.RESULT_NAME,
         metric_and_app_names: Optional[list[tuple[str, str]]] = None,
         columns: Optional[list[str]] = None,
+        group_by_columns: Optional[list[str]] = None,
     ) -> str:
         """Get the SQL query for the results/metrics table"""
+        if metric_and_app_names and not endpoint_id:
+            raise mlrun.errors.MLRunInvalidArgumentError(
+                "If metric_and_app_names is provided, endpoint_id must also be provided"
+            )
+        if metric_and_app_names and application_names:
+            raise mlrun.errors.MLRunInvalidArgumentError(
+                "Cannot provide both metric_and_app_names and application_names"
+            )
         if columns:
             selection = ",".join(columns)
         else:
             selection = "*"
         with StringIO() as query:
-            query.write(
-                f"SELECT {selection} FROM '{table_path}' "
-                f"WHERE {mm_schemas.WriterEvent.ENDPOINT_ID}='{endpoint_id}'"
-            )
+            where_added = False
+            query.write(f"SELECT {selection} FROM '{table_path}'")
+            if endpoint_id:
+                query.write(
+                    f" WHERE {mm_schemas.WriterEvent.ENDPOINT_ID}='{endpoint_id}'"
+                )
+                where_added = True
             if metric_and_app_names:
-                query.write(" AND (")
+                if where_added:
+                    query.write(" AND (")
+                else:
+                    query.write(" WHERE (")
+                    where_added = True
                 for i, (app_name, result_name) in enumerate(metric_and_app_names):
                     sub_cond = (
@@ -835,6 +855,22 @@ class V3IOTSDBConnector(TSDBConnector):
                 query.write(")")
+            if application_names:
+                if where_added:
+                    query.write(" AND (")
+                else:
+                    query.write(" WHERE (")
+                for i, app_name in enumerate(application_names):
+                    sub_cond = f"{mm_schemas.WriterEvent.APPLICATION_NAME}='{app_name}'"
+                    if i != 0:  # not first sub condition
+                        query.write(" OR ")
+                    query.write(sub_cond)
+                query.write(")")
+            if group_by_columns:
+                query.write(" GROUP BY ")
+                query.write(",".join(group_by_columns))
             query.write(";")
             return query.getvalue()
@@ -1272,7 +1308,49 @@ class V3IOTSDBConnector(TSDBConnector):
         end: Optional[Union[datetime, str]] = None,
         application_names: Optional[Union[str, list[str]]] = None,
     ) -> dict[str, int]:
-        raise NotImplementedError
+        start, end = get_start_end(start=start, end=end, delta=timedelta(hours=24))
+        group_by_columns = [
+            mm_schemas.ApplicationEvent.APPLICATION_NAME,
+            mm_schemas.ApplicationEvent.ENDPOINT_ID,
+        ]
+        def get_application_endpoints_records(
+            record_type: Literal["metrics", "results"],
+        ):
+            if record_type == "results":
+                table_path = self.tables[mm_schemas.V3IOTSDBTables.APP_RESULTS]
+            else:
+                table_path = self.tables[mm_schemas.V3IOTSDBTables.METRICS]
+            sql_query = self._get_sql_query(
+                table_path=table_path,
+                columns=[mm_schemas.WriterEvent.START_INFER_TIME],
+                group_by_columns=group_by_columns,
+                application_names=application_names,
+            )
+            return self.frames_client.read(
+                backend=_TSDB_BE,
+                start=start,
+                end=end,
+                query=sql_query,
+            )
+        df_results = get_application_endpoints_records("results")
+        df_metrics = get_application_endpoints_records("metrics")
+        if df_results.empty and df_metrics.empty:
+            return {}
+        # Combine the two dataframes and count unique endpoints per application
+        combined_df = pd.concat([df_results, df_metrics], ignore_index=True)
+        if combined_df.empty:
+            return {}
+        combined_df.drop_duplicates(subset=group_by_columns, inplace=True)
+        grouped_df = combined_df.groupby(
+            mm_schemas.WriterEvent.APPLICATION_NAME
+        ).count()
+        return grouped_df[mm_schemas.WriterEvent.ENDPOINT_ID].to_dict()
     def calculate_latest_metrics(
         self,
@@ -1282,4 +1360,93 @@ class V3IOTSDBConnector(TSDBConnector):
     ) -> list[
         Union[mm_schemas.ApplicationResultRecord, mm_schemas.ApplicationMetricRecord]
     ]:
-        raise NotImplementedError
+        metric_list = []
+        start, end = get_start_end(start=start, end=end, delta=timedelta(hours=24))
+        # Get the latest results
+        def get_latest_metrics_records(
+            record_type: Literal["metrics", "results"],
+        ) -> pd.DataFrame:
+            group_by_columns = [mm_schemas.ApplicationEvent.APPLICATION_NAME]
+            if record_type == "results":
+                table_path = self.tables[mm_schemas.V3IOTSDBTables.APP_RESULTS]
+                columns = [
+                    f"last({mm_schemas.ResultData.RESULT_STATUS})",
+                    f"last({mm_schemas.ResultData.RESULT_VALUE})",
+                    f"last({mm_schemas.ResultData.RESULT_KIND})",
+                ]
+                group_by_columns += [
+                    mm_schemas.ResultData.RESULT_NAME,
+                ]
+            else:
+                table_path = self.tables[mm_schemas.V3IOTSDBTables.METRICS]
+                columns = [f"last({mm_schemas.MetricData.METRIC_VALUE})"]
+                group_by_columns += [
+                    mm_schemas.MetricData.METRIC_NAME,
+                ]
+            sql_query = self._get_sql_query(
+                table_path=table_path,
+                columns=columns,
+                group_by_columns=group_by_columns,
+                application_names=application_names,
+            )
+            return self.frames_client.read(
+                backend=_TSDB_BE,
+                start=start,
+                end=end,
+                query=sql_query,
+            )
+        df_results = get_latest_metrics_records("results")
+        df_metrics = get_latest_metrics_records("metrics")
+        if df_results.empty and df_metrics.empty:
+            return metric_list
+        # Convert the results DataFrame to a list of ApplicationResultRecord
+        def build_metric_objects() -> (
+            list[
+                Union[
+                    mm_schemas.ApplicationResultRecord,
+                    mm_schemas.ApplicationMetricRecord,
+                ]
+            ]
+        ):
+            metric_objects = []
+            if not df_results.empty:
+                df_results.rename(
+                    columns={
+                        f"last({mm_schemas.ResultData.RESULT_VALUE})": mm_schemas.ResultData.RESULT_VALUE,
+                        f"last({mm_schemas.ResultData.RESULT_STATUS})": mm_schemas.ResultData.RESULT_STATUS,
+                        f"last({mm_schemas.ResultData.RESULT_KIND})": mm_schemas.ResultData.RESULT_KIND,
+                    },
+                    inplace=True,
+                )
+                for _, row in df_results.iterrows():
+                    metric_objects.append(
+                        mm_schemas.ApplicationResultRecord(
+                            result_name=row[mm_schemas.ResultData.RESULT_NAME],
+                            kind=row[mm_schemas.ResultData.RESULT_KIND],
+                            status=row[mm_schemas.ResultData.RESULT_STATUS],
+                            value=row[mm_schemas.ResultData.RESULT_VALUE],
+                        )
+                    )
+            if not df_metrics.empty:
+                df_metrics.rename(
+                    columns={
+                        f"last({mm_schemas.MetricData.METRIC_VALUE})": mm_schemas.MetricData.METRIC_VALUE,
+                    },
+                    inplace=True,
+                )
+                for _, row in df_metrics.iterrows():
+                    metric_objects.append(
+                        mm_schemas.ApplicationMetricRecord(
+                            metric_name=row[mm_schemas.MetricData.METRIC_NAME],
+                            value=row[mm_schemas.MetricData.METRIC_VALUE],
+                        )
+                    )
+            return metric_objects
+        return build_metric_objects()

mlrun/package/packagers_manager.py CHANGED Viewed

@@ -21,6 +21,7 @@ from typing import Any, Optional, Union
 import mlrun.errors
 from mlrun.artifacts import Artifact
+from mlrun.artifacts.base import verify_target_path
 from mlrun.datastore import DataItem, get_store_resource, store_manager
 from mlrun.errors import MLRunInvalidArgumentError
 from mlrun.utils import logger
@@ -276,6 +277,7 @@ class PackagersManager:
         if data_item.get_artifact_type():
             # Get the artifact object in the data item:
             artifact, _ = store_manager.get_store_artifact(url=data_item.artifact_url)
+            verify_target_path(artifact)
             # Get the key from the artifact's metadata and instructions from the artifact's spec:
             artifact_key = artifact.metadata.key
             packaging_instructions = artifact.spec.unpackaging_instructions

mlrun/projects/operations.py CHANGED Viewed

@@ -20,7 +20,6 @@ import mlrun
 import mlrun.common.constants as mlrun_constants
 import mlrun.common.schemas.function
 import mlrun.common.schemas.workflow
-import mlrun_pipelines.common.models
 import mlrun_pipelines.models
 from mlrun.utils import hub_prefix
@@ -82,6 +81,7 @@ def run_function(
     builder_env: Optional[list] = None,
     reset_on_run: Optional[bool] = None,
     output_path: Optional[str] = None,
+    retry: Optional[Union[mlrun.model.Retry, dict]] = None,
 ) -> Union[mlrun.model.RunObject, mlrun_pipelines.models.PipelineNodeWrapper]:
     """Run a local or remote task as part of a local/kubeflow pipeline
@@ -177,6 +177,7 @@ def run_function(
                             This ensures latest code changes are executed. This argument must be used in
                             conjunction with the local=True argument.
     :param output_path:     path to store artifacts, when running in a workflow this will be set automatically
+    :param retry:           Retry configuration for the run, can be a dict or an instance of mlrun.model.Retry.
     :return: MLRun RunObject or PipelineNodeWrapper
     """
     if artifact_path:
@@ -197,6 +198,7 @@ def run_function(
         returns=returns,
         base=base_task,
         selector=selector,
+        retry=retry,
     )
     task.spec.verbose = task.spec.verbose or verbose
@@ -205,6 +207,11 @@ def run_function(
             raise mlrun.errors.MLRunInvalidArgumentError(
                 "Scheduling jobs is not supported when running a workflow with the kfp engine."
             )
+        if retry:
+            raise mlrun.errors.MLRunInvalidArgumentError(
+                "Retrying jobs is not supported when running a workflow with the kfp engine. "
+                "Use KFP set_retry instead."
+            )
         return function.as_step(
             name=name, runspec=task, workdir=workdir, outputs=outputs, labels=labels
         )

mlrun 1.10.0rc11__py3-none-any.whl → 1.10.0rc13__py3-none-any.whl

Potentially problematic release.

mlrun 1.10.0rc11__py3-none-any.whl → 1.10.0rc13__py3-none-any.whl