mlrun 1.10.0rc13__py3-none-any.whl → 1.10.0rc15__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of mlrun might be problematic.
- mlrun/artifacts/base.py +0 -31
- mlrun/artifacts/llm_prompt.py +106 -20
- mlrun/artifacts/manager.py +0 -5
- mlrun/common/constants.py +0 -1
- mlrun/common/schemas/__init__.py +1 -0
- mlrun/common/schemas/model_monitoring/__init__.py +1 -0
- mlrun/common/schemas/model_monitoring/functions.py +1 -1
- mlrun/common/schemas/model_monitoring/model_endpoints.py +10 -0
- mlrun/common/schemas/workflow.py +0 -1
- mlrun/config.py +1 -1
- mlrun/datastore/model_provider/model_provider.py +42 -14
- mlrun/datastore/model_provider/openai_provider.py +96 -15
- mlrun/db/base.py +14 -0
- mlrun/db/httpdb.py +42 -9
- mlrun/db/nopdb.py +8 -0
- mlrun/execution.py +16 -7
- mlrun/model.py +15 -0
- mlrun/model_monitoring/__init__.py +1 -0
- mlrun/model_monitoring/applications/base.py +176 -20
- mlrun/model_monitoring/db/_schedules.py +84 -24
- mlrun/model_monitoring/db/tsdb/base.py +72 -1
- mlrun/model_monitoring/db/tsdb/tdengine/schemas.py +7 -1
- mlrun/model_monitoring/db/tsdb/tdengine/tdengine_connector.py +37 -0
- mlrun/model_monitoring/db/tsdb/v3io/v3io_connector.py +25 -0
- mlrun/model_monitoring/helpers.py +26 -4
- mlrun/projects/project.py +38 -12
- mlrun/runtimes/daskjob.py +6 -0
- mlrun/runtimes/mpijob/abstract.py +6 -0
- mlrun/runtimes/mpijob/v1.py +6 -0
- mlrun/runtimes/nuclio/application/application.py +2 -0
- mlrun/runtimes/nuclio/function.py +6 -0
- mlrun/runtimes/nuclio/serving.py +12 -11
- mlrun/runtimes/pod.py +21 -0
- mlrun/runtimes/remotesparkjob.py +6 -0
- mlrun/runtimes/sparkjob/spark3job.py +6 -0
- mlrun/serving/__init__.py +2 -0
- mlrun/serving/server.py +95 -26
- mlrun/serving/states.py +130 -10
- mlrun/utils/helpers.py +36 -12
- mlrun/utils/retryer.py +15 -2
- mlrun/utils/version/version.json +2 -2
- {mlrun-1.10.0rc13.dist-info → mlrun-1.10.0rc15.dist-info}/METADATA +3 -8
- {mlrun-1.10.0rc13.dist-info → mlrun-1.10.0rc15.dist-info}/RECORD +47 -47
- {mlrun-1.10.0rc13.dist-info → mlrun-1.10.0rc15.dist-info}/WHEEL +0 -0
- {mlrun-1.10.0rc13.dist-info → mlrun-1.10.0rc15.dist-info}/entry_points.txt +0 -0
- {mlrun-1.10.0rc13.dist-info → mlrun-1.10.0rc15.dist-info}/licenses/LICENSE +0 -0
- {mlrun-1.10.0rc13.dist-info → mlrun-1.10.0rc15.dist-info}/top_level.txt +0 -0
mlrun/runtimes/nuclio/serving.py
CHANGED
@@ -720,6 +720,7 @@ class ServingRuntime(RemoteRuntime):
             "track_models": self.spec.track_models,
             "default_content_type": self.spec.default_content_type,
             "model_endpoint_creation_task_name": self.spec.model_endpoint_creation_task_name,
+            # TODO: find another way to pass this (needed for local run)
             "filename": getattr(self.spec, "filename", None),
         }

@@ -788,17 +789,13 @@ class ServingRuntime(RemoteRuntime):
             monitoring_mock=self.spec.track_models,
         )

-
-
-
-
-        server.
-
-
-            self.spec.track_models,
-            server.context,
-            self.spec,
-        )
+        server.graph = add_system_steps_to_graph(
+            server.project,
+            server.graph,
+            self.spec.track_models,
+            server.context,
+            self.spec,
+        )

         if workdir:
             os.chdir(old_workdir)
@@ -858,6 +855,7 @@ class ServingRuntime(RemoteRuntime):
             description=self.spec.description,
             workdir=self.spec.workdir,
             image_pull_secret=self.spec.image_pull_secret,
+            build=self.spec.build,
             node_name=self.spec.node_name,
             node_selector=self.spec.node_selector,
             affinity=self.spec.affinity,
@@ -868,6 +866,9 @@ class ServingRuntime(RemoteRuntime):
             security_context=self.spec.security_context,
             state_thresholds=self.spec.state_thresholds,
             serving_spec=self._get_serving_spec(),
+            track_models=self.spec.track_models,
+            parameters=self.spec.parameters,
+            graph=self.spec.graph,
         )
         job = KubejobRuntime(
             spec=spec,
mlrun/runtimes/pod.py
CHANGED
@@ -104,6 +104,9 @@ class KubeResourceSpec(FunctionSpec):
         "security_context",
         "state_thresholds",
         "serving_spec",
+        "track_models",
+        "parameters",
+        "graph",
     ]
     _default_fields_to_strip = FunctionSpec._default_fields_to_strip + [
         "volumes",
@@ -180,6 +183,9 @@ class KubeResourceSpec(FunctionSpec):
         security_context=None,
         state_thresholds=None,
         serving_spec=None,
+        track_models=None,
+        parameters=None,
+        graph=None,
     ):
         super().__init__(
             command=command,
@@ -226,6 +232,10 @@ class KubeResourceSpec(FunctionSpec):
             or mlrun.mlconf.function.spec.state_thresholds.default.to_dict()
         )
         self.serving_spec = serving_spec
+        self.track_models = track_models
+        self.parameters = parameters
+        self._graph = None
+        self.graph = graph
         # Termination grace period is internal for runtimes that have a pod termination hook hence it is not in the
         # _dict_fields and doesn't have a setter.
         self._termination_grace_period_seconds = None
@@ -303,6 +313,17 @@ class KubeResourceSpec(FunctionSpec):
     def termination_grace_period_seconds(self) -> typing.Optional[int]:
         return self._termination_grace_period_seconds

+    @property
+    def graph(self):
+        """states graph, holding the serving workflow/DAG topology"""
+        return self._graph
+
+    @graph.setter
+    def graph(self, graph):
+        from ..serving.states import graph_root_setter
+
+        graph_root_setter(self, graph)
+
     def _serialize_field(
         self, struct: dict, field_name: typing.Optional[str] = None, strip: bool = False
     ) -> typing.Any:
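
A minimal sketch of the new KubeResourceSpec.graph property shown above. The dict passed in is a hypothetical serving-graph spec; setting .graph delegates to graph_root_setter from mlrun.serving.states, assuming it accepts the same dict/step forms it handles for the serving spec:

from mlrun.runtimes.pod import KubeResourceSpec

spec = KubeResourceSpec()
spec.graph = {"kind": "flow"}   # routed through graph_root_setter(spec, ...)
print(type(spec.graph))         # the normalized root step object stored in spec._graph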
mlrun/runtimes/remotesparkjob.py
CHANGED
@@ -59,6 +59,9 @@ class RemoteSparkSpec(KubeResourceSpec):
         security_context=None,
         state_thresholds=None,
         serving_spec=None,
+        graph=None,
+        parameters=None,
+        track_models=None,
     ):
         super().__init__(
             command=command,
@@ -89,6 +92,9 @@ class RemoteSparkSpec(KubeResourceSpec):
             security_context=security_context,
             state_thresholds=state_thresholds,
             serving_spec=serving_spec,
+            graph=graph,
+            parameters=parameters,
+            track_models=track_models,
         )
         self.provider = provider

mlrun/runtimes/sparkjob/spark3job.py
CHANGED
@@ -169,6 +169,9 @@ class Spark3JobSpec(KubeResourceSpec):
         security_context=None,
         state_thresholds=None,
         serving_spec=None,
+        graph=None,
+        parameters=None,
+        track_models=None,
     ):
         super().__init__(
             command=command,
@@ -199,6 +202,9 @@ class Spark3JobSpec(KubeResourceSpec):
             security_context=security_context,
             state_thresholds=state_thresholds,
             serving_spec=serving_spec,
+            graph=graph,
+            parameters=parameters,
+            track_models=track_models,
         )

         self.driver_resources = driver_resources or {}
mlrun/serving/__init__.py
CHANGED
@@ -28,6 +28,7 @@ __all__ = [
     "Model",
     "ModelSelector",
     "MonitoredStep",
+    "LLModel",
 ]

 from .routers import ModelRouter, VotingEnsemble  # noqa
@@ -47,6 +48,7 @@ from .states import (
     Model,
     ModelSelector,
     MonitoredStep,
+    LLModel,
 )  # noqa
 from .v1_serving import MLModelServer, new_v1_model_server  # noqa
 from .v2_serving import V2ModelServer  # noqa
mlrun/serving/server.py
CHANGED
@@ -15,6 +15,7 @@
 __all__ = ["GraphServer", "create_graph_server", "GraphContext", "MockEvent"]

 import asyncio
+import base64
 import copy
 import json
 import os
@@ -384,6 +385,7 @@ def add_monitoring_general_steps(
     graph: RootFlowStep,
     context,
     serving_spec,
+    pause_until_background_task_completion: bool,
 ) -> tuple[RootFlowStep, FlowStep]:
     """
     Adding the monitoring flow connection steps, this steps allow the graph to reconstruct the serving event enrich it
@@ -392,18 +394,22 @@ def add_monitoring_general_steps(
     "background_task_status_step" --> "filter_none" --> "monitoring_pre_processor_step" --> "flatten_events"
     --> "sampling_step" --> "filter_none_sampling" --> "model_monitoring_stream"
     """
+    background_task_status_step = None
+    if pause_until_background_task_completion:
+        background_task_status_step = graph.add_step(
+            "mlrun.serving.system_steps.BackgroundTaskStatus",
+            "background_task_status_step",
+            model_endpoint_creation_strategy=mlrun.common.schemas.ModelEndpointCreationStrategy.SKIP,
+        )
     monitor_flow_step = graph.add_step(
-        "mlrun.serving.system_steps.BackgroundTaskStatus",
-        "background_task_status_step",
-        model_endpoint_creation_strategy=mlrun.common.schemas.ModelEndpointCreationStrategy.SKIP,
-    )
-    graph.add_step(
         "storey.Filter",
         "filter_none",
         _fn="(event is not None)",
-        after="background_task_status_step",
+        after="background_task_status_step" if background_task_status_step else None,
         model_endpoint_creation_strategy=mlrun.common.schemas.ModelEndpointCreationStrategy.SKIP,
     )
+    if background_task_status_step:
+        monitor_flow_step = background_task_status_step
     graph.add_step(
         "mlrun.serving.system_steps.MonitoringPreProcessor",
         "monitoring_pre_processor_step",
@@ -466,14 +472,28 @@ def add_monitoring_general_steps(


 def add_system_steps_to_graph(
-    project: str,
+    project: str,
+    graph: RootFlowStep,
+    track_models: bool,
+    context,
+    serving_spec,
+    pause_until_background_task_completion: bool = True,
 ) -> RootFlowStep:
+    if not (isinstance(graph, RootFlowStep) and graph.include_monitored_step()):
+        return graph
     monitored_steps = graph.get_monitored_steps()
     graph = add_error_raiser_step(graph, monitored_steps)
     if track_models:
+        background_task_status_step = None
         graph, monitor_flow_step = add_monitoring_general_steps(
-            project,
+            project,
+            graph,
+            context,
+            serving_spec,
+            pause_until_background_task_completion,
         )
+        if background_task_status_step:
+            monitor_flow_step = background_task_status_step
         # Connect each model runner to the monitoring step:
         for step_name, step in monitored_steps.items():
             if monitor_flow_step.after:
@@ -485,6 +505,10 @@ def add_system_steps_to_graph(
                 monitor_flow_step.after = [
                     step_name,
                 ]
+    context.logger.info_with(
+        "Server graph after adding system steps",
+        graph=str(graph.steps),
+    )
     return graph


@@ -494,18 +518,13 @@ def v2_serving_init(context, namespace=None):
     context.logger.info("Initializing server from spec")
     spec = mlrun.utils.get_serving_spec()
     server = GraphServer.from_dict(spec)
-
-    server.
-
-
-
-
-
-    )
-    context.logger.info_with(
-        "Server graph after adding system steps",
-        graph=str(server.graph.steps),
-    )
+    server.graph = add_system_steps_to_graph(
+        server.project,
+        copy.deepcopy(server.graph),
+        spec.get("track_models"),
+        context,
+        spec,
+    )

     if config.log_level.lower() == "debug":
         server.verbose = True
@@ -544,17 +563,57 @@ async def async_execute_graph(
     data: DataItem,
     batching: bool,
     batch_size: Optional[int],
+    read_as_lists: bool,
+    nest_under_inputs: bool,
 ) -> list[Any]:
     spec = mlrun.utils.get_serving_spec()

-    source_filename = spec.get("filename", None)
     namespace = {}
-
-
-
+    code = os.getenv("MLRUN_EXEC_CODE")
+    if code:
+        code = base64.b64decode(code).decode("utf-8")
+        exec(code, namespace)
+    else:
+        # TODO: find another way to get the local file path, or ensure that MLRUN_EXEC_CODE
+        # gets set in local flow and not just in the remote pod
+        source_filename = spec.get("filename", None)
+        if source_filename:
+            with open(source_filename) as f:
+                exec(f.read(), namespace)

     server = GraphServer.from_dict(spec)

+    if server.model_endpoint_creation_task_name:
+        context.logger.info(
+            f"Waiting for model endpoint creation task '{server.model_endpoint_creation_task_name}'..."
+        )
+        background_task = (
+            mlrun.get_run_db().wait_for_background_task_to_reach_terminal_state(
+                project=server.project,
+                name=server.model_endpoint_creation_task_name,
+            )
+        )
+        task_state = background_task.status.state
+        if task_state == mlrun.common.schemas.BackgroundTaskState.failed:
+            raise mlrun.errors.MLRunRuntimeError(
+                "Aborting job due to model endpoint creation background task failure"
+            )
+        elif task_state != mlrun.common.schemas.BackgroundTaskState.succeeded:
+            # this shouldn't happen, but we need to know if it does
+            raise mlrun.errors.MLRunRuntimeError(
+                "Aborting job because the model endpoint creation background task did not succeed "
+                f"(status='{task_state}')"
+            )
+
+    server.graph = add_system_steps_to_graph(
+        server.project,
+        copy.deepcopy(server.graph),
+        spec.get("track_models"),
+        context,
+        spec,
+        pause_until_background_task_completion=False,  # we've already awaited it
+    )
+
     if config.log_level.lower() == "debug":
         server.verbose = True
     context.logger.info_with("Initializing states", namespace=namespace)
@@ -588,7 +647,9 @@ async def async_execute_graph(

     batch = []
     for index, row in df.iterrows():
-        data = row.to_dict()
+        data = row.to_list() if read_as_lists else row.to_dict()
+        if nest_under_inputs:
+            data = {"inputs": data}
         if batching:
             batch.append(data)
             if len(batch) == batch_size:
@@ -612,6 +673,8 @@ def execute_graph(
     data: DataItem,
     batching: bool = False,
     batch_size: Optional[int] = None,
+    read_as_lists: bool = False,
+    nest_under_inputs: bool = False,
 ) -> (list[Any], Any):
     """
     Execute graph as a job, from start to finish.
@@ -621,10 +684,16 @@ def execute_graph(
     :param batching: Whether to push one or more batches into the graph rather than row by row.
     :param batch_size: The number of rows to push per batch. If not set, and batching=True, the entire dataset will
         be pushed into the graph in one batch.
+    :param read_as_lists: Whether to read each row as a list instead of a dictionary.
+    :param nest_under_inputs: Whether to wrap each row with {"inputs": ...}.

     :return: A list of responses.
     """
-    return asyncio.run(
+    return asyncio.run(
+        async_execute_graph(
+            context, data, batching, batch_size, read_as_lists, nest_under_inputs
+        )
+    )


 def _set_callbacks(server, context):
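
The execute_graph changes above add read_as_lists and nest_under_inputs flags and thread them through async_execute_graph. A minimal sketch of a job handler using them (the handler name and batch size are hypothetical; per the docstring, the call returns the list of graph responses):

from typing import Any

from mlrun.datastore import DataItem
from mlrun.execution import MLClientCtx
from mlrun.serving.server import execute_graph


def handler(context: MLClientCtx, data: DataItem) -> list[Any]:
    # Each dataframe row is read as a list and wrapped as {"inputs": [...]} before
    # being pushed through the graph in batches of 100 rows.
    responses = execute_graph(
        context,
        data,
        batching=True,
        batch_size=100,
        read_as_lists=True,
        nest_under_inputs=True,
    )
    return responses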
mlrun/serving/states.py
CHANGED
@@ -1081,6 +1081,7 @@ class Model(storey.ParallelExecutionRunnable, ModelObj):
         "raise_exception",
         "artifact_uri",
         "shared_runnable_name",
+        "shared_proxy_mapping",
     ]
     kind = "model"

@@ -1089,12 +1090,16 @@ class Model(storey.ParallelExecutionRunnable, ModelObj):
         name: str,
         raise_exception: bool = True,
         artifact_uri: Optional[str] = None,
+        shared_proxy_mapping: Optional[dict] = None,
         **kwargs,
     ):
         super().__init__(name=name, raise_exception=raise_exception, **kwargs)
         if artifact_uri is not None and not isinstance(artifact_uri, str):
             raise MLRunInvalidArgumentError("'artifact_uri' argument must be a string")
         self.artifact_uri = artifact_uri
+        self.shared_proxy_mapping: dict[
+            str : Union[str, ModelArtifact, LLMPromptArtifact]
+        ] = shared_proxy_mapping
         self.invocation_artifact: Optional[LLMPromptArtifact] = None
         self.model_artifact: Optional[ModelArtifact] = None
         self.model_provider: Optional[ModelProvider] = None
@@ -1125,10 +1130,13 @@ class Model(storey.ParallelExecutionRunnable, ModelObj):
         else:
             self.model_artifact = artifact

-    def _get_artifact_object(
-
-
-
+    def _get_artifact_object(
+        self, proxy_uri: Optional[str] = None
+    ) -> Union[ModelArtifact, LLMPromptArtifact, None]:
+        uri = proxy_uri or self.artifact_uri
+        if uri:
+            if mlrun.datastore.is_store_uri(uri):
+                artifact, _ = mlrun.store_manager.get_store_artifact(uri)
                 return artifact
             else:
                 raise ValueError(
@@ -1148,10 +1156,12 @@ class Model(storey.ParallelExecutionRunnable, ModelObj):
         """Override to implement prediction logic if the logic requires asyncio."""
         return body

-    def run(self, body: Any, path: str) -> Any:
+    def run(self, body: Any, path: str, origin_name: Optional[str] = None) -> Any:
         return self.predict(body)

-    async def run_async(
+    async def run_async(
+        self, body: Any, path: str, origin_name: Optional[str] = None
+    ) -> Any:
         return await self.predict_async(body)

     def get_local_model_path(self, suffix="") -> (str, dict):
@@ -1186,6 +1196,81 @@ class Model(storey.ParallelExecutionRunnable, ModelObj):
         return None, None


+class LLModel(Model):
+    def __init__(self, name: str, **kwargs):
+        super().__init__(name, **kwargs)
+
+    def predict(
+        self, body: Any, messages: list[dict], model_configuration: dict
+    ) -> Any:
+        if isinstance(
+            self.invocation_artifact, mlrun.artifacts.LLMPromptArtifact
+        ) and isinstance(self.model_provider, ModelProvider):
+            body["result"] = self.model_provider.invoke(
+                messages=messages,
+                as_str=True,
+                **(model_configuration or {}),
+            )
+        return body
+
+    async def predict_async(
+        self, body: Any, messages: list[dict], model_configuration: dict
+    ) -> Any:
+        if isinstance(
+            self.invocation_artifact, mlrun.artifacts.LLMPromptArtifact
+        ) and isinstance(self.model_provider, ModelProvider):
+            body["result"] = await self.model_provider.async_invoke(
+                messages=messages,
+                as_str=True,
+                **(model_configuration or {}),
+            )
+        return body
+
+    def run(self, body: Any, path: str, origin_name: Optional[str] = None) -> Any:
+        messages, model_configuration = self.enrich_prompt(body, origin_name)
+        return self.predict(
+            body, messages=messages, model_configuration=model_configuration
+        )
+
+    async def run_async(
+        self, body: Any, path: str, origin_name: Optional[str] = None
+    ) -> Any:
+        messages, model_configuration = self.enrich_prompt(body, origin_name)
+        return await self.predict_async(
+            body, messages=messages, model_configuration=model_configuration
+        )
+
+    def enrich_prompt(
+        self, body: dict, origin_name: str
+    ) -> Union[tuple[list[dict], dict], tuple[None, None]]:
+        if origin_name and self.shared_proxy_mapping:
+            llm_prompt_artifact = self.shared_proxy_mapping.get(origin_name)
+            if isinstance(llm_prompt_artifact, str):
+                llm_prompt_artifact = self._get_artifact_object(llm_prompt_artifact)
+                self.shared_proxy_mapping[origin_name] = llm_prompt_artifact
+        else:
+            llm_prompt_artifact = (
+                self.invocation_artifact or self._get_artifact_object()
+            )
+        if not (
+            llm_prompt_artifact and isinstance(llm_prompt_artifact, LLMPromptArtifact)
+        ):
+            logger.warning(
+                "LLMModel must be provided with LLMPromptArtifact",
+                llm_prompt_artifact=llm_prompt_artifact,
+            )
+            return None, None
+        prompt_legend = llm_prompt_artifact.spec.prompt_legend
+        prompt_template = deepcopy(llm_prompt_artifact.read_prompt())
+        kwargs = {
+            place_holder: body.get(body_map["field"])
+            for place_holder, body_map in prompt_legend.items()
+        }
+        for d in prompt_template:
+            d["content"] = d["content"].format(**kwargs)
+        return prompt_template, llm_prompt_artifact.spec.model_configuration
+
+
 class ModelSelector:
     """Used to select which models to run on each event."""

@@ -1292,6 +1377,7 @@ class ModelRunnerStep(MonitoredStep):
     """

     kind = "model_runner"
+    _dict_fields = MonitoredStep._dict_fields + ["_shared_proxy_mapping"]

     def __init__(
         self,
@@ -1311,6 +1397,7 @@ class ModelRunnerStep(MonitoredStep):
         )
         self.raise_exception = raise_exception
         self.shape = "folder"
+        self._shared_proxy_mapping = {}

     def add_shared_model_proxy(
         self,
@@ -1360,9 +1447,9 @@ class ModelRunnerStep(MonitoredStep):
            in path.
        :param override: bool allow override existing model on the current ModelRunnerStep.
        """
-        model_class =
-
-            shared_runnable_name
+        model_class, model_params = (
+            "mlrun.serving.Model",
+            {"name": endpoint_name, "shared_runnable_name": shared_model_name},
        )
        if isinstance(model_artifact, str):
            model_artifact_uri = model_artifact
@@ -1389,6 +1476,20 @@ class ModelRunnerStep(MonitoredStep):
                 f"ModelRunnerStep can only add proxy models that were added to the root flow step, "
                 f"model {shared_model_name} is not in the shared models."
             )
+        if shared_model_name not in self._shared_proxy_mapping:
+            self._shared_proxy_mapping[shared_model_name] = {
+                endpoint_name: model_artifact.uri
+                if isinstance(model_artifact, (ModelArtifact, LLMPromptArtifact))
+                else model_artifact
+            }
+        else:
+            self._shared_proxy_mapping[shared_model_name].update(
+                {
+                    endpoint_name: model_artifact.uri
+                    if isinstance(model_artifact, (ModelArtifact, LLMPromptArtifact))
+                    else model_artifact
+                }
+            )
         self.add_model(
             endpoint_name=endpoint_name,
             model_class=model_class,
@@ -1401,6 +1502,7 @@ class ModelRunnerStep(MonitoredStep):
             outputs=outputs,
             input_path=input_path,
             result_path=result_path,
+            **model_params,
         )

     def add_model(
@@ -1659,6 +1761,7 @@ class ModelRunnerStep(MonitoredStep):
             model_selector=model_selector,
             runnables=model_objects,
             execution_mechanism_by_runnable_name=execution_mechanism_by_model_name,
+            shared_proxy_mapping=self._shared_proxy_mapping or None,
             name=self.name,
             context=context,
         )
@@ -2494,7 +2597,24 @@ class RootFlowStep(FlowStep):
             max_threads=self.shared_max_threads,
             pool_factor=self.pool_factor,
         )
-
+        monitored_steps = self.get_monitored_steps().values()
+        for monitored_step in monitored_steps:
+            if isinstance(monitored_step, ModelRunnerStep):
+                for model, model_params in self.shared_models.values():
+                    if "shared_proxy_mapping" in model_params:
+                        model_params["shared_proxy_mapping"].update(
+                            deepcopy(
+                                monitored_step._shared_proxy_mapping.get(
+                                    model_params.get("name"), {}
+                                )
+                            )
+                        )
+                    else:
+                        model_params["shared_proxy_mapping"] = deepcopy(
+                            monitored_step._shared_proxy_mapping.get(
+                                model_params.get("name"), {}
+                            )
+                        )
         for model, model_params in self.shared_models.values():
             model = get_class(model, namespace).from_dict(
                 model_params, init_with_params=True
CHANGED
|
@@ -162,14 +162,6 @@ def get_artifact_target(item: dict, project=None):
|
|
|
162
162
|
return item["spec"].get("target_path")
|
|
163
163
|
|
|
164
164
|
|
|
165
|
-
# TODO: Remove once data migration v5 is obsolete
|
|
166
|
-
def is_legacy_artifact(artifact):
|
|
167
|
-
if isinstance(artifact, dict):
|
|
168
|
-
return "metadata" not in artifact
|
|
169
|
-
else:
|
|
170
|
-
return not hasattr(artifact, "metadata")
|
|
171
|
-
|
|
172
|
-
|
|
173
165
|
logger = create_logger(config.log_level, config.log_formatter, "mlrun", sys.stdout)
|
|
174
166
|
missing = object()
|
|
175
167
|
|
|
@@ -1050,7 +1042,14 @@ def fill_function_hash(function_dict, tag=""):
|
|
|
1050
1042
|
|
|
1051
1043
|
|
|
1052
1044
|
def retry_until_successful(
|
|
1053
|
-
backoff: int,
|
|
1045
|
+
backoff: int,
|
|
1046
|
+
timeout: int,
|
|
1047
|
+
logger,
|
|
1048
|
+
verbose: bool,
|
|
1049
|
+
_function,
|
|
1050
|
+
*args,
|
|
1051
|
+
fatal_exceptions=(),
|
|
1052
|
+
**kwargs,
|
|
1054
1053
|
):
|
|
1055
1054
|
"""
|
|
1056
1055
|
Runs function with given *args and **kwargs.
|
|
@@ -1063,14 +1062,31 @@ def retry_until_successful(
|
|
|
1063
1062
|
:param verbose: whether to log the failure on each retry
|
|
1064
1063
|
:param _function: function to run
|
|
1065
1064
|
:param args: functions args
|
|
1065
|
+
:param fatal_exceptions: exception types that should not be retried
|
|
1066
1066
|
:param kwargs: functions kwargs
|
|
1067
1067
|
:return: function result
|
|
1068
1068
|
"""
|
|
1069
|
-
return Retryer(
|
|
1069
|
+
return Retryer(
|
|
1070
|
+
backoff,
|
|
1071
|
+
timeout,
|
|
1072
|
+
logger,
|
|
1073
|
+
verbose,
|
|
1074
|
+
_function,
|
|
1075
|
+
*args,
|
|
1076
|
+
fatal_exceptions=fatal_exceptions,
|
|
1077
|
+
**kwargs,
|
|
1078
|
+
).run()
|
|
1070
1079
|
|
|
1071
1080
|
|
|
1072
1081
|
async def retry_until_successful_async(
|
|
1073
|
-
backoff: int,
|
|
1082
|
+
backoff: int,
|
|
1083
|
+
timeout: int,
|
|
1084
|
+
logger,
|
|
1085
|
+
verbose: bool,
|
|
1086
|
+
_function,
|
|
1087
|
+
*args,
|
|
1088
|
+
fatal_exceptions=(),
|
|
1089
|
+
**kwargs,
|
|
1074
1090
|
):
|
|
1075
1091
|
"""
|
|
1076
1092
|
Runs function with given *args and **kwargs.
|
|
@@ -1082,12 +1098,20 @@ async def retry_until_successful_async(
|
|
|
1082
1098
|
:param logger: a logger so we can log the failures
|
|
1083
1099
|
:param verbose: whether to log the failure on each retry
|
|
1084
1100
|
:param _function: function to run
|
|
1101
|
+
:param fatal_exceptions: exception types that should not be retried
|
|
1085
1102
|
:param args: functions args
|
|
1086
1103
|
:param kwargs: functions kwargs
|
|
1087
1104
|
:return: function result
|
|
1088
1105
|
"""
|
|
1089
1106
|
return await AsyncRetryer(
|
|
1090
|
-
backoff,
|
|
1107
|
+
backoff,
|
|
1108
|
+
timeout,
|
|
1109
|
+
logger,
|
|
1110
|
+
verbose,
|
|
1111
|
+
_function,
|
|
1112
|
+
*args,
|
|
1113
|
+
fatal_exceptions=fatal_exceptions,
|
|
1114
|
+
**kwargs,
|
|
1091
1115
|
).run()
|
|
1092
1116
|
|
|
1093
1117
|
|
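
A minimal sketch of the retry helper with the new fatal_exceptions argument shown above; the fetch() function and its exception choices are hypothetical, the argument order follows the signature in the hunk:

import random

from mlrun.utils import logger
from mlrun.utils.helpers import retry_until_successful


def fetch() -> str:
    if random.random() < 0.5:
        raise ConnectionError("transient network error")  # retried until the timeout
    return "ok"


result = retry_until_successful(
    1,          # backoff: seconds to wait between attempts
    10,         # timeout: overall retry budget in seconds
    logger,     # used to log failures when verbose=True
    True,       # verbose
    fetch,
    fatal_exceptions=(ValueError,),  # a ValueError would abort immediately, no retries
)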