mlrun 1.10.0rc16__py3-none-any.whl → 1.10.1rc4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of mlrun might be problematic.

Files changed (101)
  1. mlrun/__init__.py +22 -2
  2. mlrun/artifacts/document.py +6 -1
  3. mlrun/artifacts/llm_prompt.py +21 -15
  4. mlrun/artifacts/model.py +3 -3
  5. mlrun/common/constants.py +9 -0
  6. mlrun/common/formatters/artifact.py +1 -0
  7. mlrun/common/model_monitoring/helpers.py +86 -0
  8. mlrun/common/schemas/__init__.py +2 -0
  9. mlrun/common/schemas/auth.py +2 -0
  10. mlrun/common/schemas/function.py +10 -0
  11. mlrun/common/schemas/hub.py +30 -18
  12. mlrun/common/schemas/model_monitoring/__init__.py +2 -0
  13. mlrun/common/schemas/model_monitoring/constants.py +30 -6
  14. mlrun/common/schemas/model_monitoring/functions.py +13 -4
  15. mlrun/common/schemas/model_monitoring/model_endpoints.py +11 -0
  16. mlrun/common/schemas/pipeline.py +1 -1
  17. mlrun/common/schemas/serving.py +3 -0
  18. mlrun/common/schemas/workflow.py +1 -0
  19. mlrun/common/secrets.py +22 -1
  20. mlrun/config.py +34 -21
  21. mlrun/datastore/__init__.py +11 -3
  22. mlrun/datastore/azure_blob.py +162 -47
  23. mlrun/datastore/base.py +265 -7
  24. mlrun/datastore/datastore.py +10 -5
  25. mlrun/datastore/datastore_profile.py +61 -5
  26. mlrun/datastore/model_provider/huggingface_provider.py +367 -0
  27. mlrun/datastore/model_provider/mock_model_provider.py +87 -0
  28. mlrun/datastore/model_provider/model_provider.py +211 -74
  29. mlrun/datastore/model_provider/openai_provider.py +243 -71
  30. mlrun/datastore/s3.py +24 -2
  31. mlrun/datastore/store_resources.py +4 -4
  32. mlrun/datastore/storeytargets.py +2 -3
  33. mlrun/datastore/utils.py +15 -3
  34. mlrun/db/base.py +27 -19
  35. mlrun/db/httpdb.py +57 -48
  36. mlrun/db/nopdb.py +25 -10
  37. mlrun/execution.py +55 -13
  38. mlrun/hub/__init__.py +15 -0
  39. mlrun/hub/module.py +181 -0
  40. mlrun/k8s_utils.py +105 -16
  41. mlrun/launcher/base.py +13 -6
  42. mlrun/launcher/local.py +2 -0
  43. mlrun/model.py +9 -3
  44. mlrun/model_monitoring/api.py +66 -27
  45. mlrun/model_monitoring/applications/__init__.py +1 -1
  46. mlrun/model_monitoring/applications/base.py +388 -138
  47. mlrun/model_monitoring/applications/context.py +2 -4
  48. mlrun/model_monitoring/applications/results.py +4 -7
  49. mlrun/model_monitoring/controller.py +239 -101
  50. mlrun/model_monitoring/db/_schedules.py +36 -13
  51. mlrun/model_monitoring/db/_stats.py +4 -3
  52. mlrun/model_monitoring/db/tsdb/base.py +29 -9
  53. mlrun/model_monitoring/db/tsdb/tdengine/schemas.py +4 -5
  54. mlrun/model_monitoring/db/tsdb/tdengine/tdengine_connector.py +154 -50
  55. mlrun/model_monitoring/db/tsdb/tdengine/writer_graph_steps.py +51 -0
  56. mlrun/model_monitoring/db/tsdb/v3io/stream_graph_steps.py +17 -4
  57. mlrun/model_monitoring/db/tsdb/v3io/v3io_connector.py +245 -51
  58. mlrun/model_monitoring/helpers.py +28 -5
  59. mlrun/model_monitoring/stream_processing.py +45 -14
  60. mlrun/model_monitoring/writer.py +220 -1
  61. mlrun/platforms/__init__.py +3 -2
  62. mlrun/platforms/iguazio.py +7 -3
  63. mlrun/projects/operations.py +16 -11
  64. mlrun/projects/pipelines.py +2 -2
  65. mlrun/projects/project.py +157 -69
  66. mlrun/run.py +97 -20
  67. mlrun/runtimes/__init__.py +18 -0
  68. mlrun/runtimes/base.py +14 -6
  69. mlrun/runtimes/daskjob.py +1 -0
  70. mlrun/runtimes/local.py +5 -2
  71. mlrun/runtimes/mounts.py +20 -2
  72. mlrun/runtimes/nuclio/__init__.py +1 -0
  73. mlrun/runtimes/nuclio/application/application.py +147 -17
  74. mlrun/runtimes/nuclio/function.py +72 -27
  75. mlrun/runtimes/nuclio/serving.py +102 -20
  76. mlrun/runtimes/pod.py +213 -21
  77. mlrun/runtimes/utils.py +49 -9
  78. mlrun/secrets.py +54 -13
  79. mlrun/serving/remote.py +79 -6
  80. mlrun/serving/routers.py +23 -41
  81. mlrun/serving/server.py +230 -40
  82. mlrun/serving/states.py +605 -232
  83. mlrun/serving/steps.py +62 -0
  84. mlrun/serving/system_steps.py +136 -81
  85. mlrun/serving/v2_serving.py +9 -10
  86. mlrun/utils/helpers.py +215 -83
  87. mlrun/utils/logger.py +3 -1
  88. mlrun/utils/notifications/notification/base.py +18 -0
  89. mlrun/utils/notifications/notification/git.py +2 -4
  90. mlrun/utils/notifications/notification/mail.py +38 -15
  91. mlrun/utils/notifications/notification/slack.py +2 -4
  92. mlrun/utils/notifications/notification/webhook.py +2 -5
  93. mlrun/utils/notifications/notification_pusher.py +1 -1
  94. mlrun/utils/version/version.json +2 -2
  95. {mlrun-1.10.0rc16.dist-info → mlrun-1.10.1rc4.dist-info}/METADATA +51 -50
  96. {mlrun-1.10.0rc16.dist-info → mlrun-1.10.1rc4.dist-info}/RECORD +100 -95
  97. mlrun/api/schemas/__init__.py +0 -259
  98. {mlrun-1.10.0rc16.dist-info → mlrun-1.10.1rc4.dist-info}/WHEEL +0 -0
  99. {mlrun-1.10.0rc16.dist-info → mlrun-1.10.1rc4.dist-info}/entry_points.txt +0 -0
  100. {mlrun-1.10.0rc16.dist-info → mlrun-1.10.1rc4.dist-info}/licenses/LICENSE +0 -0
  101. {mlrun-1.10.0rc16.dist-info → mlrun-1.10.1rc4.dist-info}/top_level.txt +0 -0
mlrun/serving/states.py CHANGED
@@ -24,6 +24,7 @@ import inspect
24
24
  import os
25
25
  import pathlib
26
26
  import traceback
27
+ import warnings
27
28
  from abc import ABC
28
29
  from copy import copy, deepcopy
29
30
  from inspect import getfullargspec, signature
@@ -38,17 +39,21 @@ import mlrun.common.schemas as schemas
38
39
  from mlrun.artifacts.llm_prompt import LLMPromptArtifact, PlaceholderDefaultDict
39
40
  from mlrun.artifacts.model import ModelArtifact
40
41
  from mlrun.datastore.datastore_profile import (
41
- DatastoreProfileKafkaSource,
42
+ DatastoreProfileKafkaStream,
42
43
  DatastoreProfileKafkaTarget,
43
44
  DatastoreProfileV3io,
44
45
  datastore_profile_read,
45
46
  )
46
- from mlrun.datastore.model_provider.model_provider import ModelProvider
47
+ from mlrun.datastore.model_provider.model_provider import (
48
+ InvokeResponseFormat,
49
+ ModelProvider,
50
+ UsageResponseKeys,
51
+ )
47
52
  from mlrun.datastore.storeytargets import KafkaStoreyTarget, StreamStoreyTarget
48
- from mlrun.utils import get_data_from_path, logger, split_path
53
+ from mlrun.utils import get_data_from_path, logger, set_data_by_path, split_path
49
54
 
50
55
  from ..config import config
51
- from ..datastore import get_stream_pusher
56
+ from ..datastore import _DummyStream, get_stream_pusher
52
57
  from ..datastore.utils import (
53
58
  get_kafka_brokers_from_dict,
54
59
  parse_kafka_url,
@@ -372,20 +377,14 @@ class BaseStep(ModelObj):
372
377
  to event["y"] resulting in {"x": 5, "y": <result>}
373
378
  :param model_endpoint_creation_strategy: Strategy for creating or updating the model endpoint:
374
379
 
375
- * **overwrite**:
376
-
377
- 1. If model endpoints with the same name exist, delete the `latest` one.
378
- 2. Create a new model endpoint entry and set it as `latest`.
379
-
380
- * **inplace** (default):
381
-
382
- 1. If model endpoints with the same name exist, update the `latest` entry.
383
- 2. Otherwise, create a new entry.
380
+ * **overwrite**: If model endpoints with the same name exist, delete the `latest` one;
381
+ create a new model endpoint entry and set it as `latest`.
384
382
 
385
- * **archive**:
383
+ * **inplace** (default): If model endpoints with the same name exist, update the `latest`
384
+ entry; otherwise, create a new entry.
386
385
 
387
- 1. If model endpoints with the same name exist, preserve them.
388
- 2. Create a new model endpoint with the same name and set it to `latest`.
386
+ * **archive**: If model endpoints with the same name exist, preserve them;
387
+ create a new model endpoint with the same name and set it to `latest`.
389
388
 
390
389
  :param class_args: class init arguments
391
390
  """
@@ -517,7 +516,9 @@ class BaseStep(ModelObj):
517
516
 
518
517
  root = self._extract_root_step()
519
518
 
520
- if not isinstance(root, RootFlowStep):
519
+ if not isinstance(root, RootFlowStep) or (
520
+ isinstance(root, RootFlowStep) and root.engine != "async"
521
+ ):
521
522
  raise GraphError(
522
523
  "ModelRunnerStep can be added to 'Flow' topology graph only"
523
524
  )
@@ -541,8 +542,8 @@ class BaseStep(ModelObj):
541
542
  # Update model endpoints names in the root step
542
543
  root.update_model_endpoints_names(step_model_endpoints_names)
543
544
 
544
- @staticmethod
545
545
  def _verify_shared_models(
546
+ self,
546
547
  root: "RootFlowStep",
547
548
  step: "ModelRunnerStep",
548
549
  step_model_endpoints_names: list[str],
@@ -571,35 +572,41 @@ class BaseStep(ModelObj):
571
572
  prefix, _ = mlrun.datastore.parse_store_uri(model_artifact_uri)
572
573
  # if the model artifact is a prompt, we need to get the model URI
573
574
  # to ensure that the shared runnable name is correct
575
+ llm_artifact_uri = None
574
576
  if prefix == mlrun.utils.StorePrefix.LLMPrompt:
575
577
  llm_artifact, _ = mlrun.store_manager.get_store_artifact(
576
578
  model_artifact_uri
577
579
  )
580
+ llm_artifact_uri = llm_artifact.uri
578
581
  model_artifact_uri = mlrun.utils.remove_tag_from_artifact_uri(
579
582
  llm_artifact.spec.parent_uri
580
583
  )
581
- actual_shared_name = root.get_shared_model_name_by_artifact_uri(
582
- model_artifact_uri
584
+ actual_shared_name, shared_model_class, shared_model_params = (
585
+ root.get_shared_model_by_artifact_uri(model_artifact_uri)
583
586
  )
584
587
 
585
- if not shared_runnable_name:
586
- if not actual_shared_name:
587
- raise GraphError(
588
- f"Can't find shared model for {name} model endpoint"
589
- )
590
- else:
591
- step.class_args[schemas.ModelRunnerStepData.MODELS][name][
592
- schemas.ModelsData.MODEL_PARAMETERS.value
593
- ]["shared_runnable_name"] = actual_shared_name
594
- shared_models.append(actual_shared_name)
588
+ if not actual_shared_name:
589
+ raise GraphError(
590
+ f"Can't find shared model named {shared_runnable_name}"
591
+ )
592
+ elif not shared_runnable_name:
593
+ step.class_args[schemas.ModelRunnerStepData.MODELS][name][
594
+ schemas.ModelsData.MODEL_PARAMETERS.value
595
+ ]["shared_runnable_name"] = actual_shared_name
595
596
  elif actual_shared_name != shared_runnable_name:
596
597
  raise GraphError(
597
598
  f"Model endpoint {name} shared runnable name mismatch: "
598
599
  f"expected {actual_shared_name}, got {shared_runnable_name}"
599
600
  )
600
- else:
601
- shared_models.append(actual_shared_name)
602
-
601
+ shared_models.append(actual_shared_name)
602
+ self._edit_proxy_model_data(
603
+ step,
604
+ name,
605
+ actual_shared_name,
606
+ shared_model_params,
607
+ shared_model_class,
608
+ llm_artifact_uri or model_artifact_uri,
609
+ )
603
610
  undefined_shared_models = list(
604
611
  set(shared_models) - set(root.shared_models.keys())
605
612
  )
@@ -608,6 +615,52 @@ class BaseStep(ModelObj):
608
615
  f"The following shared models are not defined in the graph: {undefined_shared_models}."
609
616
  )
610
617
 
618
+ @staticmethod
619
+ def _edit_proxy_model_data(
620
+ step: "ModelRunnerStep",
621
+ name: str,
622
+ actual_shared_name: str,
623
+ shared_model_params: dict,
624
+ shared_model_class: Any,
625
+ artifact: Union[ModelArtifact, LLMPromptArtifact, str],
626
+ ):
627
+ monitoring_data = step.class_args.setdefault(
628
+ schemas.ModelRunnerStepData.MONITORING_DATA, {}
629
+ )
630
+
631
+ # edit monitoring data according to the shared model parameters
632
+ monitoring_data[name][schemas.MonitoringData.INPUT_PATH] = shared_model_params[
633
+ "input_path"
634
+ ]
635
+ monitoring_data[name][schemas.MonitoringData.RESULT_PATH] = shared_model_params[
636
+ "result_path"
637
+ ]
638
+ monitoring_data[name][schemas.MonitoringData.INPUTS] = shared_model_params[
639
+ "inputs"
640
+ ]
641
+ monitoring_data[name][schemas.MonitoringData.OUTPUTS] = shared_model_params[
642
+ "outputs"
643
+ ]
644
+ monitoring_data[name][schemas.MonitoringData.MODEL_CLASS] = (
645
+ shared_model_class
646
+ if isinstance(shared_model_class, str)
647
+ else shared_model_class.__class__.__name__
648
+ )
649
+ if actual_shared_name and actual_shared_name not in step._shared_proxy_mapping:
650
+ step._shared_proxy_mapping[actual_shared_name] = {
651
+ name: artifact.uri
652
+ if isinstance(artifact, (ModelArtifact, LLMPromptArtifact))
653
+ else artifact
654
+ }
655
+ elif actual_shared_name:
656
+ step._shared_proxy_mapping[actual_shared_name].update(
657
+ {
658
+ name: artifact.uri
659
+ if isinstance(artifact, (ModelArtifact, LLMPromptArtifact))
660
+ else artifact
661
+ }
662
+ )
663
+
611
664
 
612
665
  class TaskStep(BaseStep):
613
666
  """task execution step, runs a class or handler"""
@@ -983,20 +1036,14 @@ class RouterStep(TaskStep):
983
1036
  :param function: function this step should run in
984
1037
  :param creation_strategy: Strategy for creating or updating the model endpoint:
985
1038
 
986
- * **overwrite**:
1039
+ * **overwrite**: If model endpoints with the same name exist, delete the `latest` one;
1040
+ create a new model endpoint entry and set it as `latest`.
987
1041
 
988
- 1. If model endpoints with the same name exist, delete the `latest` one.
989
- 2. Create a new model endpoint entry and set it as `latest`.
1042
+ * **inplace** (default): If model endpoints with the same name exist, update the `latest`
1043
+ entry; otherwise, create a new entry.
990
1044
 
991
- * **inplace** (default):
992
-
993
- 1. If model endpoints with the same name exist, update the `latest` entry.
994
- 2. Otherwise, create a new entry.
995
-
996
- * **archive**:
997
-
998
- 1. If model endpoints with the same name exist, preserve them.
999
- 2. Create a new model endpoint with the same name and set it to `latest`.
1045
+ * **archive**: If model endpoints with the same name exist, preserve them;
1046
+ create a new model endpoint with the same name and set it to `latest`.
1000
1047
 
1001
1048
  """
1002
1049
  if len(self.routes.keys()) >= MAX_MODELS_PER_ROUTER and key not in self.routes:
@@ -1090,6 +1137,7 @@ class Model(storey.ParallelExecutionRunnable, ModelObj):
1090
1137
  "artifact_uri",
1091
1138
  "shared_runnable_name",
1092
1139
  "shared_proxy_mapping",
1140
+ "execution_mechanism",
1093
1141
  ]
1094
1142
  kind = "model"
1095
1143
 
@@ -1111,6 +1159,8 @@ class Model(storey.ParallelExecutionRunnable, ModelObj):
1111
1159
  self.invocation_artifact: Optional[LLMPromptArtifact] = None
1112
1160
  self.model_artifact: Optional[ModelArtifact] = None
1113
1161
  self.model_provider: Optional[ModelProvider] = None
1162
+ self._artifact_were_loaded = False
1163
+ self._execution_mechanism = None
1114
1164
 
1115
1165
  def __init_subclass__(cls):
1116
1166
  super().__init_subclass__()
@@ -1130,13 +1180,29 @@ class Model(storey.ParallelExecutionRunnable, ModelObj):
1130
1180
  raise_missing_schema_exception=False,
1131
1181
  )
1132
1182
 
1133
- def _load_artifacts(self) -> None:
1134
- artifact = self._get_artifact_object()
1135
- if isinstance(artifact, LLMPromptArtifact):
1136
- self.invocation_artifact = artifact
1137
- self.model_artifact = self.invocation_artifact.model_artifact
1183
+ # Check if the relevant predict method is implemented when trying to initialize the model
1184
+ if self._execution_mechanism == storey.ParallelExecutionMechanisms.asyncio:
1185
+ if self.__class__.predict_async is Model.predict_async:
1186
+ raise mlrun.errors.ModelRunnerError(
1187
+ f"{self.name} is running with {self._execution_mechanism} execution_mechanism but predict_async() "
1188
+ f"is not implemented"
1189
+ )
1138
1190
  else:
1139
- self.model_artifact = artifact
1191
+ if self.__class__.predict is Model.predict:
1192
+ raise mlrun.errors.ModelRunnerError(
1193
+ f"{self.name} is running with {self._execution_mechanism} execution_mechanism but predict() "
1194
+ f"is not implemented"
1195
+ )
1196
+
1197
+ def _load_artifacts(self) -> None:
1198
+ if not self._artifact_were_loaded:
1199
+ artifact = self._get_artifact_object()
1200
+ if isinstance(artifact, LLMPromptArtifact):
1201
+ self.invocation_artifact = artifact
1202
+ self.model_artifact = self.invocation_artifact.model_artifact
1203
+ else:
1204
+ self.model_artifact = artifact
1205
+ self._artifact_were_loaded = True
1140
1206
 
1141
1207
  def _get_artifact_object(
1142
1208
  self, proxy_uri: Optional[str] = None
@@ -1158,11 +1224,11 @@ class Model(storey.ParallelExecutionRunnable, ModelObj):
1158
1224
 
1159
1225
  def predict(self, body: Any, **kwargs) -> Any:
1160
1226
  """Override to implement prediction logic. If the logic requires asyncio, override predict_async() instead."""
1161
- return body
1227
+ raise NotImplementedError("predict() method not implemented")
1162
1228
 
1163
1229
  async def predict_async(self, body: Any, **kwargs) -> Any:
1164
1230
  """Override to implement prediction logic if the logic requires asyncio."""
1165
- return body
1231
+ raise NotImplementedError("predict_async() method not implemented")
1166
1232
 
1167
1233
  def run(self, body: Any, path: str, origin_name: Optional[str] = None) -> Any:
1168
1234
  return self.predict(body)
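
Because the defaults above now raise NotImplementedError, a subclass has to implement the variant matching its execution mechanism; a minimal sketch (class and field names are illustrative):

    from typing import Any

    class EchoModel(Model):  # Model is the base class defined in this module
        """Toy model that echoes its input back into the event body."""

        def predict(self, body: Any, **kwargs) -> Any:
            # synchronous path, used by process/thread pool, dedicated process and naive execution
            body["result"] = {"echo": body.get("inputs")}
            return body

        async def predict_async(self, body: Any, **kwargs) -> Any:
            # asyncio path, required when execution_mechanism="asyncio"
            return self.predict(body, **kwargs)
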
@@ -1205,26 +1271,111 @@ class Model(storey.ParallelExecutionRunnable, ModelObj):
1205
1271
 
1206
1272
 
1207
1273
  class LLModel(Model):
1274
+ """
1275
+ A model wrapper for handling LLM (Large Language Model) prompt-based inference.
1276
+
1277
+ This class extends the base `Model` to provide specialized handling for
1278
+ `LLMPromptArtifact` objects, enabling both synchronous and asynchronous
1279
+ invocation of language models.
1280
+
1281
+ **Model Invocation**:
1282
+
1283
+ - The execution of enriched prompts is delegated to the `model_provider`
1284
+ configured for the model (e.g., **Hugging Face** or **OpenAI**).
1285
+ - The `model_provider` is responsible for sending the prompt to the correct
1286
+ backend API and returning the generated output.
1287
+ - Users can override the `predict` and `predict_async` methods to customize
1288
+ the behavior of the model invocation.
1289
+
1290
+ **Prompt Enrichment Overview**:
1291
+
1292
+ - If an `LLMPromptArtifact` is found, load its prompt template and fill in
1293
+ placeholders using values from the request body.
1294
+ - If the artifact is not an `LLMPromptArtifact`, skip formatting and attempt
1295
+ to retrieve `messages` directly from the request body using the input path.
1296
+
1297
+ **Simplified Example**:
1298
+
1299
+ Input body::
1300
+
1301
+ {"city": "Paris", "days": 3}
1302
+
1303
+ Prompt template in artifact::
1304
+
1305
+ [
1306
+ {"role": "system", "content": "You are a travel planning assistant."},
1307
+ {"role": "user", "content": "Create a {{days}}-day itinerary for {{city}}."},
1308
+ ]
1309
+
1310
+ Result after enrichment::
1311
+
1312
+ [
1313
+ {"role": "system", "content": "You are a travel planning assistant."},
1314
+ {"role": "user", "content": "Create a 3-day itinerary for Paris."},
1315
+ ]
1316
+
1317
+ :param name: Name of the model.
1318
+ :param input_path: Path in the request body where input data is located.
1319
+ :param result_path: Path in the response body where model outputs and the statistics
1320
+ will be stored.
1321
+ """
1322
+
1323
+ _dict_fields = Model._dict_fields + ["result_path", "input_path"]
1324
+
1208
1325
  def __init__(
1209
- self, name: str, input_path: Optional[Union[str, list[str]]], **kwargs
1326
+ self,
1327
+ name: str,
1328
+ input_path: Optional[Union[str, list[str]]] = None,
1329
+ result_path: Optional[Union[str, list[str]]] = None,
1330
+ **kwargs,
1210
1331
  ):
1211
1332
  super().__init__(name, **kwargs)
1212
1333
  self._input_path = split_path(input_path)
1334
+ self._result_path = split_path(result_path)
1335
+ logger.info(
1336
+ "LLModel initialized",
1337
+ model_name=name,
1338
+ input_path=input_path,
1339
+ result_path=result_path,
1340
+ )
1213
1341
 
1214
1342
  def predict(
1215
1343
  self,
1216
1344
  body: Any,
1217
1345
  messages: Optional[list[dict]] = None,
1218
- model_configuration: Optional[dict] = None,
1346
+ invocation_config: Optional[dict] = None,
1219
1347
  **kwargs,
1220
1348
  ) -> Any:
1349
+ llm_prompt_artifact = kwargs.get("llm_prompt_artifact")
1221
1350
  if isinstance(
1222
- self.invocation_artifact, mlrun.artifacts.LLMPromptArtifact
1351
+ llm_prompt_artifact, mlrun.artifacts.LLMPromptArtifact
1223
1352
  ) and isinstance(self.model_provider, ModelProvider):
1224
- body["result"] = self.model_provider.invoke(
1353
+ logger.debug(
1354
+ "Invoking model provider",
1355
+ model_name=self.name,
1356
+ messages=messages,
1357
+ invocation_config=invocation_config,
1358
+ )
1359
+ response_with_stats = self.model_provider.invoke(
1225
1360
  messages=messages,
1226
- as_str=True,
1227
- **(model_configuration or {}),
1361
+ invoke_response_format=InvokeResponseFormat.USAGE,
1362
+ **(invocation_config or {}),
1363
+ )
1364
+ set_data_by_path(
1365
+ path=self._result_path, data=body, value=response_with_stats
1366
+ )
1367
+ logger.debug(
1368
+ "LLModel prediction completed",
1369
+ model_name=self.name,
1370
+ answer=response_with_stats.get("answer"),
1371
+ usage=response_with_stats.get("usage"),
1372
+ )
1373
+ else:
1374
+ logger.warning(
1375
+ "LLModel invocation artifact or model provider not set, skipping prediction",
1376
+ model_name=self.name,
1377
+ invocation_artifact_type=type(llm_prompt_artifact).__name__,
1378
+ model_provider_type=type(self.model_provider).__name__,
1228
1379
  )
1229
1380
  return body
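
The result_path handling above relies on the dot-notation path helpers (split_path / set_data_by_path); a plain-Python illustration of the effect, where the response shape with "answer" and "usage" keys is inferred from the log fields and should be treated as an assumption:

    # what writing a response at result_path="response.llm" amounts to
    body = {"request": {"city": "Paris", "days": 3}}
    response_with_stats = {"answer": "Day 1: ...", "usage": {"total_tokens": 120}}

    path = "response.llm".split(".")      # roughly what split_path() produces
    node = body
    for key in path[:-1]:
        node = node.setdefault(key, {})   # create intermediate scopes as needed
    node[path[-1]] = response_with_stats  # roughly what set_data_by_path() does

    # body == {"request": {...}, "response": {"llm": {"answer": ..., "usage": {...}}}}
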
1230
1381
 
@@ -1232,61 +1383,112 @@ class LLModel(Model):
1232
1383
  self,
1233
1384
  body: Any,
1234
1385
  messages: Optional[list[dict]] = None,
1235
- model_configuration: Optional[dict] = None,
1386
+ invocation_config: Optional[dict] = None,
1236
1387
  **kwargs,
1237
1388
  ) -> Any:
1389
+ llm_prompt_artifact = kwargs.get("llm_prompt_artifact")
1238
1390
  if isinstance(
1239
- self.invocation_artifact, mlrun.artifacts.LLMPromptArtifact
1391
+ llm_prompt_artifact, mlrun.artifacts.LLMPromptArtifact
1240
1392
  ) and isinstance(self.model_provider, ModelProvider):
1241
- body["result"] = await self.model_provider.async_invoke(
1393
+ logger.debug(
1394
+ "Async invoking model provider",
1395
+ model_name=self.name,
1396
+ messages=messages,
1397
+ invocation_config=invocation_config,
1398
+ )
1399
+ response_with_stats = await self.model_provider.async_invoke(
1242
1400
  messages=messages,
1243
- as_str=True,
1244
- **(model_configuration or {}),
1401
+ invoke_response_format=InvokeResponseFormat.USAGE,
1402
+ **(invocation_config or {}),
1403
+ )
1404
+ set_data_by_path(
1405
+ path=self._result_path, data=body, value=response_with_stats
1406
+ )
1407
+ logger.debug(
1408
+ "LLModel async prediction completed",
1409
+ model_name=self.name,
1410
+ answer=response_with_stats.get("answer"),
1411
+ usage=response_with_stats.get("usage"),
1412
+ )
1413
+ else:
1414
+ logger.warning(
1415
+ "LLModel invocation artifact or model provider not set, skipping async prediction",
1416
+ model_name=self.name,
1417
+ invocation_artifact_type=type(llm_prompt_artifact).__name__,
1418
+ model_provider_type=type(self.model_provider).__name__,
1245
1419
  )
1246
1420
  return body
1247
1421
 
1248
1422
  def run(self, body: Any, path: str, origin_name: Optional[str] = None) -> Any:
1249
- messages, model_configuration = self.enrich_prompt(body, origin_name)
1423
+ llm_prompt_artifact = self._get_invocation_artifact(origin_name)
1424
+ messages, invocation_config = self.enrich_prompt(
1425
+ body, origin_name, llm_prompt_artifact
1426
+ )
1427
+ logger.info(
1428
+ "Calling LLModel predict",
1429
+ model_name=self.name,
1430
+ model_endpoint_name=origin_name,
1431
+ messages_len=len(messages) if messages else 0,
1432
+ )
1250
1433
  return self.predict(
1251
- body, messages=messages, model_configuration=model_configuration
1434
+ body,
1435
+ messages=messages,
1436
+ invocation_config=invocation_config,
1437
+ llm_prompt_artifact=llm_prompt_artifact,
1252
1438
  )
1253
1439
 
1254
1440
  async def run_async(
1255
1441
  self, body: Any, path: str, origin_name: Optional[str] = None
1256
1442
  ) -> Any:
1257
- messages, model_configuration = self.enrich_prompt(body, origin_name)
1443
+ llm_prompt_artifact = self._get_invocation_artifact(origin_name)
1444
+ messages, invocation_config = self.enrich_prompt(
1445
+ body, origin_name, llm_prompt_artifact
1446
+ )
1447
+ logger.info(
1448
+ "Calling LLModel async predict",
1449
+ model_name=self.name,
1450
+ model_endpoint_name=origin_name,
1451
+ messages_len=len(messages) if messages else 0,
1452
+ )
1258
1453
  return await self.predict_async(
1259
- body, messages=messages, model_configuration=model_configuration
1454
+ body,
1455
+ messages=messages,
1456
+ invocation_config=invocation_config,
1457
+ llm_prompt_artifact=llm_prompt_artifact,
1260
1458
  )
1261
1459
 
1262
1460
  def enrich_prompt(
1263
- self, body: dict, origin_name: str
1461
+ self,
1462
+ body: dict,
1463
+ origin_name: str,
1464
+ llm_prompt_artifact: Optional[LLMPromptArtifact] = None,
1264
1465
  ) -> Union[tuple[list[dict], dict], tuple[None, None]]:
1265
- if origin_name and self.shared_proxy_mapping:
1266
- llm_prompt_artifact = self.shared_proxy_mapping.get(origin_name)
1267
- if isinstance(llm_prompt_artifact, str):
1268
- llm_prompt_artifact = self._get_artifact_object(llm_prompt_artifact)
1269
- self.shared_proxy_mapping[origin_name] = llm_prompt_artifact
1270
- else:
1271
- llm_prompt_artifact = (
1272
- self.invocation_artifact or self._get_artifact_object()
1273
- )
1274
- if not (
1466
+ logger.info(
1467
+ "Enriching prompt",
1468
+ model_name=self.name,
1469
+ model_endpoint_name=origin_name,
1470
+ )
1471
+ if not llm_prompt_artifact or not (
1275
1472
  llm_prompt_artifact and isinstance(llm_prompt_artifact, LLMPromptArtifact)
1276
1473
  ):
1277
1474
  logger.warning(
1278
- "LLMModel must be provided with LLMPromptArtifact",
1475
+ "LLModel must be provided with LLMPromptArtifact",
1476
+ model_name=self.name,
1477
+ artifact_type=type(llm_prompt_artifact).__name__,
1279
1478
  llm_prompt_artifact=llm_prompt_artifact,
1280
1479
  )
1281
- return None, None
1282
- prompt_legend = llm_prompt_artifact.spec.prompt_legend
1283
- prompt_template = deepcopy(llm_prompt_artifact.read_prompt())
1480
+ prompt_legend, prompt_template, invocation_config = {}, [], {}
1481
+ else:
1482
+ prompt_legend = llm_prompt_artifact.spec.prompt_legend
1483
+ prompt_template = deepcopy(llm_prompt_artifact.read_prompt())
1484
+ invocation_config = llm_prompt_artifact.spec.invocation_config
1284
1485
  input_data = copy(get_data_from_path(self._input_path, body))
1285
- if isinstance(input_data, dict):
1486
+ if isinstance(input_data, dict) and prompt_template:
1286
1487
  kwargs = (
1287
1488
  {
1288
1489
  place_holder: input_data.get(body_map["field"])
1289
1490
  for place_holder, body_map in prompt_legend.items()
1491
+ if input_data.get(body_map["field"])
1290
1492
  }
1291
1493
  if prompt_legend
1292
1494
  else {}
@@ -1298,23 +1500,61 @@ class LLModel(Model):
1298
1500
  message["content"] = message["content"].format(**input_data)
1299
1501
  except KeyError as e:
1300
1502
  logger.warning(
1301
- "Input data was missing a placeholder, placeholder stay unformatted",
1302
- key_error=e,
1503
+ "Input data missing placeholder, content stays unformatted",
1504
+ model_name=self.name,
1505
+ key_error=mlrun.errors.err_to_str(e),
1303
1506
  )
1304
1507
  message["content"] = message["content"].format_map(
1305
1508
  default_place_holders
1306
1509
  )
1510
+ elif isinstance(input_data, dict) and not prompt_template:
1511
+ # If there is no prompt template, we assume the input data is already in the correct format.
1512
+ logger.debug("Attempting to retrieve messages from the request body.")
1513
+ prompt_template = input_data.get("messages", [])
1307
1514
  else:
1308
1515
  logger.warning(
1309
- f"Expected input data to be a dict, but received input data from type {type(input_data)} prompt "
1310
- f"template stay unformatted",
1516
+ "Expected input data to be a dict, prompt template stays unformatted",
1517
+ model_name=self.name,
1518
+ input_data_type=type(input_data).__name__,
1311
1519
  )
1312
- return prompt_template, llm_prompt_artifact.spec.model_configuration
1520
+ return prompt_template, invocation_config
1521
+
1522
+ def _get_invocation_artifact(
1523
+ self, origin_name: Optional[str] = None
1524
+ ) -> Union[LLMPromptArtifact, None]:
1525
+ """
1526
+ Get the LLMPromptArtifact object for this model.
1527
+
1528
+ :param proxy_uri: Optional; URI to the proxy artifact.
1529
+ :return: LLMPromptArtifact object or None if not found.
1530
+ """
1531
+ if origin_name and self.shared_proxy_mapping:
1532
+ llm_prompt_artifact = self.shared_proxy_mapping.get(origin_name)
1533
+ if isinstance(llm_prompt_artifact, str):
1534
+ llm_prompt_artifact = self._get_artifact_object(llm_prompt_artifact)
1535
+ self.shared_proxy_mapping[origin_name] = llm_prompt_artifact
1536
+ elif self._artifact_were_loaded:
1537
+ llm_prompt_artifact = self.invocation_artifact
1538
+ else:
1539
+ self._load_artifacts()
1540
+ llm_prompt_artifact = self.invocation_artifact
1541
+ return llm_prompt_artifact
1313
1542
 
1314
1543
 
1315
- class ModelSelector:
1544
+ class ModelSelector(ModelObj):
1316
1545
  """Used to select which models to run on each event."""
1317
1546
 
1547
+ def __init__(self, **kwargs):
1548
+ super().__init__()
1549
+
1550
+ def __init_subclass__(cls):
1551
+ super().__init_subclass__()
1552
+ cls._dict_fields = list(
1553
+ set(cls._dict_fields)
1554
+ | set(inspect.signature(cls.__init__).parameters.keys())
1555
+ )
1556
+ cls._dict_fields.remove("self")
1557
+
1318
1558
  def select(
1319
1559
  self, event, available_models: list[Model]
1320
1560
  ) -> Union[list[str], list[Model]]:
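
With ModelSelector now deriving from ModelObj, constructor arguments round-trip through to_dict/from_dict, so a custom selector is a small subclass; a sketch in which the routing rule and the event.body access are illustrative assumptions:

    from typing import Union

    class FieldSelector(ModelSelector):
        """Select the model whose name matches a field in the event body."""

        def __init__(self, field: str = "model", **kwargs):
            super().__init__(**kwargs)
            self.field = field  # captured into _dict_fields by __init_subclass__

        def select(self, event, available_models: list[Model]) -> Union[list[str], list[Model]]:
            wanted = event.body.get(self.field) if isinstance(event.body, dict) else None
            matches = [model for model in available_models if model.name == wanted]
            return matches or available_models  # fall back to running every model

Such a selector can be handed to ModelRunnerStep either as an instance or, via the new model_selector_parameters argument, by class name plus a parameter dict.
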
@@ -1406,34 +1646,68 @@ class ModelRunnerStep(MonitoredStep):
1406
1646
  model_runner_step.add_model(..., model_class=MyModel(name="my_model"))
1407
1647
  graph.to(model_runner_step)
1408
1648
 
1649
+ Note when ModelRunnerStep is used in a graph, MLRun automatically imports
1650
+ the default language model class (LLModel) during function deployment.
1651
+
1652
+ Note ModelRunnerStep can only be added to a graph that has the flow topology and runs with the async engine.
1653
+
1654
+ Note: see the configure_pool_resource method documentation for the default numbers of max threads and max processes.
1655
+
1409
1656
  :param model_selector: ModelSelector instance whose select() method will be used to select models to run on each
1410
1657
  event. Optional. If not passed, all models will be run.
1411
1658
  :param raise_exception: If True, an error will be raised when model selection fails or if one of the models raised
1412
1659
  an error. If False, the error will appear in the output event.
1413
1660
 
1414
- :raise ModelRunnerError - when a model raise an error the ModelRunnerStep will handle it, collect errors and outputs
1415
- from added models, If raise_exception is True will raise ModelRunnerError Else will add
1416
- the error msg as part of the event body mapped by model name if more than one model was
1417
- added to the ModelRunnerStep
1661
+ :raise ModelRunnerError: when a model raises an error the ModelRunnerStep will handle it, collect errors and
1662
+ outputs from added models. If raise_exception is True will raise ModelRunnerError. Else
1663
+ will add the error msg as part of the event body mapped by model name if more than
1664
+ one model was added to the ModelRunnerStep
1418
1665
  """
1419
1666
 
1420
1667
  kind = "model_runner"
1421
- _dict_fields = MonitoredStep._dict_fields + ["_shared_proxy_mapping"]
1668
+ _dict_fields = MonitoredStep._dict_fields + [
1669
+ "_shared_proxy_mapping",
1670
+ "max_processes",
1671
+ "max_threads",
1672
+ "pool_factor",
1673
+ ]
1422
1674
 
1423
1675
  def __init__(
1424
1676
  self,
1425
1677
  *args,
1426
1678
  name: Optional[str] = None,
1427
1679
  model_selector: Optional[Union[str, ModelSelector]] = None,
1680
+ model_selector_parameters: Optional[dict] = None,
1428
1681
  raise_exception: bool = True,
1429
1682
  **kwargs,
1430
1683
  ):
1684
+ self.max_processes = None
1685
+ self.max_threads = None
1686
+ self.pool_factor = None
1687
+
1688
+ if isinstance(model_selector, ModelSelector) and model_selector_parameters:
1689
+ raise mlrun.errors.MLRunInvalidArgumentError(
1690
+ "Cannot provide a model_selector object as argument to `model_selector` and also provide "
1691
+ "`model_selector_parameters`."
1692
+ )
1693
+ if model_selector:
1694
+ model_selector_parameters = model_selector_parameters or (
1695
+ model_selector.to_dict()
1696
+ if isinstance(model_selector, ModelSelector)
1697
+ else {}
1698
+ )
1699
+ model_selector = (
1700
+ model_selector
1701
+ if isinstance(model_selector, str)
1702
+ else model_selector.__class__.__name__
1703
+ )
1704
+
1431
1705
  super().__init__(
1432
1706
  *args,
1433
1707
  name=name,
1434
1708
  raise_exception=raise_exception,
1435
1709
  class_name="mlrun.serving.ModelRunner",
1436
- class_args=dict(model_selector=model_selector),
1710
+ class_args=dict(model_selector=(model_selector, model_selector_parameters)),
1437
1711
  **kwargs,
1438
1712
  )
1439
1713
  self.raise_exception = raise_exception
@@ -1449,10 +1723,6 @@ class ModelRunnerStep(MonitoredStep):
1449
1723
  model_endpoint_creation_strategy: Optional[
1450
1724
  schemas.ModelEndpointCreationStrategy
1451
1725
  ] = schemas.ModelEndpointCreationStrategy.INPLACE,
1452
- inputs: Optional[list[str]] = None,
1453
- outputs: Optional[list[str]] = None,
1454
- input_path: Optional[str] = None,
1455
- result_path: Optional[str] = None,
1456
1726
  override: bool = False,
1457
1727
  ) -> None:
1458
1728
  """
@@ -1465,28 +1735,18 @@ class ModelRunnerStep(MonitoredStep):
1465
1735
  :param shared_model_name: str, the name of the shared model that is already defined within the graph
1466
1736
  :param labels: model endpoint labels, should be list of str or mapping of str:str
1467
1737
  :param model_endpoint_creation_strategy: Strategy for creating or updating the model endpoint:
1468
- * **overwrite**:
1469
- 1. If model endpoints with the same name exist, delete the `latest` one.
1470
- 2. Create a new model endpoint entry and set it as `latest`.
1471
- * **inplace** (default):
1472
- 1. If model endpoints with the same name exist, update the `latest` entry.
1473
- 2. Otherwise, create a new entry.
1474
- * **archive**:
1475
- 1. If model endpoints with the same name exist, preserve them.
1476
- 2. Create a new model endpoint with the same name and set it to `latest`.
1477
1738
 
1478
- :param inputs: list of the model inputs (e.g. features) ,if provided will override the inputs
1479
- that been configured in the model artifact, please note that those inputs need to
1480
- be equal in length and order to the inputs that model_class predict method expects
1481
- :param outputs: list of the model outputs (e.g. labels) ,if provided will override the outputs
1482
- that been configured in the model artifact, please note that those outputs need to
1483
- be equal to the model_class predict method outputs (length, and order)
1484
- :param input_path: input path inside the user event, expect scopes to be defined by dot notation
1485
- (e.g "inputs.my_model_inputs"). expects list or dictionary type object in path.
1486
- :param result_path: result path inside the user output event, expect scopes to be defined by dot
1487
- notation (e.g "outputs.my_model_outputs") expects list or dictionary type object
1488
- in path.
1739
+ * **overwrite**: If model endpoints with the same name exist, delete the `latest` one;
1740
+ create a new model endpoint entry and set it as `latest`.
1741
+
1742
+ * **inplace** (default): If model endpoints with the same name exist, update the `latest` entry;
1743
+ otherwise, create a new entry.
1744
+
1745
+ * **archive**: If model endpoints with the same name exist, preserve them;
1746
+ create a new model endpoint with the same name and set it to `latest`.
1747
+
1489
1748
  :param override: bool allow override existing model on the current ModelRunnerStep.
1749
+ :raise GraphError: when the shared model is not found in the root flow step shared models.
1490
1750
  """
1491
1751
  model_class, model_params = (
1492
1752
  "mlrun.serving.Model",
@@ -1503,11 +1763,21 @@ class ModelRunnerStep(MonitoredStep):
1503
1763
  "model_artifact must be a string, ModelArtifact or LLMPromptArtifact"
1504
1764
  )
1505
1765
  root = self._extract_root_step()
1766
+ shared_model_params = {}
1506
1767
  if isinstance(root, RootFlowStep):
1507
- shared_model_name = (
1508
- shared_model_name
1509
- or root.get_shared_model_name_by_artifact_uri(model_artifact_uri)
1768
+ actual_shared_model_name, shared_model_class, shared_model_params = (
1769
+ root.get_shared_model_by_artifact_uri(model_artifact_uri)
1510
1770
  )
1771
+ if not actual_shared_model_name or (
1772
+ shared_model_name and actual_shared_model_name != shared_model_name
1773
+ ):
1774
+ raise GraphError(
1775
+ f"ModelRunnerStep can only add proxy models that were added to the root flow step, "
1776
+ f"model {shared_model_name} is not in the shared models."
1777
+ )
1778
+ elif not shared_model_name:
1779
+ shared_model_name = actual_shared_model_name
1780
+ model_params["shared_runnable_name"] = shared_model_name
1511
1781
  if not root.shared_models or (
1512
1782
  root.shared_models
1513
1783
  and shared_model_name
@@ -1517,13 +1787,27 @@ class ModelRunnerStep(MonitoredStep):
1517
1787
  f"ModelRunnerStep can only add proxy models that were added to the root flow step, "
1518
1788
  f"model {shared_model_name} is not in the shared models."
1519
1789
  )
1520
- if shared_model_name not in self._shared_proxy_mapping:
1790
+ monitoring_data = self.class_args.get(
1791
+ schemas.ModelRunnerStepData.MONITORING_DATA, {}
1792
+ )
1793
+ monitoring_data.setdefault(endpoint_name, {})[
1794
+ schemas.MonitoringData.MODEL_CLASS
1795
+ ] = (
1796
+ shared_model_class
1797
+ if isinstance(shared_model_class, str)
1798
+ else shared_model_class.__class__.__name__
1799
+ )
1800
+ self.class_args[schemas.ModelRunnerStepData.MONITORING_DATA] = (
1801
+ monitoring_data
1802
+ )
1803
+
1804
+ if shared_model_name and shared_model_name not in self._shared_proxy_mapping:
1521
1805
  self._shared_proxy_mapping[shared_model_name] = {
1522
1806
  endpoint_name: model_artifact.uri
1523
1807
  if isinstance(model_artifact, (ModelArtifact, LLMPromptArtifact))
1524
1808
  else model_artifact
1525
1809
  }
1526
- else:
1810
+ elif override and shared_model_name:
1527
1811
  self._shared_proxy_mapping[shared_model_name].update(
1528
1812
  {
1529
1813
  endpoint_name: model_artifact.uri
@@ -1538,11 +1822,11 @@ class ModelRunnerStep(MonitoredStep):
1538
1822
  model_artifact=model_artifact,
1539
1823
  labels=labels,
1540
1824
  model_endpoint_creation_strategy=model_endpoint_creation_strategy,
1825
+ inputs=shared_model_params.get("inputs"),
1826
+ outputs=shared_model_params.get("outputs"),
1827
+ input_path=shared_model_params.get("input_path"),
1828
+ result_path=shared_model_params.get("result_path"),
1541
1829
  override=override,
1542
- inputs=inputs,
1543
- outputs=outputs,
1544
- input_path=input_path,
1545
- result_path=result_path,
1546
1830
  **model_params,
1547
1831
  )
1548
1832
 
@@ -1567,8 +1851,11 @@ class ModelRunnerStep(MonitoredStep):
1567
1851
  Add a Model to this ModelRunner.
1568
1852
 
1569
1853
  :param endpoint_name: str, will identify the model in the ModelRunnerStep, and assign model endpoint name
1570
- :param model_class: Model class name
1854
+ :param model_class: Model class name. If LLModel is chosen
1855
+ (either by name `LLModel` or by its full path, e.g. mlrun.serving.states.LLModel),
1856
+ outputs will be overridden with UsageResponseKeys fields.
1571
1857
  :param execution_mechanism: Parallel execution mechanism to be used to execute this model. Must be one of:
1858
+
1572
1859
  * "process_pool" – To run in a separate process from a process pool. This is appropriate for CPU or GPU
1573
1860
  intensive tasks as they would otherwise block the main process by holding Python's Global Interpreter
1574
1861
  Lock (GIL).
@@ -1578,37 +1865,32 @@ class ModelRunnerStep(MonitoredStep):
1578
1865
  otherwise block the main event loop thread.
1579
1866
  * "asyncio" – To run in an asyncio task. This is appropriate for I/O tasks that use asyncio, allowing the
1580
1867
  event loop to continue running while waiting for a response.
1581
- * "shared_executor" – Reuses an external executor (typically managed by the flow or context) to execute the
1582
- runnable. Should be used only if you have multiply `ParallelExecution` in the same flow and especially
1583
- useful when:
1584
- - You want to share a heavy resource like a large model loaded onto a GPU.
1585
- - You want to centralize task scheduling or coordination for multiple lightweight tasks.
1586
- - You aim to minimize overhead from creating new executors or processes/threads per runnable.
1587
- The runnable is expected to be pre-initialized and reused across events, enabling efficient use of
1588
- memory and hardware accelerators.
1589
1868
  * "naive" – To run in the main event loop. This is appropriate only for trivial computation and/or file I/O.
1590
1869
  It means that the runnable will not actually be run in parallel to anything else.
1591
1870
 
1592
- :param model_artifact: model artifact or mlrun model artifact uri
1593
- :param labels: model endpoint labels, should be list of str or mapping of str:str
1594
- :param model_endpoint_creation_strategy: Strategy for creating or updating the model endpoint:
1595
- * **overwrite**:
1596
- 1. If model endpoints with the same name exist, delete the `latest` one.
1597
- 2. Create a new model endpoint entry and set it as `latest`.
1598
- * **inplace** (default):
1599
- 1. If model endpoints with the same name exist, update the `latest` entry.
1600
- 2. Otherwise, create a new entry.
1601
- * **archive**:
1602
- 1. If model endpoints with the same name exist, preserve them.
1603
- 2. Create a new model endpoint with the same name and set it to `latest`.
1604
-
1605
- :param inputs: list of the model inputs (e.g. features) ,if provided will override the inputs
1871
+ :param model_artifact: model artifact or mlrun model artifact uri
1872
+ :param labels: model endpoint labels, should be list of str or mapping of str:str
1873
+ :param model_endpoint_creation_strategy: Strategy for creating or updating the model endpoint:
1874
+
1875
+ * **overwrite**: If model endpoints with the same name exist, delete the `latest` one;
1876
+ create a new model endpoint entry and set it as `latest`.
1877
+
1878
+ * **inplace** (default): If model endpoints with the same name exist, update the `latest`
1879
+ entry; otherwise, create a new entry.
1880
+
1881
+ * **archive**: If model endpoints with the same name exist, preserve them;
1882
+ create a new model endpoint with the same name and set it to `latest`.
1883
+
1884
+ :param inputs: list of the model inputs (e.g. features) ,if provided will override the inputs
1606
1885
  that been configured in the model artifact, please note that those inputs need to
1607
1886
  be equal in length and order to the inputs that model_class predict method expects
1608
- :param outputs: list of the model outputs (e.g. labels) ,if provided will override the outputs
1887
+ :param outputs: list of the model outputs (e.g. labels) ,if provided will override the outputs
1609
1888
  that been configured in the model artifact, please note that those outputs need to
1610
1889
  be equal to the model_class predict method outputs (length, and order)
1611
- :param input_path: when specified selects the key/path in the event to use as model monitoring inputs
1890
+
1891
+ When using LLModel, the output will be overridden with UsageResponseKeys.fields().
1892
+
1893
+ :param input_path: when specified selects the key/path in the event to use as model monitoring inputs
1612
1894
  this require that the event body will behave like a dict, expects scopes to be
1613
1895
  defined by dot notation (e.g "data.d").
1614
1896
  examples: input_path="data.b"
@@ -1618,7 +1900,7 @@ class ModelRunnerStep(MonitoredStep):
1618
1900
  be {"f0": [1, 2]}.
1619
1901
  if a ``list`` or ``list of lists`` is provided, it must follow the order and
1620
1902
  size defined by the input schema.
1621
- :param result_path: when specified selects the key/path in the output event to use as model monitoring
1903
+ :param result_path: when specified selects the key/path in the output event to use as model monitoring
1622
1904
  outputs this require that the output event body will behave like a dict,
1623
1905
  expects scopes to be defined by dot notation (e.g "data.d").
1624
1906
  examples: result_path="out.b"
@@ -1629,14 +1911,22 @@ class ModelRunnerStep(MonitoredStep):
1629
1911
  if a ``list`` or ``list of lists`` is provided, it must follow the order and
1630
1912
  size defined by the output schema.
1631
1913
 
1632
- :param override: bool allow override existing model on the current ModelRunnerStep.
1633
- :param model_parameters: Parameters for model instantiation
1914
+ :param override: bool allow override existing model on the current ModelRunnerStep.
1915
+ :param model_parameters: Parameters for model instantiation
1634
1916
  """
1635
1917
  if isinstance(model_class, Model) and model_parameters:
1636
1918
  raise mlrun.errors.MLRunInvalidArgumentError(
1637
1919
  "Cannot provide a model object as argument to `model_class` and also provide `model_parameters`."
1638
1920
  )
1639
-
1921
+ if type(model_class) is LLModel or (
1922
+ isinstance(model_class, str)
1923
+ and model_class.split(".")[-1] == LLModel.__name__
1924
+ ):
1925
+ if outputs:
1926
+ warnings.warn(
1927
+ "LLModel with existing outputs detected, overriding to default"
1928
+ )
1929
+ outputs = UsageResponseKeys.fields()
1640
1930
  model_parameters = model_parameters or (
1641
1931
  model_class.to_dict() if isinstance(model_class, Model) else {}
1642
1932
  )
@@ -1652,8 +1942,6 @@ class ModelRunnerStep(MonitoredStep):
1652
1942
  except mlrun.errors.MLRunNotFoundError:
1653
1943
  raise mlrun.errors.MLRunInvalidArgumentError("Artifact not found.")
1654
1944
 
1655
- outputs = outputs or self._get_model_output_schema(model_artifact)
1656
-
1657
1945
  model_artifact = (
1658
1946
  model_artifact.uri
1659
1947
  if isinstance(model_artifact, mlrun.artifacts.Artifact)
@@ -1719,28 +2007,13 @@ class ModelRunnerStep(MonitoredStep):
1719
2007
  self.class_args[schemas.ModelRunnerStepData.MONITORING_DATA] = monitoring_data
1720
2008
 
1721
2009
  @staticmethod
1722
- def _get_model_output_schema(
1723
- model_artifact: Union[ModelArtifact, LLMPromptArtifact],
1724
- ) -> Optional[list[str]]:
1725
- if isinstance(
1726
- model_artifact,
1727
- ModelArtifact,
1728
- ):
1729
- return [feature.name for feature in model_artifact.spec.outputs]
1730
- elif isinstance(
1731
- model_artifact,
1732
- LLMPromptArtifact,
1733
- ):
1734
- _model_artifact = model_artifact.model_artifact
1735
- return [feature.name for feature in _model_artifact.spec.outputs]
1736
-
1737
- @staticmethod
1738
- def _get_model_endpoint_output_schema(
2010
+ def _get_model_endpoint_schema(
1739
2011
  name: str,
1740
2012
  project: str,
1741
2013
  uid: str,
1742
- ) -> list[str]:
2014
+ ) -> tuple[list[str], list[str]]:
1743
2015
  output_schema = None
2016
+ input_schema = None
1744
2017
  try:
1745
2018
  model_endpoint: mlrun.common.schemas.model_monitoring.ModelEndpoint = (
1746
2019
  mlrun.db.get_run_db().get_model_endpoint(
@@ -1751,14 +2024,16 @@ class ModelRunnerStep(MonitoredStep):
1751
2024
  )
1752
2025
  )
1753
2026
  output_schema = model_endpoint.spec.label_names
2027
+ input_schema = model_endpoint.spec.feature_names
1754
2028
  except (
1755
2029
  mlrun.errors.MLRunNotFoundError,
1756
2030
  mlrun.errors.MLRunInvalidArgumentError,
1757
- ):
2031
+ ) as ex:
1758
2032
  logger.warning(
1759
- f"Model endpoint not found, using default output schema for model {name}"
2033
+ f"Model endpoint not found, using default output schema for model {name}",
2034
+ error=f"{type(ex).__name__}: {ex}",
1760
2035
  )
1761
- return output_schema
2036
+ return input_schema, output_schema
1762
2037
 
1763
2038
  def _calculate_monitoring_data(self) -> dict[str, dict[str, str]]:
1764
2039
  monitoring_data = deepcopy(
@@ -1768,55 +2043,106 @@ class ModelRunnerStep(MonitoredStep):
1768
2043
  )
1769
2044
  if isinstance(monitoring_data, dict):
1770
2045
  for model in monitoring_data:
1771
- monitoring_data[model][schemas.MonitoringData.OUTPUTS] = (
1772
- monitoring_data.get(model, {}).get(schemas.MonitoringData.OUTPUTS)
1773
- or self._get_model_endpoint_output_schema(
1774
- name=model,
1775
- project=self.context.project if self.context else None,
1776
- uid=monitoring_data.get(model, {}).get(
1777
- mlrun.common.schemas.MonitoringData.MODEL_ENDPOINT_UID
1778
- ),
1779
- )
1780
- )
1781
- # Prevent calling _get_model_output_schema for same model more than once
1782
- self.class_args[
1783
- mlrun.common.schemas.ModelRunnerStepData.MONITORING_DATA
1784
- ][model][schemas.MonitoringData.OUTPUTS] = monitoring_data[model][
1785
- schemas.MonitoringData.OUTPUTS
1786
- ]
1787
2046
  monitoring_data[model][schemas.MonitoringData.INPUT_PATH] = split_path(
1788
2047
  monitoring_data[model][schemas.MonitoringData.INPUT_PATH]
1789
2048
  )
1790
2049
  monitoring_data[model][schemas.MonitoringData.RESULT_PATH] = split_path(
1791
2050
  monitoring_data[model][schemas.MonitoringData.RESULT_PATH]
1792
2051
  )
2052
+
2053
+ mep_output_schema, mep_input_schema = None, None
2054
+
2055
+ output_schema = self.class_args[
2056
+ mlrun.common.schemas.ModelRunnerStepData.MONITORING_DATA
2057
+ ][model][schemas.MonitoringData.OUTPUTS]
2058
+ input_schema = self.class_args[
2059
+ mlrun.common.schemas.ModelRunnerStepData.MONITORING_DATA
2060
+ ][model][schemas.MonitoringData.INPUTS]
2061
+ if not output_schema or not input_schema:
2062
+ # if output or input schema is not provided, try to get it from the model endpoint
2063
+ mep_input_schema, mep_output_schema = (
2064
+ self._get_model_endpoint_schema(
2065
+ model,
2066
+ self.context.project,
2067
+ monitoring_data[model].get(
2068
+ schemas.MonitoringData.MODEL_ENDPOINT_UID, ""
2069
+ ),
2070
+ )
2071
+ )
2072
+ self.class_args[
2073
+ mlrun.common.schemas.ModelRunnerStepData.MONITORING_DATA
2074
+ ][model][schemas.MonitoringData.OUTPUTS] = (
2075
+ output_schema or mep_output_schema
2076
+ )
2077
+ self.class_args[
2078
+ mlrun.common.schemas.ModelRunnerStepData.MONITORING_DATA
2079
+ ][model][schemas.MonitoringData.INPUTS] = (
2080
+ input_schema or mep_input_schema
2081
+ )
1793
2082
  return monitoring_data
2083
+ else:
2084
+ raise mlrun.errors.MLRunInvalidArgumentError(
2085
+ "Monitoring data must be a dictionary."
2086
+ )
2087
+
2088
+ def configure_pool_resource(
2089
+ self,
2090
+ max_processes: Optional[int] = None,
2091
+ max_threads: Optional[int] = None,
2092
+ pool_factor: Optional[int] = None,
2093
+ ) -> None:
2094
+ """
2095
+ Configure the resource limits for the shared models in the graph.
2096
+
2097
+ :param max_processes: Maximum number of processes to spawn (excluding dedicated processes).
2098
+ Defaults to the number of CPUs or 16 if undetectable.
2099
+ :param max_threads: Maximum number of threads to spawn. Defaults to 32.
2100
+ :param pool_factor: Multiplier to scale the number of process/thread workers per runnable. Defaults to 1.
2101
+ """
2102
+ self.max_processes = max_processes
2103
+ self.max_threads = max_threads
2104
+ self.pool_factor = pool_factor
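
A short usage sketch for the new pool configuration (values are illustrative):

    # cap the worker pools created for this step's models before deployment
    model_runner_step.configure_pool_resource(
        max_processes=4,   # process-pool workers, excluding dedicated processes
        max_threads=16,    # thread-pool workers
        pool_factor=2,     # scale the number of workers per runnable
    )
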
1794
2105
 
1795
2106
  def init_object(self, context, namespace, mode="sync", reset=False, **extra_kwargs):
1796
2107
  self.context = context
1797
2108
  if not self._is_local_function(context):
1798
2109
  # skip init of non local functions
1799
2110
  return
1800
- model_selector = self.class_args.get("model_selector")
2111
+ model_selector, model_selector_params = self.class_args.get(
2112
+ "model_selector", (None, None)
2113
+ )
1801
2114
  execution_mechanism_by_model_name = self.class_args.get(
1802
2115
  schemas.ModelRunnerStepData.MODEL_TO_EXECUTION_MECHANISM
1803
2116
  )
1804
2117
  models = self.class_args.get(schemas.ModelRunnerStepData.MODELS, {})
1805
- if isinstance(model_selector, str):
1806
- model_selector = get_class(model_selector, namespace)()
2118
+ if model_selector:
2119
+ model_selector = get_class(model_selector, namespace).from_dict(
2120
+ model_selector_params, init_with_params=True
2121
+ )
1807
2122
  model_objects = []
1808
2123
  for model, model_params in models.values():
2124
+ model_name = model_params.get("name")
1809
2125
  model_params[schemas.MonitoringData.INPUT_PATH] = (
1810
2126
  self.class_args.get(
1811
2127
  mlrun.common.schemas.ModelRunnerStepData.MONITORING_DATA, {}
1812
2128
  )
1813
- .get(model_params.get("name"), {})
2129
+ .get(model_name, {})
1814
2130
  .get(schemas.MonitoringData.INPUT_PATH)
1815
2131
  )
2132
+ model_params[schemas.MonitoringData.RESULT_PATH] = (
2133
+ self.class_args.get(
2134
+ mlrun.common.schemas.ModelRunnerStepData.MONITORING_DATA, {}
2135
+ )
2136
+ .get(model_name, {})
2137
+ .get(schemas.MonitoringData.RESULT_PATH)
2138
+ )
1816
2139
  model = get_class(model, namespace).from_dict(
1817
2140
  model_params, init_with_params=True
1818
2141
  )
1819
2142
  model._raise_exception = False
2143
+ model._execution_mechanism = execution_mechanism_by_model_name.get(
2144
+ model_name
2145
+ )
1820
2146
  model_objects.append(model)
1821
2147
  self._async_object = ModelRunner(
1822
2148
  model_selector=model_selector,
@@ -1825,6 +2151,9 @@ class ModelRunnerStep(MonitoredStep):
1825
2151
  shared_proxy_mapping=self._shared_proxy_mapping or None,
1826
2152
  name=self.name,
1827
2153
  context=context,
2154
+ max_processes=self.max_processes,
2155
+ max_threads=self.max_threads,
2156
+ pool_factor=self.pool_factor,
1828
2157
  )
1829
2158
 
1830
2159
 
@@ -2044,20 +2373,14 @@ class FlowStep(BaseStep):
2044
2373
  to event["y"] resulting in {"x": 5, "y": <result>}
2045
2374
  :param model_endpoint_creation_strategy: Strategy for creating or updating the model endpoint:
2046
2375
 
2047
- * **overwrite**:
2048
-
2049
- 1. If model endpoints with the same name exist, delete the `latest` one.
2050
- 2. Create a new model endpoint entry and set it as `latest`.
2051
-
2052
- * **inplace** (default):
2376
+ * **overwrite**: If model endpoints with the same name exist, delete the `latest` one;
2377
+ create a new model endpoint entry and set it as `latest`.
2053
2378
 
2054
- 1. If model endpoints with the same name exist, update the `latest` entry.
2055
- 2. Otherwise, create a new entry.
2379
+ * **inplace** (default): If model endpoints with the same name exist, update the `latest`
2380
+ entry; otherwise, create a new entry.
2056
2381
 
2057
- * **archive**:
2058
-
2059
- 1. If model endpoints with the same name exist, preserve them.
2060
- 2. Create a new model endpoint with the same name and set it to `latest`.
2382
+ * **archive**: If model endpoints with the same name exist, preserve them;
2383
+ create a new model endpoint with the same name and set it to `latest`.
2061
2384
 
2062
2385
  :param class_args: class init arguments
2063
2386
  """
@@ -2552,35 +2875,64 @@ class RootFlowStep(FlowStep):
2552
2875
  model_class: Union[str, Model],
2553
2876
  execution_mechanism: Union[str, ParallelExecutionMechanisms],
2554
2877
  model_artifact: Union[str, ModelArtifact],
2878
+ inputs: Optional[list[str]] = None,
2879
+ outputs: Optional[list[str]] = None,
2880
+ input_path: Optional[str] = None,
2881
+ result_path: Optional[str] = None,
2555
2882
  override: bool = False,
2556
2883
  **model_parameters,
2557
2884
  ) -> None:
2558
2885
  """
2559
2886
  Add a shared model to the graph, this model will be available to all the ModelRunners in the graph
2560
2887
  :param name: Name of the shared model (should be unique in the graph)
2561
- :param model_class: Model class name
2888
+ :param model_class: Model class name. If LLModel is chosen
2889
+ (either by name `LLModel` or by its full path, e.g. mlrun.serving.states.LLModel),
2890
+ outputs will be overridden with UsageResponseKeys fields.
2562
2891
  :param execution_mechanism: Parallel execution mechanism to be used to execute this model. Must be one of:
- * "process_pool" – To run in a separate process from a process pool. This is appropriate for CPU or GPU
+
+ * **process_pool**: To run in a separate process from a process pool. This is appropriate for CPU or GPU
  intensive tasks as they would otherwise block the main process by holding Python's Global Interpreter
  Lock (GIL).
- * "dedicated_process" – To run in a separate dedicated process. This is appropriate for CPU or GPU intensive
- tasks that also require significant Runnable-specific initialization (e.g. a large model).
- * "thread_pool" To run in a separate thread. This is appropriate for blocking I/O tasks, as they would
+
+ * **dedicated_process**: To run in a separate dedicated process. This is appropriate for CPU or GPU
+ intensive tasks that also require significant Runnable-specific initialization (e.g. a large model).
+
+ * **thread_pool**: To run in a separate thread. This is appropriate for blocking I/O tasks, as they would
  otherwise block the main event loop thread.
- * "asyncio" – To run in an asyncio task. This is appropriate for I/O tasks that use asyncio, allowing the
+
+ * **asyncio**: To run in an asyncio task. This is appropriate for I/O tasks that use asyncio, allowing the
  event loop to continue running while waiting for a response.
- * "shared_executor" – Reuses an external executor (typically managed by the flow or context) to execute the
+
+ * **shared_executor**: Reuses an external executor (typically managed by the flow or context) to execute the
  runnable. Should be used only if you have multiple `ParallelExecution` in the same flow and especially
  useful when:
+
  - You want to share a heavy resource like a large model loaded onto a GPU.
+
  - You want to centralize task scheduling or coordination for multiple lightweight tasks.
+
  - You aim to minimize overhead from creating new executors or processes/threads per runnable.
+
  The runnable is expected to be pre-initialized and reused across events, enabling efficient use of
  memory and hardware accelerators.
- * "naive" – To run in the main event loop. This is appropriate only for trivial computation and/or file I/O.
- It means that the runnable will not actually be run in parallel to anything else.
+
+ * **naive**: To run in the main event loop. This is appropriate only for trivial computation and/or file
+ I/O. It means that the runnable will not actually be run in parallel to anything else.

  :param model_artifact: model artifact or mlrun model artifact uri
+ :param inputs: list of the model inputs (e.g. features); if provided, overrides the inputs
+ configured in the model artifact. Note that these inputs must match, in length and
+ order, the inputs that the model_class predict method expects.
+ :param outputs: list of the model outputs (e.g. labels); if provided, overrides the outputs
+ configured in the model artifact. Note that these outputs must match, in length and
+ order, the outputs of the model_class predict method.
+ :param input_path: input path inside the user event, with scopes defined by dot notation
+ (e.g. "inputs.my_model_inputs"). Expects a list or dictionary object at the path.
+ :param result_path: result path inside the user output event, with scopes defined by dot
+ notation (e.g. "outputs.my_model_outputs"). Expects a list or dictionary object
+ at the path.
  :param override: allow overriding an existing model on the current ModelRunnerStep.
  :param model_parameters: Parameters for model instantiation
  """
@@ -2588,6 +2940,15 @@ class RootFlowStep(FlowStep):
  raise mlrun.errors.MLRunInvalidArgumentError(
  "Cannot provide a model object as argument to `model_class` and also provide `model_parameters`."
  )
+ if type(model_class) is LLModel or (
+ isinstance(model_class, str)
+ and model_class.split(".")[-1] == LLModel.__name__
+ ):
+ if outputs:
+ warnings.warn(
+ "LLModel with existing outputs detected, overriding to default"
+ )
+ outputs = UsageResponseKeys.fields()

  if execution_mechanism == ParallelExecutionMechanisms.shared_executor:
  raise mlrun.errors.MLRunInvalidArgumentError(
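As the `model_class` docstring above notes, the new check accepts either an `LLModel` object or a string whose last dotted segment is the class name; a small sketch of the string case (the helper name is invented for illustration):

```python
# Illustrative: string forms that trigger the LLModel outputs override above.
def names_llmodel(model_class: str) -> bool:
    return model_class.split(".")[-1] == "LLModel"

assert names_llmodel("LLModel")
assert names_llmodel("mlrun.serving.states.LLModel")
assert not names_llmodel("MyCustomModel")
```

When the check matches, any user-supplied `outputs` list is replaced with `UsageResponseKeys.fields()`, with a warning if outputs were already set.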
@@ -2615,6 +2976,14 @@ class RootFlowStep(FlowStep):
  "Inconsistent name for the added model."
  )
  model_parameters["name"] = name
+ model_parameters["inputs"] = inputs or model_parameters.get("inputs", [])
+ model_parameters["outputs"] = outputs or model_parameters.get("outputs", [])
+ model_parameters["input_path"] = input_path or model_parameters.get(
+ "input_path"
+ )
+ model_parameters["result_path"] = result_path or model_parameters.get(
+ "result_path"
+ )

  if name in self.shared_models and not override:
  raise mlrun.errors.MLRunInvalidArgumentError(
@@ -2629,7 +2998,9 @@ class RootFlowStep(FlowStep):
  self.shared_models[name] = (model_class, model_parameters)
  self.shared_models_mechanism[name] = execution_mechanism

- def get_shared_model_name_by_artifact_uri(self, artifact_uri: str) -> Optional[str]:
+ def get_shared_model_by_artifact_uri(
+ self, artifact_uri: str
+ ) -> Union[tuple[str, str, dict], tuple[None, None, None]]:
  """
  Get a shared model by its artifact URI.
  :param artifact_uri: The artifact URI of the model.
@@ -2637,10 +3008,10 @@ class RootFlowStep(FlowStep):
  """
  for model_name, (model_class, model_params) in self.shared_models.items():
  if model_params.get("artifact_uri") == artifact_uri:
- return model_name
- return None
+ return model_name, model_class, model_params
+ return None, None, None

- def config_pool_resource(
+ def configure_shared_pool_resource(
  self,
  max_processes: Optional[int] = None,
  max_threads: Optional[int] = None,
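Two call-site adjustments follow from this hunk: the artifact-URI lookup now returns a `(name, class, params)` triple instead of a bare name, and the pool-sizing helper was renamed. A hedged usage sketch (`root_step` and the URI are placeholders):

```python
# Hypothetical call site; `root_step` stands for a RootFlowStep instance.
name, model_class, model_params = root_step.get_shared_model_by_artifact_uri(
    "store://models/my-project/my-model:latest"  # placeholder artifact URI
)
if name is None:
    print("no shared model is registered for this artifact")

# Renamed from config_pool_resource; keyword arguments as shown in the hunk above.
root_step.configure_shared_pool_resource(max_processes=2, max_threads=8)
```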
@@ -2688,6 +3059,7 @@ class RootFlowStep(FlowStep):
  model_params, init_with_params=True
  )
  model._raise_exception = False
+ model._execution_mechanism = self._shared_models_mechanism[model.name]
  self.context.executor.add_runnable(
  model, self._shared_models_mechanism[model.name]
  )
@@ -2807,12 +3179,10 @@ def _add_graphviz_router(graph, step, source=None, **kwargs):
  graph.edge(step.fullname, route.fullname)


- def _add_graphviz_model_runner(graph, step, source=None):
+ def _add_graphviz_model_runner(graph, step, source=None, is_monitored=False):
  if source:
  graph.node("_start", source.name, shape=source.shape, style="filled")
  graph.edge("_start", step.fullname)
-
- is_monitored = step._extract_root_step().track_models
  m_cell = '<FONT POINT-SIZE="9">🄼</FONT>' if is_monitored else ""

  number_of_models = len(
@@ -2851,6 +3221,7 @@ def _add_graphviz_flow(
  allow_empty=True
  )
  graph.node("_start", source.name, shape=source.shape, style="filled")
+ is_monitored = step.track_models if isinstance(step, RootFlowStep) else False
  for start_step in start_steps:
  graph.edge("_start", start_step.fullname)
  for child in step.get_children():
@@ -2859,7 +3230,7 @@ def _add_graphviz_flow(
  with graph.subgraph(name="cluster_" + child.fullname) as sg:
  _add_graphviz_router(sg, child)
  elif kind == StepKinds.model_runner:
- _add_graphviz_model_runner(graph, child)
+ _add_graphviz_model_runner(graph, child, is_monitored=is_monitored)
  else:
  graph.node(child.fullname, label=child.name, shape=child.get_shape())
  _add_edges(child.after or [], step, graph, child)
@@ -3078,7 +3449,7 @@ def _init_async_objects(context, steps):
  datastore_profile = datastore_profile_read(stream_path)
  if isinstance(
  datastore_profile,
- (DatastoreProfileKafkaTarget, DatastoreProfileKafkaSource),
+ (DatastoreProfileKafkaTarget, DatastoreProfileKafkaStream),
  ):
  step._async_object = KafkaStoreyTarget(
  path=stream_path,
@@ -3094,7 +3465,7 @@ def _init_async_objects(context, steps):
  else:
  raise mlrun.errors.MLRunValueError(
  f"Received an unexpected stream profile type: {type(datastore_profile)}\n"
- "Expects `DatastoreProfileV3io` or `DatastoreProfileKafkaSource`."
+ "Expects `DatastoreProfileV3io` or `DatastoreProfileKafkaStream`."
  )
  elif stream_path.startswith("kafka://") or kafka_brokers:
  topic, brokers = parse_kafka_url(stream_path, kafka_brokers)
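Taken together, the two hunks above narrow the profile branch of the stream dispatch: Kafka-backed profiles now match `DatastoreProfileKafkaStream` (alongside `DatastoreProfileKafkaTarget`), and any other profile type raises. A simplified sketch of that branch, with only the class names taken from the diff (the import path is assumed and the return values are placeholders, not the real targets):

```python
# Simplified illustration of the dispatch being edited above.
# Import path assumed to be the datastore_profile module touched in this release.
from mlrun.datastore.datastore_profile import (
    DatastoreProfileKafkaStream,
    DatastoreProfileKafkaTarget,
    DatastoreProfileV3io,
)

def classify_stream_profile(datastore_profile) -> str:
    if isinstance(
        datastore_profile, (DatastoreProfileKafkaTarget, DatastoreProfileKafkaStream)
    ):
        return "kafka"  # handled by KafkaStoreyTarget in the real code
    if isinstance(datastore_profile, DatastoreProfileV3io):
        return "v3io"
    raise ValueError(
        f"Received an unexpected stream profile type: {type(datastore_profile)}\n"
        "Expects `DatastoreProfileV3io` or `DatastoreProfileKafkaStream`."
    )
```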
@@ -3110,6 +3481,8 @@ def _init_async_objects(context, steps):
  context=context,
  **options,
  )
+ elif stream_path.startswith("dummy://"):
+ step._async_object = _DummyStream(context=context, **options)
  else:
  if stream_path.startswith("v3io://"):
  endpoint, stream_path = parse_path(step.path)