mlrun 1.10.0rc14__py3-none-any.whl → 1.10.0rc15__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of mlrun might be problematic.

Files changed (41)
  1. mlrun/artifacts/base.py +0 -31
  2. mlrun/artifacts/manager.py +0 -5
  3. mlrun/common/schemas/__init__.py +1 -0
  4. mlrun/common/schemas/model_monitoring/__init__.py +1 -0
  5. mlrun/common/schemas/model_monitoring/functions.py +1 -1
  6. mlrun/common/schemas/model_monitoring/model_endpoints.py +10 -0
  7. mlrun/config.py +1 -1
  8. mlrun/datastore/model_provider/model_provider.py +42 -14
  9. mlrun/datastore/model_provider/openai_provider.py +96 -15
  10. mlrun/db/base.py +14 -0
  11. mlrun/db/httpdb.py +42 -9
  12. mlrun/db/nopdb.py +8 -0
  13. mlrun/model_monitoring/__init__.py +1 -0
  14. mlrun/model_monitoring/applications/base.py +176 -20
  15. mlrun/model_monitoring/db/_schedules.py +84 -24
  16. mlrun/model_monitoring/db/tsdb/base.py +72 -1
  17. mlrun/model_monitoring/db/tsdb/tdengine/schemas.py +7 -1
  18. mlrun/model_monitoring/db/tsdb/tdengine/tdengine_connector.py +37 -0
  19. mlrun/model_monitoring/db/tsdb/v3io/v3io_connector.py +25 -0
  20. mlrun/model_monitoring/helpers.py +26 -4
  21. mlrun/projects/project.py +26 -6
  22. mlrun/runtimes/daskjob.py +6 -0
  23. mlrun/runtimes/mpijob/abstract.py +6 -0
  24. mlrun/runtimes/mpijob/v1.py +6 -0
  25. mlrun/runtimes/nuclio/application/application.py +2 -0
  26. mlrun/runtimes/nuclio/function.py +6 -0
  27. mlrun/runtimes/nuclio/serving.py +12 -11
  28. mlrun/runtimes/pod.py +21 -0
  29. mlrun/runtimes/remotesparkjob.py +6 -0
  30. mlrun/runtimes/sparkjob/spark3job.py +6 -0
  31. mlrun/serving/server.py +95 -26
  32. mlrun/serving/states.py +16 -0
  33. mlrun/utils/helpers.py +36 -12
  34. mlrun/utils/retryer.py +15 -2
  35. mlrun/utils/version/version.json +2 -2
  36. {mlrun-1.10.0rc14.dist-info → mlrun-1.10.0rc15.dist-info}/METADATA +2 -7
  37. {mlrun-1.10.0rc14.dist-info → mlrun-1.10.0rc15.dist-info}/RECORD +41 -41
  38. {mlrun-1.10.0rc14.dist-info → mlrun-1.10.0rc15.dist-info}/WHEEL +0 -0
  39. {mlrun-1.10.0rc14.dist-info → mlrun-1.10.0rc15.dist-info}/entry_points.txt +0 -0
  40. {mlrun-1.10.0rc14.dist-info → mlrun-1.10.0rc15.dist-info}/licenses/LICENSE +0 -0
  41. {mlrun-1.10.0rc14.dist-info → mlrun-1.10.0rc15.dist-info}/top_level.txt +0 -0
@@ -469,6 +469,7 @@ class TDEngineConnector(TSDBConnector):
  preform_agg_columns: Optional[list] = None,
  order_by: Optional[str] = None,
  desc: Optional[bool] = None,
+ partition_by: Optional[str] = None,
  ) -> pd.DataFrame:
  """
  Getting records from TSDB data collection.
@@ -496,6 +497,8 @@ class TDEngineConnector(TSDBConnector):
  if an empty list was provided The aggregation won't be performed.
  :param order_by: The column or alias to preform ordering on the query.
  :param desc: Whether or not to sort the results in descending order.
+ :param partition_by: The column to partition the results by. Note that if interval is provided,
+ `agg_funcs` must bg provided as well.

  :return: DataFrame with the provided attributes from the data collection.
  :raise: MLRunInvalidArgumentError if query the provided table failed.
@@ -517,6 +520,7 @@ class TDEngineConnector(TSDBConnector):
  preform_agg_funcs_columns=preform_agg_columns,
  order_by=order_by,
  desc=desc,
+ partition_by=partition_by,
  )
  logger.debug("Querying TDEngine", query=full_query)
  try:
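
For orientation, the new `partition_by` argument is exercised by the `get_drift_data` method added in the next hunk. A minimal sketch of such a call, assuming an already-constructed `TDEngineConnector` instance named `connector` and an illustrative 24-hour window (all other names are taken from this diff):

    import datetime

    import mlrun.common.schemas.model_monitoring as mm_schemas

    end = datetime.datetime.now(tz=datetime.timezone.utc)
    start = end - datetime.timedelta(hours=24)

    # _get_records is an internal connector method; per the docstring above, when an
    # interval is provided, agg_funcs must be provided as well.
    df = connector._get_records(
        table=connector.tables[mm_schemas.TDEngineSuperTables.APP_RESULTS].super_table,
        start=start,
        end=end,
        interval="1h",  # illustrative; get_drift_data derives it from the time range
        columns=[mm_schemas.ResultData.RESULT_STATUS],
        timestamp_column=mm_schemas.WriterEvent.END_INFER_TIME,
        agg_funcs=["max"],
        partition_by=mm_schemas.WriterEvent.ENDPOINT_ID,
    )
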
@@ -1205,6 +1209,39 @@ class TDEngineConnector(TSDBConnector):
  )
  )

+ def get_drift_data(
+ self,
+ start: datetime,
+ end: datetime,
+ ) -> mm_schemas.ModelEndpointDriftValues:
+ filter_query = self._generate_filter_query(
+ filter_column=mm_schemas.ResultData.RESULT_STATUS,
+ filter_values=[
+ mm_schemas.ResultStatusApp.potential_detection.value,
+ mm_schemas.ResultStatusApp.detected.value,
+ ],
+ )
+ table = self.tables[mm_schemas.TDEngineSuperTables.APP_RESULTS].super_table
+ start, end, interval = self._prepare_aligned_start_end(start, end)
+
+ # get per time-interval x endpoint_id combination the max result status
+ df = self._get_records(
+ table=table,
+ start=start,
+ end=end,
+ interval=interval,
+ columns=[mm_schemas.ResultData.RESULT_STATUS],
+ filter_query=filter_query,
+ timestamp_column=mm_schemas.WriterEvent.END_INFER_TIME,
+ agg_funcs=["max"],
+ partition_by=mm_schemas.WriterEvent.ENDPOINT_ID,
+ )
+ if df.empty:
+ return mm_schemas.ModelEndpointDriftValues(values=[])
+
+ df["_wstart"] = pd.to_datetime(df["_wstart"])
+ return self._df_to_drift_data(df)
+
  # Note: this function serves as a reference for checking the TSDB for the existence of a metric.
  #
  # def read_prediction_metric_for_endpoint_if_exists(
@@ -1450,3 +1450,28 @@ class V3IOTSDBConnector(TSDBConnector):
  return metric_objects

  return build_metric_objects()
+
+ def get_drift_data(
+ self,
+ start: datetime,
+ end: datetime,
+ ) -> mm_schemas.ModelEndpointDriftValues:
+ table = mm_schemas.V3IOTSDBTables.APP_RESULTS
+ start, end, interval = self._prepare_aligned_start_end(start, end)
+
+ # get per time-interval x endpoint_id combination the max result status
+ df = self._get_records(
+ table=table,
+ start=start,
+ end=end,
+ interval=interval,
+ sliding_window_step=interval,
+ columns=[mm_schemas.ResultData.RESULT_STATUS],
+ agg_funcs=["max"],
+ group_by=mm_schemas.WriterEvent.ENDPOINT_ID,
+ )
+ if df.empty:
+ return mm_schemas.ModelEndpointDriftValues(values=[])
+ df = df[df[f"max({mm_schemas.ResultData.RESULT_STATUS})"] >= 1]
+ df = df.reset_index(names="_wstart")
+ return self._df_to_drift_data(df)
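
Both connectors hand the per-interval frame to `_df_to_drift_data`, which is not part of this diff. A purely hypothetical sketch of that kind of reduction, based on the statuses filtered above and the suspected/detected counts described in `MlrunProject.get_drift_over_time` further down (the function name and return shape here are assumptions):

    import pandas as pd

    import mlrun.common.schemas.model_monitoring as mm_schemas

    def df_to_drift_counts(df: pd.DataFrame, status_col: str) -> list[tuple]:
        # Count, per window start, how many endpoints' max result status was
        # "potential_detection" (suspected) vs. "detected".
        suspected = mm_schemas.ResultStatusApp.potential_detection.value
        detected = mm_schemas.ResultStatusApp.detected.value
        values = []
        for wstart, group in df.groupby("_wstart"):
            values.append(
                (
                    wstart,
                    int((group[status_col] == suspected).sum()),
                    int((group[status_col] == detected).sum()),
                )
            )
        return values
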
@@ -549,6 +549,10 @@ def _get_monitoring_schedules_folder_path(project: str) -> str:
  )


+ def _get_monitoring_schedules_user_folder_path(out_path: str) -> str:
+ return os.path.join(out_path, mm_constants.FileTargetKind.MONITORING_SCHEDULES)
+
+
  def _get_monitoring_schedules_file_endpoint_path(
  *, project: str, endpoint_id: str
  ) -> str:
@@ -570,10 +574,7 @@ def get_monitoring_schedules_endpoint_data(
  )


- def get_monitoring_schedules_chief_data(
- *,
- project: str,
- ) -> "DataItem":
+ def get_monitoring_schedules_chief_data(*, project: str) -> "DataItem":
  """
  Get the model monitoring schedules' data item of the project's model endpoint.
  """
@@ -582,6 +583,19 @@ def get_monitoring_schedules_chief_data(
  )


+ def get_monitoring_schedules_user_application_data(
+ *, out_path: str, application: str
+ ) -> "DataItem":
+ """
+ Get the model monitoring schedules' data item of user application runs.
+ """
+ return mlrun.datastore.store_manager.object(
+ _get_monitoring_schedules_file_user_application_path(
+ out_path=out_path, application=application
+ )
+ )
+
+
  def _get_monitoring_schedules_file_chief_path(
  *,
  project: str,
@@ -591,6 +605,14 @@ def _get_monitoring_schedules_file_chief_path(
  )


+ def _get_monitoring_schedules_file_user_application_path(
+ *, out_path: str, application: str
+ ) -> str:
+ return os.path.join(
+ _get_monitoring_schedules_user_folder_path(out_path), f"{application}.json"
+ )
+
+
  def get_start_end(
  start: Union[datetime.datetime, None],
  end: Union[datetime.datetime, None],
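
A usage sketch of the new user-application schedule helpers; the `out_path` and `application` values are illustrative, and the resulting path is `<out_path>/<FileTargetKind.MONITORING_SCHEDULES>/<application>.json`:

    # Assuming the helpers are imported from their defining module in mlrun's
    # model monitoring package:
    path = _get_monitoring_schedules_file_user_application_path(
        out_path="v3io:///projects/my-project/monitoring",
        application="my-monitoring-app",
    )
    data_item = get_monitoring_schedules_user_application_data(
        out_path="v3io:///projects/my-project/monitoring",
        application="my-monitoring-app",
    )
    # data_item is a DataItem pointing at the application's schedules JSON file
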
mlrun/projects/project.py CHANGED
@@ -1042,12 +1042,7 @@ class ProjectSpec(ModelObj):
  artifact = artifact.to_dict()
  else: # artifact is a dict
  # imported/legacy artifacts don't have metadata,spec,status fields
- key_field = (
- "key"
- if _is_imported_artifact(artifact)
- or mlrun.utils.is_legacy_artifact(artifact)
- else "metadata.key"
- )
+ key_field = "key" if _is_imported_artifact(artifact) else "metadata.key"
  key = mlrun.utils.get_in(artifact, key_field, "")
  if not key:
  raise ValueError(f'artifacts "{key_field}" must be specified')
@@ -5557,6 +5552,31 @@ class MlrunProject(ModelObj):
  **kwargs,
  )

+ def get_drift_over_time(
+ self,
+ start: Optional[datetime.datetime] = None,
+ end: Optional[datetime.datetime] = None,
+ ) -> mlrun.common.schemas.model_monitoring.ModelEndpointDriftValues:
+ """
+ Get drift counts over time for the project.
+
+ This method returns a list of tuples, each representing a time-interval (in a granularity set by the
+ duration of the given time range) and the number of suspected drifts and detected drifts in that interval.
+ For a range of 6 hours or less, the granularity is 10 minute, for a range of 2 hours to 72 hours, the
+ granularity is 1 hour, and for a range of more than 72 hours, the granularity is 24 hours.
+
+ :param start: Start time of the range to retrieve drift counts from.
+ :param end: End time of the range to retrieve drift counts from.
+
+ :return: A ModelEndpointDriftValues object containing the drift counts over time.
+ """
+ db = mlrun.db.get_run_db(secrets=self._secrets)
+ return db.get_drift_over_time(
+ project=self.metadata.name,
+ start=start,
+ end=end,
+ )
+
  def _run_authenticated_git_action(
  self,
  action: Callable,
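
A usage sketch of the new project method; the project name and time range are illustrative:

    import datetime

    import mlrun

    project = mlrun.get_or_create_project("my-project")
    end = datetime.datetime.now(tz=datetime.timezone.utc)
    start = end - datetime.timedelta(hours=24)
    drift_values = project.get_drift_over_time(start=start, end=end)
    # drift_values is a ModelEndpointDriftValues; per the docstring above, a 24-hour
    # range yields 1-hour granularity per interval
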
mlrun/runtimes/daskjob.py CHANGED
@@ -93,6 +93,9 @@ class DaskSpec(KubeResourceSpec):
  security_context=None,
  state_thresholds=None,
  serving_spec=None,
+ graph=None,
+ parameters=None,
+ track_models=None,
  ):
  super().__init__(
  command=command,
@@ -123,6 +126,9 @@ class DaskSpec(KubeResourceSpec):
  security_context=security_context,
  state_thresholds=state_thresholds,
  serving_spec=serving_spec,
+ graph=graph,
+ parameters=parameters,
+ track_models=track_models,
  )
  self.args = args

@@ -55,6 +55,9 @@ class MPIResourceSpec(KubeResourceSpec):
  security_context=None,
  state_thresholds=None,
  serving_spec=None,
+ graph=None,
+ parameters=None,
+ track_models=None,
  ):
  super().__init__(
  command=command,
@@ -85,6 +88,9 @@ class MPIResourceSpec(KubeResourceSpec):
  security_context=security_context,
  state_thresholds=state_thresholds,
  serving_spec=serving_spec,
+ graph=graph,
+ parameters=parameters,
+ track_models=track_models,
  )
  self.mpi_args = mpi_args or [
  "-x",
@@ -50,6 +50,9 @@ class MPIV1ResourceSpec(MPIResourceSpec):
  security_context=None,
  state_thresholds=None,
  serving_spec=None,
+ graph=None,
+ parameters=None,
+ track_models=None,
  ):
  super().__init__(
  command=command,
@@ -81,6 +84,9 @@ class MPIV1ResourceSpec(MPIResourceSpec):
  security_context=security_context,
  state_thresholds=state_thresholds,
  serving_spec=serving_spec,
+ graph=graph,
+ parameters=parameters,
+ track_models=track_models,
  )
  self.clean_pod_policy = clean_pod_policy or MPIJobV1CleanPodPolicies.default()

@@ -400,8 +400,10 @@ class ApplicationRuntime(RemoteRuntime):
  # nuclio implementation detail - when providing the image and emptying out the source code and build source,
  # nuclio skips rebuilding the image and simply takes the prebuilt image
  self.spec.build.functionSourceCode = ""
+ self.spec.config.pop("spec.build.functionSourceCode", None)
  self.status.application_source = self.spec.build.source
  self.spec.build.source = ""
+ self.spec.config.pop("spec.build.source", None)

  # save the image in the status, so we won't repopulate the function source code
  self.status.container_image = image
@@ -155,6 +155,9 @@ class NuclioSpec(KubeResourceSpec):
  state_thresholds=None,
  disable_default_http_trigger=None,
  serving_spec=None,
+ graph=None,
+ parameters=None,
+ track_models=None,
  ):
  super().__init__(
  command=command,
@@ -185,6 +188,9 @@ class NuclioSpec(KubeResourceSpec):
  security_context=security_context,
  state_thresholds=state_thresholds,
  serving_spec=serving_spec,
+ graph=graph,
+ parameters=parameters,
+ track_models=track_models,
  )

  self.base_spec = base_spec or {}
@@ -720,6 +720,7 @@ class ServingRuntime(RemoteRuntime):
  "track_models": self.spec.track_models,
  "default_content_type": self.spec.default_content_type,
  "model_endpoint_creation_task_name": self.spec.model_endpoint_creation_task_name,
+ # TODO: find another way to pass this (needed for local run)
  "filename": getattr(self.spec, "filename", None),
  }

@@ -788,17 +789,13 @@ class ServingRuntime(RemoteRuntime):
  monitoring_mock=self.spec.track_models,
  )

- if (
- isinstance(self.spec.graph, RootFlowStep)
- and self.spec.graph.include_monitored_step()
- ):
- server.graph = add_system_steps_to_graph(
- server.project,
- server.graph,
- self.spec.track_models,
- server.context,
- self.spec,
- )
+ server.graph = add_system_steps_to_graph(
+ server.project,
+ server.graph,
+ self.spec.track_models,
+ server.context,
+ self.spec,
+ )

  if workdir:
  os.chdir(old_workdir)
@@ -858,6 +855,7 @@ class ServingRuntime(RemoteRuntime):
  description=self.spec.description,
  workdir=self.spec.workdir,
  image_pull_secret=self.spec.image_pull_secret,
+ build=self.spec.build,
  node_name=self.spec.node_name,
  node_selector=self.spec.node_selector,
  affinity=self.spec.affinity,
@@ -868,6 +866,9 @@ class ServingRuntime(RemoteRuntime):
  security_context=self.spec.security_context,
  state_thresholds=self.spec.state_thresholds,
  serving_spec=self._get_serving_spec(),
+ track_models=self.spec.track_models,
+ parameters=self.spec.parameters,
+ graph=self.spec.graph,
  )
  job = KubejobRuntime(
  spec=spec,
mlrun/runtimes/pod.py CHANGED
@@ -104,6 +104,9 @@ class KubeResourceSpec(FunctionSpec):
  "security_context",
  "state_thresholds",
  "serving_spec",
+ "track_models",
+ "parameters",
+ "graph",
  ]
  _default_fields_to_strip = FunctionSpec._default_fields_to_strip + [
  "volumes",
@@ -180,6 +183,9 @@ class KubeResourceSpec(FunctionSpec):
  security_context=None,
  state_thresholds=None,
  serving_spec=None,
+ track_models=None,
+ parameters=None,
+ graph=None,
  ):
  super().__init__(
  command=command,
@@ -226,6 +232,10 @@ class KubeResourceSpec(FunctionSpec):
  or mlrun.mlconf.function.spec.state_thresholds.default.to_dict()
  )
  self.serving_spec = serving_spec
+ self.track_models = track_models
+ self.parameters = parameters
+ self._graph = None
+ self.graph = graph
  # Termination grace period is internal for runtimes that have a pod termination hook hence it is not in the
  # _dict_fields and doesn't have a setter.
  self._termination_grace_period_seconds = None
@@ -303,6 +313,17 @@ class KubeResourceSpec(FunctionSpec):
  def termination_grace_period_seconds(self) -> typing.Optional[int]:
  return self._termination_grace_period_seconds

+ @property
+ def graph(self):
+ """states graph, holding the serving workflow/DAG topology"""
+ return self._graph
+
+ @graph.setter
+ def graph(self, graph):
+ from ..serving.states import graph_root_setter
+
+ graph_root_setter(self, graph)
+
  def _serialize_field(
  self, struct: dict, field_name: typing.Optional[str] = None, strip: bool = False
  ) -> typing.Any:
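
A small sketch of the new spec fields in isolation; constructing a bare `KubeResourceSpec` directly is only for illustration, as in practice these fields are populated when a `ServingRuntime` is converted to a job (see the ServingRuntime hunks above):

    from mlrun.runtimes.pod import KubeResourceSpec

    spec = KubeResourceSpec(track_models=True, parameters={"threshold": 0.5})
    print(spec.track_models)  # True
    print(spec.parameters)    # {'threshold': 0.5}
    print(spec.graph)         # None until a graph is assigned via the new property
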
@@ -59,6 +59,9 @@ class RemoteSparkSpec(KubeResourceSpec):
  security_context=None,
  state_thresholds=None,
  serving_spec=None,
+ graph=None,
+ parameters=None,
+ track_models=None,
  ):
  super().__init__(
  command=command,
@@ -89,6 +92,9 @@ class RemoteSparkSpec(KubeResourceSpec):
  security_context=security_context,
  state_thresholds=state_thresholds,
  serving_spec=serving_spec,
+ graph=graph,
+ parameters=parameters,
+ track_models=track_models,
  )
  self.provider = provider

@@ -169,6 +169,9 @@ class Spark3JobSpec(KubeResourceSpec):
  security_context=None,
  state_thresholds=None,
  serving_spec=None,
+ graph=None,
+ parameters=None,
+ track_models=None,
  ):
  super().__init__(
  command=command,
@@ -199,6 +202,9 @@ class Spark3JobSpec(KubeResourceSpec):
  security_context=security_context,
  state_thresholds=state_thresholds,
  serving_spec=serving_spec,
+ graph=graph,
+ parameters=parameters,
+ track_models=track_models,
  )

  self.driver_resources = driver_resources or {}
mlrun/serving/server.py CHANGED
@@ -15,6 +15,7 @@
  __all__ = ["GraphServer", "create_graph_server", "GraphContext", "MockEvent"]

  import asyncio
+ import base64
  import copy
  import json
  import os
@@ -384,6 +385,7 @@ def add_monitoring_general_steps(
  graph: RootFlowStep,
  context,
  serving_spec,
+ pause_until_background_task_completion: bool,
  ) -> tuple[RootFlowStep, FlowStep]:
  """
  Adding the monitoring flow connection steps, this steps allow the graph to reconstruct the serving event enrich it
@@ -392,18 +394,22 @@ def add_monitoring_general_steps(
  "background_task_status_step" --> "filter_none" --> "monitoring_pre_processor_step" --> "flatten_events"
  --> "sampling_step" --> "filter_none_sampling" --> "model_monitoring_stream"
  """
+ background_task_status_step = None
+ if pause_until_background_task_completion:
+ background_task_status_step = graph.add_step(
+ "mlrun.serving.system_steps.BackgroundTaskStatus",
+ "background_task_status_step",
+ model_endpoint_creation_strategy=mlrun.common.schemas.ModelEndpointCreationStrategy.SKIP,
+ )
  monitor_flow_step = graph.add_step(
- "mlrun.serving.system_steps.BackgroundTaskStatus",
- "background_task_status_step",
- model_endpoint_creation_strategy=mlrun.common.schemas.ModelEndpointCreationStrategy.SKIP,
- )
- graph.add_step(
  "storey.Filter",
  "filter_none",
  _fn="(event is not None)",
- after="background_task_status_step",
+ after="background_task_status_step" if background_task_status_step else None,
  model_endpoint_creation_strategy=mlrun.common.schemas.ModelEndpointCreationStrategy.SKIP,
  )
+ if background_task_status_step:
+ monitor_flow_step = background_task_status_step
  graph.add_step(
  "mlrun.serving.system_steps.MonitoringPreProcessor",
  "monitoring_pre_processor_step",
@@ -466,14 +472,28 @@ def add_monitoring_general_steps(


  def add_system_steps_to_graph(
- project: str, graph: RootFlowStep, track_models: bool, context, serving_spec
+ project: str,
+ graph: RootFlowStep,
+ track_models: bool,
+ context,
+ serving_spec,
+ pause_until_background_task_completion: bool = True,
  ) -> RootFlowStep:
+ if not (isinstance(graph, RootFlowStep) and graph.include_monitored_step()):
+ return graph
  monitored_steps = graph.get_monitored_steps()
  graph = add_error_raiser_step(graph, monitored_steps)
  if track_models:
+ background_task_status_step = None
  graph, monitor_flow_step = add_monitoring_general_steps(
- project, graph, context, serving_spec
+ project,
+ graph,
+ context,
+ serving_spec,
+ pause_until_background_task_completion,
  )
+ if background_task_status_step:
+ monitor_flow_step = background_task_status_step
  # Connect each model runner to the monitoring step:
  for step_name, step in monitored_steps.items():
  if monitor_flow_step.after:
@@ -485,6 +505,10 @@ def add_system_steps_to_graph(
  monitor_flow_step.after = [
  step_name,
  ]
+ context.logger.info_with(
+ "Server graph after adding system steps",
+ graph=str(graph.steps),
+ )
  return graph


@@ -494,18 +518,13 @@ def v2_serving_init(context, namespace=None):
  context.logger.info("Initializing server from spec")
  spec = mlrun.utils.get_serving_spec()
  server = GraphServer.from_dict(spec)
- if isinstance(server.graph, RootFlowStep) and server.graph.include_monitored_step():
- server.graph = add_system_steps_to_graph(
- server.project,
- copy.deepcopy(server.graph),
- spec.get("track_models"),
- context,
- spec,
- )
- context.logger.info_with(
- "Server graph after adding system steps",
- graph=str(server.graph.steps),
- )
+ server.graph = add_system_steps_to_graph(
+ server.project,
+ copy.deepcopy(server.graph),
+ spec.get("track_models"),
+ context,
+ spec,
+ )

  if config.log_level.lower() == "debug":
  server.verbose = True
@@ -544,17 +563,57 @@ async def async_execute_graph(
  data: DataItem,
  batching: bool,
  batch_size: Optional[int],
+ read_as_lists: bool,
+ nest_under_inputs: bool,
  ) -> list[Any]:
  spec = mlrun.utils.get_serving_spec()

- source_filename = spec.get("filename", None)
  namespace = {}
- if source_filename:
- with open(source_filename) as f:
- exec(f.read(), namespace)
+ code = os.getenv("MLRUN_EXEC_CODE")
+ if code:
+ code = base64.b64decode(code).decode("utf-8")
+ exec(code, namespace)
+ else:
+ # TODO: find another way to get the local file path, or ensure that MLRUN_EXEC_CODE
+ # gets set in local flow and not just in the remote pod
+ source_filename = spec.get("filename", None)
+ if source_filename:
+ with open(source_filename) as f:
+ exec(f.read(), namespace)

  server = GraphServer.from_dict(spec)

+ if server.model_endpoint_creation_task_name:
+ context.logger.info(
+ f"Waiting for model endpoint creation task '{server.model_endpoint_creation_task_name}'..."
+ )
+ background_task = (
+ mlrun.get_run_db().wait_for_background_task_to_reach_terminal_state(
+ project=server.project,
+ name=server.model_endpoint_creation_task_name,
+ )
+ )
+ task_state = background_task.status.state
+ if task_state == mlrun.common.schemas.BackgroundTaskState.failed:
+ raise mlrun.errors.MLRunRuntimeError(
+ "Aborting job due to model endpoint creation background task failure"
+ )
+ elif task_state != mlrun.common.schemas.BackgroundTaskState.succeeded:
+ # this shouldn't happen, but we need to know if it does
+ raise mlrun.errors.MLRunRuntimeError(
+ "Aborting job because the model endpoint creation background task did not succeed "
+ f"(status='{task_state}')"
+ )
+
+ server.graph = add_system_steps_to_graph(
+ server.project,
+ copy.deepcopy(server.graph),
+ spec.get("track_models"),
+ context,
+ spec,
+ pause_until_background_task_completion=False, # we've already awaited it
+ )
+
  if config.log_level.lower() == "debug":
  server.verbose = True
  context.logger.info_with("Initializing states", namespace=namespace)
@@ -588,7 +647,9 @@

  batch = []
  for index, row in df.iterrows():
- data = row.to_dict()
+ data = row.to_list() if read_as_lists else row.to_dict()
+ if nest_under_inputs:
+ data = {"inputs": data}
  if batching:
  batch.append(data)
  if len(batch) == batch_size:
@@ -612,6 +673,8 @@ def execute_graph(
  data: DataItem,
  batching: bool = False,
  batch_size: Optional[int] = None,
+ read_as_lists: bool = False,
+ nest_under_inputs: bool = False,
  ) -> (list[Any], Any):
  """
  Execute graph as a job, from start to finish.
@@ -621,10 +684,16 @@ def execute_graph(
  :param batching: Whether to push one or more batches into the graph rather than row by row.
  :param batch_size: The number of rows to push per batch. If not set, and batching=True, the entire dataset will
  be pushed into the graph in one batch.
+ :param read_as_lists: Whether to read each row as a list instead of a dictionary.
+ :param nest_under_inputs: Whether to wrap each row with {"inputs": ...}.

  :return: A list of responses.
  """
- return asyncio.run(async_execute_graph(context, data, batching, batch_size))
+ return asyncio.run(
+ async_execute_graph(
+ context, data, batching, batch_size, read_as_lists, nest_under_inputs
+ )
+ )


  def _set_callbacks(server, context):
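
A usage sketch of `execute_graph` with the new flags, wrapped in an illustrative job handler (the handler name, batch size, and flag values are examples):

    from mlrun.serving.server import execute_graph

    def handler(context, data):
        # data is a DataItem holding the dataset to push through the graph
        responses = execute_graph(
            context,
            data,
            batching=True,
            batch_size=64,
            read_as_lists=True,      # push each row as a list instead of a dict
            nest_under_inputs=True,  # wrap each row as {"inputs": ...}
        )
        return responses
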
mlrun/serving/states.py CHANGED
@@ -1203,11 +1203,27 @@ class LLModel(Model):
  def predict(
  self, body: Any, messages: list[dict], model_configuration: dict
  ) -> Any:
+ if isinstance(
+ self.invocation_artifact, mlrun.artifacts.LLMPromptArtifact
+ ) and isinstance(self.model_provider, ModelProvider):
+ body["result"] = self.model_provider.invoke(
+ messages=messages,
+ as_str=True,
+ **(model_configuration or {}),
+ )
  return body

  async def predict_async(
  self, body: Any, messages: list[dict], model_configuration: dict
  ) -> Any:
+ if isinstance(
+ self.invocation_artifact, mlrun.artifacts.LLMPromptArtifact
+ ) and isinstance(self.model_provider, ModelProvider):
+ body["result"] = await self.model_provider.async_invoke(
+ messages=messages,
+ as_str=True,
+ **(model_configuration or {}),
+ )
  return body

  def run(self, body: Any, path: str, origin_name: Optional[str] = None) -> Any: