mlrun 1.10.0rc19__py3-none-any.whl → 1.10.0rc21__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of mlrun might be problematic.

Files changed (31)
  1. mlrun/common/schemas/function.py +10 -0
  2. mlrun/common/schemas/model_monitoring/constants.py +4 -11
  3. mlrun/common/schemas/model_monitoring/model_endpoints.py +2 -0
  4. mlrun/datastore/model_provider/huggingface_provider.py +109 -20
  5. mlrun/datastore/model_provider/model_provider.py +110 -32
  6. mlrun/datastore/model_provider/openai_provider.py +87 -31
  7. mlrun/db/base.py +0 -19
  8. mlrun/db/httpdb.py +10 -46
  9. mlrun/db/nopdb.py +0 -10
  10. mlrun/launcher/base.py +0 -6
  11. mlrun/model_monitoring/api.py +43 -22
  12. mlrun/model_monitoring/applications/base.py +1 -1
  13. mlrun/model_monitoring/controller.py +112 -38
  14. mlrun/model_monitoring/db/_schedules.py +13 -9
  15. mlrun/model_monitoring/stream_processing.py +16 -12
  16. mlrun/platforms/__init__.py +3 -2
  17. mlrun/projects/project.py +2 -2
  18. mlrun/run.py +38 -5
  19. mlrun/serving/server.py +23 -0
  20. mlrun/serving/states.py +76 -29
  21. mlrun/serving/system_steps.py +60 -36
  22. mlrun/utils/helpers.py +27 -13
  23. mlrun/utils/notifications/notification_pusher.py +1 -1
  24. mlrun/utils/version/version.json +2 -2
  25. {mlrun-1.10.0rc19.dist-info → mlrun-1.10.0rc21.dist-info}/METADATA +6 -5
  26. {mlrun-1.10.0rc19.dist-info → mlrun-1.10.0rc21.dist-info}/RECORD +30 -31
  27. mlrun/api/schemas/__init__.py +0 -259
  28. {mlrun-1.10.0rc19.dist-info → mlrun-1.10.0rc21.dist-info}/WHEEL +0 -0
  29. {mlrun-1.10.0rc19.dist-info → mlrun-1.10.0rc21.dist-info}/entry_points.txt +0 -0
  30. {mlrun-1.10.0rc19.dist-info → mlrun-1.10.0rc21.dist-info}/licenses/LICENSE +0 -0
  31. {mlrun-1.10.0rc19.dist-info → mlrun-1.10.0rc21.dist-info}/top_level.txt +0 -0
mlrun/model_monitoring/db/_schedules.py CHANGED

@@ -162,19 +162,19 @@ class ModelMonitoringSchedulesFileEndpoint(ModelMonitoringSchedulesFileBase):
             endpoint_id=model_endpoint.metadata.uid,
         )

-    def get_application_time(self, application: str) -> Optional[int]:
+    def get_application_time(self, application: str) -> Optional[float]:
         self._check_open_schedules()
         return self._schedules.get(application)

-    def update_application_time(self, application: str, timestamp: int) -> None:
+    def update_application_time(self, application: str, timestamp: float) -> None:
         self._check_open_schedules()
-        self._schedules[application] = timestamp
+        self._schedules[application] = float(timestamp)

     def get_application_list(self) -> set[str]:
         self._check_open_schedules()
         return set(self._schedules.keys())

-    def get_min_timestamp(self) -> Optional[int]:
+    def get_min_timestamp(self) -> Optional[float]:
         self._check_open_schedules()
         return min(self._schedules.values(), default=None)

@@ -198,7 +198,7 @@ class ModelMonitoringSchedulesFileChief(ModelMonitoringSchedulesFileBase):
             project=self._project
         )

-    def get_endpoint_last_request(self, endpoint_uid: str) -> Optional[int]:
+    def get_endpoint_last_request(self, endpoint_uid: str) -> Optional[float]:
         self._check_open_schedules()
         if endpoint_uid in self._schedules:
             return self._schedules[endpoint_uid].get(
@@ -208,15 +208,19 @@ class ModelMonitoringSchedulesFileChief(ModelMonitoringSchedulesFileBase):
         return None

     def update_endpoint_timestamps(
-        self, endpoint_uid: str, last_request: int, last_analyzed: int
+        self, endpoint_uid: str, last_request: float, last_analyzed: float
     ) -> None:
         self._check_open_schedules()
         self._schedules[endpoint_uid] = {
-            schemas.model_monitoring.constants.ScheduleChiefFields.LAST_REQUEST: last_request,
-            schemas.model_monitoring.constants.ScheduleChiefFields.LAST_ANALYZED: last_analyzed,
+            schemas.model_monitoring.constants.ScheduleChiefFields.LAST_REQUEST: float(
+                last_request
+            ),
+            schemas.model_monitoring.constants.ScheduleChiefFields.LAST_ANALYZED: float(
+                last_analyzed
+            ),
         }

-    def get_endpoint_last_analyzed(self, endpoint_uid: str) -> Optional[int]:
+    def get_endpoint_last_analyzed(self, endpoint_uid: str) -> Optional[float]:
         self._check_open_schedules()
         if endpoint_uid in self._schedules:
             return self._schedules[endpoint_uid].get(
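
Editorial note on the int-to-float change above: these schedule entries hold what are presumably epoch timestamps from time.time(), which returns a float, so truncating to int silently drops sub-second precision. A minimal standalone sketch of what the wider type preserves (not MLRun code):

    import time

    now = time.time()       # float epoch timestamp, e.g. 1718000000.7431
    truncated = int(now)    # the sub-second part is lost

    # Two events less than a second apart collapse to the same int value:
    a, b = 1718000000.2, 1718000000.9
    assert int(a) == int(b)      # ordering information is gone
    assert a < b                 # the float keeps it

    # float() also accepts ints, so schedule files written with the old
    # integer values still load unchanged:
    assert float(1718000000) == 1718000000.0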
mlrun/model_monitoring/stream_processing.py CHANGED

@@ -396,6 +396,8 @@ class ProcessEndpointEvent(mlrun.feature_store.steps.MapClass):
         request_id = event.get("request", {}).get("id") or event.get("resp", {}).get(
             "id"
         )
+        feature_names = event.get("request", {}).get("input_schema")
+        labels_names = event.get("resp", {}).get("output_schema")
         latency = event.get("microsec")
         features = event.get("request", {}).get("inputs")
         predictions = event.get("resp", {}).get("outputs")
@@ -496,6 +498,8 @@ class ProcessEndpointEvent(mlrun.feature_store.steps.MapClass):
                 ),
                 EventFieldType.EFFECTIVE_SAMPLE_COUNT: effective_sample_count,
                 EventFieldType.ESTIMATED_PREDICTION_COUNT: estimated_prediction_count,
+                EventFieldType.FEATURE_NAMES: feature_names,
+                EventFieldType.LABEL_NAMES: labels_names,
             }
         )

@@ -602,19 +606,19 @@ class MapFeatureNames(mlrun.feature_store.steps.MapClass):
         self.endpoint_type = {}

     def _infer_feature_names_from_data(self, event):
-        for endpoint_id in self.feature_names:
-            if len(self.feature_names[endpoint_id]) >= len(
-                event[EventFieldType.FEATURES]
-            ):
-                return self.feature_names[endpoint_id]
+        endpoint_id = event[EventFieldType.ENDPOINT_ID]
+        if endpoint_id in self.feature_names and len(
+            self.feature_names[endpoint_id]
+        ) >= len(event[EventFieldType.FEATURES]):
+            return self.feature_names[endpoint_id]
         return None

     def _infer_label_columns_from_data(self, event):
-        for endpoint_id in self.label_columns:
-            if len(self.label_columns[endpoint_id]) >= len(
-                event[EventFieldType.PREDICTION]
-            ):
-                return self.label_columns[endpoint_id]
+        endpoint_id = event[EventFieldType.ENDPOINT_ID]
+        if endpoint_id in self.label_columns and len(
+            self.label_columns[endpoint_id]
+        ) >= len(event[EventFieldType.PREDICTION]):
+            return self.label_columns[endpoint_id]
         return None

     def do(self, event: dict):
@@ -659,7 +663,7 @@ class MapFeatureNames(mlrun.feature_store.steps.MapClass):
                     "Feature names are not initialized, they will be automatically generated",
                     endpoint_id=endpoint_id,
                 )
-                feature_names = [
+                feature_names = event.get(EventFieldType.FEATURE_NAMES) or [
                     f"f{i}" for i, _ in enumerate(event[EventFieldType.FEATURES])
                 ]

@@ -682,7 +686,7 @@ class MapFeatureNames(mlrun.feature_store.steps.MapClass):
                     "label column names are not initialized, they will be automatically generated",
                     endpoint_id=endpoint_id,
                 )
-                label_columns = [
+                label_columns = event.get(EventFieldType.LABEL_NAMES) or [
                     f"p{i}" for i, _ in enumerate(event[EventFieldType.PREDICTION])
                 ]
                 attributes_to_update[EventFieldType.LABEL_NAMES] = label_columns
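
The _infer_*_from_data rewrite above fixes a cross-endpoint leak: the old loop returned the first cached schema that was merely long enough, whatever endpoint it belonged to. A self-contained sketch with illustrative names:

    # Cached schemas for two endpoints (illustrative data):
    feature_names = {
        "ep-a": ["age", "income", "score"],  # 3 features
        "ep-b": ["x0", "x1"],                # 2 features
    }
    event = {"endpoint_id": "ep-b", "features": [0.1, 0.2]}

    def infer_old(event):
        # Old: scan every endpoint, return the first schema long enough.
        for endpoint_id in feature_names:
            if len(feature_names[endpoint_id]) >= len(event["features"]):
                return feature_names[endpoint_id]
        return None

    def infer_new(event):
        # New: consult only the event's own endpoint.
        endpoint_id = event["endpoint_id"]
        if endpoint_id in feature_names and len(
            feature_names[endpoint_id]
        ) >= len(event["features"]):
            return feature_names[endpoint_id]
        return None

    print(infer_old(event))  # ['age', 'income', 'score'] -- leaked from ep-a
    print(infer_new(event))  # ['x0', 'x1']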
mlrun/platforms/__init__.py CHANGED

@@ -25,6 +25,7 @@ from .iguazio import (
 )


+# TODO: Remove in 1.11.0
 class _DeprecationHelper:
     """A helper class to deprecate old schemas"""

@@ -48,12 +49,12 @@ class _DeprecationHelper:
     def _warn(self):
         warnings.warn(
             f"mlrun.platforms.{self._new_target} is deprecated since version {self._version}, "
-            f"and will be removed in 1.10. Use mlrun.runtimes.mounts.{self._new_target} instead.",
+            f"and will be removed in 1.11.0. Use mlrun.runtimes.mounts.{self._new_target} instead.",
             FutureWarning,
         )


-# TODO: Remove in 1.10
+# TODO: Remove in 1.11.0
 # For backwards compatibility
 VolumeMount = _DeprecationHelper("VolumeMount")
 auto_mount = _DeprecationHelper("auto_mount")
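
For readers unfamiliar with the _DeprecationHelper pattern: it keeps the old module-level name importable while deferring the warning and the real lookup to first use. A rough sketch of the idea; the constructor signature and default version below are assumptions, not MLRun's exact code:

    import warnings

    class _DeprecationShim:
        def __init__(self, new_target: str, version: str = "1.8.0"):
            self._new_target = new_target
            self._version = version

        def _lookup(self):
            # The new home of the mount helpers per the warning text above.
            import mlrun.runtimes.mounts
            return getattr(mlrun.runtimes.mounts, self._new_target)

        def _warn(self):
            warnings.warn(
                f"mlrun.platforms.{self._new_target} is deprecated since version "
                f"{self._version}, and will be removed in 1.11.0. "
                f"Use mlrun.runtimes.mounts.{self._new_target} instead.",
                FutureWarning,
            )

        def __call__(self, *args, **kwargs):
            self._warn()
            return self._lookup()(*args, **kwargs)

        def __getattr__(self, name):
            self._warn()
            return getattr(self._lookup(), name)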
mlrun/projects/project.py CHANGED
@@ -3939,8 +3939,8 @@ class MlrunProject(ModelObj):
         :param start: The start time to filter by.Corresponding to the `created` field.
         :param end: The end time to filter by. Corresponding to the `created` field.
         :param top_level: If true will return only routers and endpoint that are NOT children of any router.
-        :param mode: Specifies the mode of the model endpoint. Can be "real-time", "batch", or both if set
-            to None.
+        :param mode: Specifies the mode of the model endpoint. Can be "real-time" (0), "batch" (1), or
+            both if set to None.
         :param uids: If passed will return a list `ModelEndpoint` object with uid in uids.
         :param tsdb_metrics: When True, the time series metrics will be added to the output
                              of the resulting.
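
A hedged usage sketch of the clarified mode filter (assuming the surrounding method is list_model_endpoints, per the parameters documented above):

    import mlrun

    project = mlrun.get_or_create_project("my-project")

    rt = project.list_model_endpoints(mode="real-time")  # only real-time (0)
    batch = project.list_model_endpoints(mode="batch")   # only batch (1)
    both = project.list_model_endpoints()                # mode=None: both kinds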
mlrun/run.py CHANGED
@@ -141,7 +141,7 @@ def load_func_code(command="", workdir=None, secrets=None, name="name"):
     else:
         is_remote = "://" in command
         data = get_object(command, secrets)
-        runtime = yaml.load(data, Loader=yaml.FullLoader)
+        runtime = yaml.safe_load(data)
         runtime = new_function(runtime=runtime)

         command = runtime.spec.command or ""
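
yaml.safe_load restricts parsing to plain YAML types; FullLoader is more permissive and has had known code-execution bypasses (e.g. CVE-2020-14343), which matters because function specs can be fetched from remote URLs. A quick standalone illustration:

    import yaml

    payload = "!!python/object/apply:os.system ['echo pwned']"
    try:
        yaml.safe_load(payload)          # python-specific tags are rejected
    except yaml.constructor.ConstructorError as err:
        print("rejected:", err.problem)

    # Ordinary specs parse fine under safe_load:
    print(yaml.safe_load("kind: job\nspec:\n  command: handler.py"))
    # {'kind': 'job', 'spec': {'command': 'handler.py'}}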
@@ -362,7 +362,10 @@ def import_function(url="", secrets=None, db="", project=None, new_name=None):
     return function


-def import_function_to_dict(url, secrets=None):
+def import_function_to_dict(
+    url: str,
+    secrets: Optional[dict] = None,
+) -> dict:
     """Load function spec from local/remote YAML file"""
     obj = get_object(url, secrets)
     runtime = yaml.safe_load(obj)
@@ -388,6 +391,11 @@
            raise ValueError("exec path (spec.command) must be relative")
        url = url[: url.rfind("/") + 1] + code_file
        code = get_object(url, secrets)
+       code_file = _ensure_path_confined_to_base_dir(
+           base_directory=".",
+           relative_path=code_file,
+           error_message_on_escape="Path traversal detected in spec.command",
+       )
        dir = path.dirname(code_file)
        if dir:
            makedirs(dir, exist_ok=True)
@@ -395,9 +403,16 @@
            fp.write(code)
    elif cmd:
        if not path.isfile(code_file):
-           # look for the file in a relative path to the yaml
-           slash = url.rfind("/")
-           if slash >= 0 and path.isfile(url[: url.rfind("/") + 1] + code_file):
+           slash_index = url.rfind("/")
+           if slash_index < 0:
+               raise ValueError(f"no file in exec path (spec.command={code_file})")
+           base_dir = os.path.normpath(url[: slash_index + 1])
+           candidate_path = _ensure_path_confined_to_base_dir(
+               base_directory=base_dir,
+               relative_path=code_file,
+               error_message_on_escape=f"exec file spec.command={code_file} is outside of allowed directory",
+           )
+           if path.isfile(candidate_path):
                raise ValueError(
                    f"exec file spec.command={code_file} is relative, change working dir"
                )
@@ -1258,3 +1273,21 @@ def wait_for_runs_completion(
         runs = running

     return completed
+
+
+def _ensure_path_confined_to_base_dir(
+    base_directory: str,
+    relative_path: str,
+    error_message_on_escape: str,
+) -> str:
+    """
+    Join `relative_path` to `base_directory`, normalise the result,
+    and guarantee it stays inside `base_directory`.
+    """
+    absolute_base_directory = path.abspath(base_directory)
+    absolute_candidate_path = path.abspath(
+        path.join(absolute_base_directory, relative_path)
+    )
+    if not absolute_candidate_path.startswith(absolute_base_directory + path.sep):
+        raise ValueError(error_message_on_escape)
+    return absolute_candidate_path
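
The new helper closes a path-traversal hole: a YAML spec whose spec.command pointed at something like ../../etc/cron.d/job could previously be written or read outside the working directory. A standalone demonstration of the same check:

    from os import path

    def confine(base_directory: str, relative_path: str) -> str:
        absolute_base = path.abspath(base_directory)
        candidate = path.abspath(path.join(absolute_base, relative_path))
        if not candidate.startswith(absolute_base + path.sep):
            raise ValueError("Path traversal detected in spec.command")
        return candidate

    print(confine(".", "handlers/train.py"))   # stays inside the base dir
    try:
        confine(".", "../../etc/passwd")       # escapes the base dir
    except ValueError as err:
        print(err)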
mlrun/serving/server.py CHANGED
@@ -746,6 +746,26 @@ async def async_execute_graph(
     return responses


+def _is_inside_asyncio_loop():
+    try:
+        asyncio.get_running_loop()
+        return True
+    except RuntimeError:
+        return False
+
+
+# Workaround for running with local=True in Jupyter (ML-10620)
+def _workaround_asyncio_nesting():
+    try:
+        import nest_asyncio
+    except ImportError:
+        raise mlrun.errors.MLRunRuntimeError(
+            "Cannot execute graph from within an already running asyncio loop. "
+            "Attempt to import nest_asyncio as a workaround failed as well."
+        )
+    nest_asyncio.apply()
+
+
 def execute_graph(
     context: MLClientCtx,
     data: DataItem,
@@ -771,6 +791,9 @@ def execute_graph(

     :return: A list of responses.
     """
+    if _is_inside_asyncio_loop():
+        _workaround_asyncio_nesting()
+
     return asyncio.run(
         async_execute_graph(
             context,
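
Background for the new guard: asyncio.run() raises RuntimeError when called while a loop is already running, which is exactly the situation in a Jupyter cell (the ML-10620 case). A minimal standalone reproduction of the detect-then-patch approach, assuming nest_asyncio is installed:

    import asyncio

    async def work():
        return 42

    try:
        asyncio.get_running_loop()   # succeeds inside Jupyter/IPython
        import nest_asyncio
        nest_asyncio.apply()         # allow nested asyncio.run() calls
    except RuntimeError:
        pass                         # plain script: no loop, no patch needed

    print(asyncio.run(work()))       # 42 in both environments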
mlrun/serving/states.py CHANGED
@@ -24,6 +24,7 @@ import inspect
 import os
 import pathlib
 import traceback
+import warnings
 from abc import ABC
 from copy import copy, deepcopy
 from inspect import getfullargspec, signature
@@ -43,9 +44,13 @@ from mlrun.datastore.datastore_profile import (
     DatastoreProfileV3io,
     datastore_profile_read,
 )
-from mlrun.datastore.model_provider.model_provider import ModelProvider
+from mlrun.datastore.model_provider.model_provider import (
+    InvokeResponseFormat,
+    ModelProvider,
+    UsageResponseKeys,
+)
 from mlrun.datastore.storeytargets import KafkaStoreyTarget, StreamStoreyTarget
-from mlrun.utils import get_data_from_path, logger, split_path
+from mlrun.utils import get_data_from_path, logger, set_data_by_path, split_path

 from ..config import config
 from ..datastore import _DummyStream, get_stream_pusher
@@ -1206,10 +1211,15 @@ class Model(storey.ParallelExecutionRunnable, ModelObj):

 class LLModel(Model):
     def __init__(
-        self, name: str, input_path: Optional[Union[str, list[str]]] = None, **kwargs
+        self,
+        name: str,
+        input_path: Optional[Union[str, list[str]]] = None,
+        result_path: Optional[Union[str, list[str]]] = None,
+        **kwargs,
     ):
         super().__init__(name, **kwargs)
         self._input_path = split_path(input_path)
+        self._result_path = split_path(result_path)

     def predict(
         self,
@@ -1221,11 +1231,14 @@ class LLModel(Model):
         if isinstance(
             self.invocation_artifact, mlrun.artifacts.LLMPromptArtifact
         ) and isinstance(self.model_provider, ModelProvider):
-            body["result"] = self.model_provider.invoke(
+            response_with_stats = self.model_provider.invoke(
                 messages=messages,
-                as_str=True,
+                invoke_response_format=InvokeResponseFormat.USAGE,
                 **(model_configuration or {}),
             )
+            set_data_by_path(
+                path=self._result_path, data=body, value=response_with_stats
+            )
         return body

     async def predict_async(
@@ -1238,11 +1251,14 @@ class LLModel(Model):
         if isinstance(
             self.invocation_artifact, mlrun.artifacts.LLMPromptArtifact
         ) and isinstance(self.model_provider, ModelProvider):
-            body["result"] = await self.model_provider.async_invoke(
+            response_with_stats = await self.model_provider.async_invoke(
                 messages=messages,
-                as_str=True,
+                invoke_response_format=InvokeResponseFormat.USAGE,
                 **(model_configuration or {}),
             )
+            set_data_by_path(
+                path=self._result_path, data=body, value=response_with_stats
+            )
         return body

     def run(self, body: Any, path: str, origin_name: Optional[str] = None) -> Any:
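
The switch from a hard-coded body["result"] to set_data_by_path lets callers route the provider response (now including usage stats) to any nested key. An illustrative stand-in for the idea; this is not mlrun.utils' implementation, and the empty-path fallback shown is an assumption:

    def set_by_path(path, data, value):
        # `path` is a list of keys, e.g. the output of split_path("a.b.c").
        if not path:                 # assumed fallback when no path is given
            data["result"] = value
            return
        node = data
        for key in path[:-1]:
            node = node.setdefault(key, {})
        node[path[-1]] = value

    body = {"request": {"prompt": "hi"}}
    set_by_path(
        ["outputs", "llm"], body, {"answer": "hello", "usage": {"total_tokens": 42}}
    )
    # body now nests the response under body["outputs"]["llm"]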
@@ -1609,6 +1625,9 @@ class ModelRunnerStep(MonitoredStep):
        :param outputs: list of the model outputs (e.g. labels) ,if provided will override the outputs
                        that been configured in the model artifact, please note that those outputs need to
                        be equal to the model_class predict method outputs (length, and order)
+
+                       When using LLModel, the output will be overridden with UsageResponseKeys.fields().
+
        :param input_path: when specified selects the key/path in the event to use as model monitoring inputs
                        this require that the event body will behave like a dict, expects scopes to be
                        defined by dot notation (e.g "data.d").
@@ -1637,7 +1656,14 @@ class ModelRunnerStep(MonitoredStep):
             raise mlrun.errors.MLRunInvalidArgumentError(
                 "Cannot provide a model object as argument to `model_class` and also provide `model_parameters`."
             )
-
+        if type(model_class) is LLModel or (
+            isinstance(model_class, str) and model_class == LLModel.__name__
+        ):
+            if outputs:
+                warnings.warn(
+                    "LLModel with existing outputs detected, overriding to default"
+                )
+            outputs = UsageResponseKeys.fields()
         model_parameters = model_parameters or (
             model_class.to_dict() if isinstance(model_class, Model) else {}
         )
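
The effect of the new branch: any outputs passed for an LLModel are discarded in favor of the usage-oriented columns. A sketch with a hypothetical UsageResponseKeys; the real member names live in mlrun's model-provider module:

    import enum
    import warnings

    class UsageResponseKeys(enum.Enum):
        # Hypothetical members for illustration only.
        ANSWER = "answer"
        TOTAL_TOKENS = "total_tokens"

        @classmethod
        def fields(cls) -> list[str]:
            return [member.value for member in cls]

    outputs = ["sentiment"]  # user-supplied outputs for an LLModel
    if outputs:
        warnings.warn("LLModel with existing outputs detected, overriding to default")
    outputs = UsageResponseKeys.fields()  # -> ["answer", "total_tokens"]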
@@ -1653,8 +1679,6 @@ class ModelRunnerStep(MonitoredStep):
         except mlrun.errors.MLRunNotFoundError:
             raise mlrun.errors.MLRunInvalidArgumentError("Artifact not found.")

-        outputs = outputs or self._get_model_output_schema(model_artifact)
-
         model_artifact = (
             model_artifact.uri
             if isinstance(model_artifact, mlrun.artifacts.Artifact)
@@ -1720,28 +1744,13 @@ class ModelRunnerStep(MonitoredStep):
         self.class_args[schemas.ModelRunnerStepData.MONITORING_DATA] = monitoring_data

     @staticmethod
-    def _get_model_output_schema(
-        model_artifact: Union[ModelArtifact, LLMPromptArtifact],
-    ) -> Optional[list[str]]:
-        if isinstance(
-            model_artifact,
-            ModelArtifact,
-        ):
-            return [feature.name for feature in model_artifact.spec.outputs]
-        elif isinstance(
-            model_artifact,
-            LLMPromptArtifact,
-        ):
-            _model_artifact = model_artifact.model_artifact
-            return [feature.name for feature in _model_artifact.spec.outputs]
-
-    @staticmethod
-    def _get_model_endpoint_output_schema(
+    def _get_model_endpoint_schema(
         name: str,
         project: str,
         uid: str,
-    ) -> list[str]:
+    ) -> tuple[list[str], list[str]]:
         output_schema = None
+        input_schema = None
         try:
             model_endpoint: mlrun.common.schemas.model_monitoring.ModelEndpoint = (
                 mlrun.db.get_run_db().get_model_endpoint(
@@ -1752,6 +1761,7 @@ class ModelRunnerStep(MonitoredStep):
                 )
             )
             output_schema = model_endpoint.spec.label_names
+            input_schema = model_endpoint.spec.feature_names
         except (
             mlrun.errors.MLRunNotFoundError,
             mlrun.errors.MLRunInvalidArgumentError,
@@ -1760,7 +1770,7 @@ class ModelRunnerStep(MonitoredStep):
                 f"Model endpoint not found, using default output schema for model {name}",
                 error=f"{type(ex).__name__}: {ex}",
             )
-        return output_schema
+        return input_schema, output_schema

     def _calculate_monitoring_data(self) -> dict[str, dict[str, str]]:
         monitoring_data = deepcopy(
@@ -1776,6 +1786,36 @@ class ModelRunnerStep(MonitoredStep):
             monitoring_data[model][schemas.MonitoringData.RESULT_PATH] = split_path(
                 monitoring_data[model][schemas.MonitoringData.RESULT_PATH]
             )
+
+            mep_output_schema, mep_input_schema = None, None
+
+            output_schema = self.class_args[
+                mlrun.common.schemas.ModelRunnerStepData.MONITORING_DATA
+            ][model][schemas.MonitoringData.OUTPUTS]
+            input_schema = self.class_args[
+                mlrun.common.schemas.ModelRunnerStepData.MONITORING_DATA
+            ][model][schemas.MonitoringData.INPUTS]
+            if not output_schema or not input_schema:
+                # if output or input schema is not provided, try to get it from the model endpoint
+                mep_input_schema, mep_output_schema = (
+                    self._get_model_endpoint_schema(
+                        model,
+                        self.context.project,
+                        monitoring_data[model].get(
+                            schemas.MonitoringData.MODEL_ENDPOINT_UID, ""
+                        ),
+                    )
+                )
+            self.class_args[
+                mlrun.common.schemas.ModelRunnerStepData.MONITORING_DATA
+            ][model][schemas.MonitoringData.OUTPUTS] = (
+                output_schema or mep_output_schema
+            )
+            self.class_args[
+                mlrun.common.schemas.ModelRunnerStepData.MONITORING_DATA
+            ][model][schemas.MonitoringData.INPUTS] = (
+                input_schema or mep_input_schema
+            )
             return monitoring_data
         else:
             raise mlrun.errors.MLRunInvalidArgumentError(
@@ -1803,6 +1843,13 @@ class ModelRunnerStep(MonitoredStep):
             .get(model_params.get("name"), {})
             .get(schemas.MonitoringData.INPUT_PATH)
         )
+        model_params[schemas.MonitoringData.RESULT_PATH] = (
+            self.class_args.get(
+                mlrun.common.schemas.ModelRunnerStepData.MONITORING_DATA, {}
+            )
+            .get(model_params.get("name"), {})
+            .get(schemas.MonitoringData.RESULT_PATH)
+        )
         model = get_class(model, namespace).from_dict(
             model_params, init_with_params=True
         )
mlrun/serving/system_steps.py CHANGED

@@ -13,6 +13,7 @@
 # limitations under the License.

 import random
+from copy import copy
 from datetime import timedelta
 from typing import Any, Optional, Union

@@ -22,6 +23,7 @@ import storey
 import mlrun
 import mlrun.artifacts
 import mlrun.common.schemas.model_monitoring as mm_schemas
+import mlrun.feature_store
 import mlrun.serving
 from mlrun.common.schemas import MonitoringData
 from mlrun.utils import get_data_from_path, logger
@@ -45,33 +47,20 @@ class MonitoringPreProcessor(storey.MapClass):
         result_path = model_monitoring_data.get(MonitoringData.RESULT_PATH)
         input_path = model_monitoring_data.get(MonitoringData.INPUT_PATH)

-        result = get_data_from_path(result_path, event.body.get(model, event.body))
         output_schema = model_monitoring_data.get(MonitoringData.OUTPUTS)
         input_schema = model_monitoring_data.get(MonitoringData.INPUTS)
-        logger.debug("output schema retrieved", output_schema=output_schema)
-        if isinstance(result, dict):
-            # transpose by key the outputs:
-            outputs = self.transpose_by_key(result, output_schema)
-            if not output_schema:
-                logger.warn(
-                    "Output schema was not provided using Project:log_model or by ModelRunnerStep:add_model order "
-                    "may not preserved"
-                )
-        else:
-            outputs = result
+        logger.debug(
+            "output and input schema retrieved",
+            output_schema=output_schema,
+            input_schema=input_schema,
+        )

-        event_inputs = event._metadata.get("inputs", {})
-        event_inputs = get_data_from_path(input_path, event_inputs)
-        if isinstance(event_inputs, dict):
-            # transpose by key the inputs:
-            inputs = self.transpose_by_key(event_inputs, input_schema)
-            if not input_schema:
-                logger.warn(
-                    "Input schema was not provided using by ModelRunnerStep:add_model, order "
-                    "may not preserved"
-                )
-        else:
-            inputs = event_inputs
+        outputs, new_output_schema = self.get_listed_data(
+            event.body.get(model, event.body), result_path, output_schema
+        )
+        inputs, new_input_schema = self.get_listed_data(
+            event._metadata.get("inputs", {}), input_path, input_schema
+        )

         if outputs and isinstance(outputs[0], list):
             if output_schema and len(output_schema) != len(outputs[0]):
@@ -96,15 +85,43 @@ class MonitoringPreProcessor(storey.MapClass):
                 "outputs and inputs are not in the same length check 'input_path' and "
                 "'output_path' was specified if needed"
             )
-        request = {"inputs": inputs, "id": getattr(event, "id", None)}
-        resp = {"outputs": outputs}
+        request = {
+            "inputs": inputs,
+            "id": getattr(event, "id", None),
+            "input_schema": new_input_schema,
+        }
+        resp = {"outputs": outputs, "output_schema": new_output_schema}

         return request, resp

+    def get_listed_data(
+        self,
+        raw_data: dict,
+        data_path: Optional[Union[list[str], str]] = None,
+        schema: Optional[list[str]] = None,
+    ):
+        """Get data from a path and transpose it by keys if dict is provided."""
+        new_schema = None
+        data_from_path = get_data_from_path(data_path, raw_data)
+        if isinstance(data_from_path, dict):
+            # transpose by key the inputs:
+            listed_data, new_schema = self.transpose_by_key(data_from_path, schema)
+            new_schema = new_schema or schema
+            if not schema:
+                logger.warn(
+                    f"No schema provided through add_model(); the order of {data_from_path} "
+                    "may not be preserved."
+                )
+        elif not isinstance(data_from_path, list):
+            listed_data = [data_from_path]
+        else:
+            listed_data = data_from_path
+        return listed_data, new_schema
+
     @staticmethod
     def transpose_by_key(
         data: dict, schema: Optional[Union[str, list[str]]] = None
-    ) -> Union[list[Any], list[list[Any]]]:
+    ) -> tuple[Union[list[Any], list[list[Any]]], list[str]]:
         """
         Transpose values from a dictionary by keys.

@@ -136,20 +153,27 @@ class MonitoringPreProcessor(storey.MapClass):
         * If result is a matrix, returns a list of lists.

         :raises ValueError: If the values include a mix of scalars and lists, or if the list lengths do not match.
+                 mlrun.MLRunInvalidArgumentError if the schema keys are not contained in the data keys.
         """
-
+        new_schema = None
+        # Normalize keys in data:
+        normalize_data = {
+            mlrun.feature_store.api.norm_column_name(k): copy(v)
+            for k, v in data.items()
+        }
         # Normalize schema to list
         if not schema:
-            keys = list(data.keys())
+            keys = list(normalize_data.keys())
+            new_schema = keys
         elif isinstance(schema, str):
-            keys = [schema]
+            keys = [mlrun.feature_store.api.norm_column_name(schema)]
         else:
-            keys = schema
+            keys = [mlrun.feature_store.api.norm_column_name(key) for key in schema]

-        values = [data[key] for key in keys if key in data]
+        values = [normalize_data[key] for key in keys if key in normalize_data]
         if len(values) != len(keys):
             raise mlrun.MLRunInvalidArgumentError(
-                f"Schema keys {keys} do not match the data keys {list(data.keys())}."
+                f"Schema keys {keys} are not contained in the data keys {list(data.keys())}."
             )

         # Detect if all are scalars ie: int,float,str
@@ -168,12 +192,12 @@ class MonitoringPreProcessor(storey.MapClass):
             mat = np.stack(arrays, axis=0)
             transposed = mat.T
         else:
-            return values[0]
+            return values[0], new_schema

         if transposed.shape[1] == 1 and transposed.shape[0] == 1:
             # Transform [[0]] -> [0]:
-            return transposed[:, 0].tolist()
-        return transposed.tolist()
+            return transposed[:, 0].tolist(), new_schema
+        return transposed.tolist(), new_schema

     def do(self, event):
         monitoring_event_list = []
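
Conceptually, transpose_by_key converts a column-oriented dict into row-oriented lists and now also reports the schema (key order) it used. A simplified standalone sketch of the core transpose; the real method additionally normalizes key names, validates lengths, and squeezes single-cell results:

    from typing import Optional

    import numpy as np

    def transpose_by_key(data: dict, schema: Optional[list[str]] = None):
        keys = schema or list(data.keys())
        values = [data[k] for k in keys]
        if all(not isinstance(v, list) for v in values):
            # one row of scalars: {"a": 1, "b": 2} -> [1, 2]
            return values, keys
        # equal-length columns become rows:
        # {"a": [1, 2], "b": [3, 4]} -> [[1, 3], [2, 4]]
        return np.stack(values, axis=0).T.tolist(), keys

    print(transpose_by_key({"a": 1, "b": 2}))            # ([1, 2], ['a', 'b'])
    print(transpose_by_key({"a": [1, 2], "b": [3, 4]}))  # ([[1, 3], [2, 4]], ['a', 'b'])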