mlrun 1.10.0rc18__py3-none-any.whl → 1.10.0rc20__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of mlrun might be problematic.
- mlrun/__init__.py +21 -2
- mlrun/common/constants.py +1 -0
- mlrun/common/schemas/function.py +10 -0
- mlrun/common/schemas/model_monitoring/constants.py +4 -11
- mlrun/common/schemas/model_monitoring/model_endpoints.py +2 -0
- mlrun/datastore/__init__.py +9 -1
- mlrun/datastore/model_provider/huggingface_provider.py +114 -26
- mlrun/datastore/model_provider/model_provider.py +144 -70
- mlrun/datastore/model_provider/openai_provider.py +95 -37
- mlrun/db/base.py +0 -19
- mlrun/db/httpdb.py +10 -46
- mlrun/db/nopdb.py +0 -10
- mlrun/launcher/base.py +13 -6
- mlrun/model_monitoring/api.py +43 -22
- mlrun/model_monitoring/applications/base.py +1 -1
- mlrun/model_monitoring/controller.py +112 -38
- mlrun/model_monitoring/db/_schedules.py +13 -9
- mlrun/model_monitoring/stream_processing.py +16 -12
- mlrun/platforms/__init__.py +3 -2
- mlrun/projects/project.py +2 -2
- mlrun/run.py +1 -1
- mlrun/runtimes/base.py +5 -2
- mlrun/runtimes/daskjob.py +1 -0
- mlrun/runtimes/nuclio/application/application.py +84 -5
- mlrun/runtimes/nuclio/function.py +3 -1
- mlrun/serving/server.py +24 -0
- mlrun/serving/states.py +80 -30
- mlrun/serving/system_steps.py +60 -36
- mlrun/utils/helpers.py +37 -13
- mlrun/utils/notifications/notification_pusher.py +1 -1
- mlrun/utils/version/version.json +2 -2
- {mlrun-1.10.0rc18.dist-info → mlrun-1.10.0rc20.dist-info}/METADATA +4 -4
- {mlrun-1.10.0rc18.dist-info → mlrun-1.10.0rc20.dist-info}/RECORD +37 -38
- mlrun/api/schemas/__init__.py +0 -259
- {mlrun-1.10.0rc18.dist-info → mlrun-1.10.0rc20.dist-info}/WHEEL +0 -0
- {mlrun-1.10.0rc18.dist-info → mlrun-1.10.0rc20.dist-info}/entry_points.txt +0 -0
- {mlrun-1.10.0rc18.dist-info → mlrun-1.10.0rc20.dist-info}/licenses/LICENSE +0 -0
- {mlrun-1.10.0rc18.dist-info → mlrun-1.10.0rc20.dist-info}/top_level.txt +0 -0
mlrun/runtimes/nuclio/application/application.py
CHANGED
@@ -29,12 +29,13 @@ from mlrun.runtimes.nuclio.api_gateway import (
     APIGatewaySpec,
 )
 from mlrun.runtimes.nuclio.function import NuclioSpec, NuclioStatus
-from mlrun.utils import logger, update_in
+from mlrun.utils import is_valid_port, logger, update_in


 class ApplicationSpec(NuclioSpec):
     _dict_fields = NuclioSpec._dict_fields + [
         "internal_application_port",
+        "application_ports",
     ]

     def __init__(
@@ -79,6 +80,7 @@ class ApplicationSpec(NuclioSpec):
         state_thresholds=None,
         disable_default_http_trigger=None,
         internal_application_port=None,
+        application_ports=None,
     ):
         super().__init__(
             command=command,
@@ -126,11 +128,54 @@ class ApplicationSpec(NuclioSpec):
         self.min_replicas = min_replicas or 1
         self.max_replicas = max_replicas or 1

+        # initializing internal application port and application ports
+        self._internal_application_port = None
+        self._application_ports = []
+
+        application_ports = application_ports or []
+
+        # if internal_application_port is not provided, use the first application port
+        if not internal_application_port and len(application_ports) > 0:
+            internal_application_port = application_ports[0]
+
         # the port of application sidecar to which traffic will be routed from a nuclio function
         self.internal_application_port = (
             internal_application_port
             or mlrun.mlconf.function.application.default_sidecar_internal_port
         )
+        # all exposed ports by the application sidecar
+        self.application_ports = application_ports
+
+    @property
+    def application_ports(self):
+        return self._application_ports
+
+    @application_ports.setter
+    def application_ports(self, ports):
+        """
+        Set the application ports for the application sidecar.
+        The internal application port is always included and always first.
+        """
+        # Handle None / single int
+        if ports is None:
+            ports = []
+        elif isinstance(ports, int):
+            ports = [ports]
+        elif not isinstance(ports, list):
+            raise mlrun.errors.MLRunInvalidArgumentError(
+                "Application ports must be a list of integers"
+            )
+
+        # Validate and normalize
+        cleaned_ports = []
+        for port in ports:
+            is_valid_port(port, raise_on_error=True)
+            if port != self.internal_application_port:
+                cleaned_ports.append(port)
+
+        self._application_ports = [self.internal_application_port] + cleaned_ports
+
     @property
     def internal_application_port(self):
         return self._internal_application_port
@@ -138,10 +183,13 @@ class ApplicationSpec(NuclioSpec):
     @internal_application_port.setter
     def internal_application_port(self, port):
         port = int(port)
-        if port < 0 or port > 65535:
-            raise ValueError("Port must be in the range 0-65535")
+        is_valid_port(port, raise_on_error=True)
         self._internal_application_port = port

+        # when setting new internal application port, ensure that it is included in the application ports
+        # it just triggers setter logic, so setting to the same value is a no-op
+        self.application_ports = self._application_ports
+

 class ApplicationStatus(NuclioStatus):
     def __init__(
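Taken together, the two port setters keep `application_ports` and `internal_application_port` consistent: every port is range-checked, the internal port is always present in the list, and always first, and duplicates are dropped. A minimal sketch of the resulting behavior (constructor arguments beyond the two port parameters are omitted and assumed to default):

# Sketch only: mirrors the normalization rules enforced by the setters above.
from mlrun.runtimes.nuclio.application.application import ApplicationSpec

spec = ApplicationSpec(internal_application_port=8050, application_ports=[8050, 8081])
print(spec.application_ports)          # [8050, 8081] - internal port first, dedup'd
print(spec.internal_application_port)  # 8050

# Re-assigning the internal port re-runs the application_ports setter, so the
# new internal port moves to the front and the old one is kept as a plain port.
spec.internal_application_port = 8081
print(spec.application_ports)          # [8081, 8050]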
@@ -222,6 +270,32 @@ class ApplicationRuntime(RemoteRuntime):
     def set_internal_application_port(self, port: int):
         self.spec.internal_application_port = port

+    def with_sidecar(
+        self,
+        name: typing.Optional[str] = None,
+        image: typing.Optional[str] = None,
+        ports: typing.Optional[typing.Union[int, list[int]]] = None,
+        command: typing.Optional[str] = None,
+        args: typing.Optional[list[str]] = None,
+    ):
+        # wraps with_sidecar just to set the application ports
+        super().with_sidecar(
+            name=name,
+            image=image,
+            ports=ports,
+            command=command,
+            args=args,
+        )
+
+        if ports:
+            if self.spec.internal_application_port != ports[0]:
+                logger.info(
+                    f"Setting internal application port to the first port from the sidecar: {ports[0]}. "
+                    f"If this is not intended, please set the internal_application_port explicitly."
+                )
+                self.spec.internal_application_port = ports[0]
+            self.spec.application_ports = ports
+
     def pre_deploy_validation(self):
         super().pre_deploy_validation()
         if not self.spec.config.get("spec.sidecars"):
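In practice, the override means a single `with_sidecar` call now configures both port fields. A hedged usage sketch (project, function, and image names are hypothetical):

import mlrun

project = mlrun.get_or_create_project("my-project")
app = project.set_function(name="my-app", kind="application", image="myrepo/my-app:latest")

# The first port becomes the internal application port (logged at info level),
# and the full list lands in spec.application_ports.
app.with_sidecar(name="my-app-sidecar", image="myrepo/my-app:latest", ports=[8080, 9090])
assert app.spec.internal_application_port == 8080
assert app.spec.application_ports == [8080, 9090]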
@@ -431,6 +505,7 @@ class ApplicationRuntime(RemoteRuntime):
         ssl_redirect: typing.Optional[bool] = None,
         set_as_default: bool = False,
         gateway_timeout: typing.Optional[int] = None,
+        port: typing.Optional[int] = None,
     ):
         """
         Create the application API gateway. Once the application is deployed, the API gateway can be created.
@@ -447,6 +522,8 @@ class ApplicationRuntime(RemoteRuntime):
         :param set_as_default: Set the API gateway as the default for the application (`status.api_gateway`)
         :param gateway_timeout: nginx ingress timeout in sec (request timeout, when will the gateway return an
                                 error)
+        :param port:            The API gateway port, used only when direct_port_access=True
+
         :return: The API gateway URL
         """
         if not name:
@@ -467,7 +544,9 @@ class ApplicationRuntime(RemoteRuntime):
                 "Authentication credentials not provided"
             )

-        ports = self.spec.internal_application_port if direct_port_access else []
+        ports = (
+            port or self.spec.internal_application_port if direct_port_access else []
+        )

         api_gateway = APIGateway(
             APIGatewayMetadata(
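Continuing the sketch above, the new `port` argument only takes effect together with `direct_port_access=True`; otherwise the expression above yields `[]` and the gateway exposes no port:

# Hypothetical values; port falls back to spec.internal_application_port when omitted.
url = app.create_api_gateway(
    name="my-app-gateway",
    direct_port_access=True,
    port=9090,
)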
@@ -728,7 +807,7 @@ class ApplicationRuntime(RemoteRuntime):
         self.with_sidecar(
             name=self.status.sidecar_name,
             image=self.status.application_image,
-            ports=self.spec.internal_application_port,
+            ports=self.spec.application_ports,
             command=self.spec.command,
             args=self.spec.args,
         )
mlrun/runtimes/nuclio/function.py
CHANGED
@@ -29,6 +29,7 @@ from kubernetes import client
 from nuclio.deploy import find_dashboard_url, get_deploy_status
 from nuclio.triggers import V3IOStreamTrigger

+import mlrun.common.constants
 import mlrun.db
 import mlrun.errors
 import mlrun.k8s_utils
@@ -830,7 +831,8 @@ class RemoteRuntime(KubeResource):
     def _get_runtime_env(self):
         # for runtime specific env var enrichment (before deploy)
         runtime_env = {
-            "MLRUN_ACTIVE_PROJECT": self.metadata.project,
+            mlrun.common.constants.MLRUN_ACTIVE_PROJECT: self.metadata.project
+            or mlconf.active_project,
         }
         if mlconf.httpdb.api_url:
             runtime_env["MLRUN_DBPATH"] = mlconf.httpdb.api_url
mlrun/serving/server.py
CHANGED
@@ -361,6 +361,7 @@ def add_error_raiser_step(
         raise_exception=monitored_step.raise_exception,
         models_names=list(monitored_step.class_args["models"].keys()),
         model_endpoint_creation_strategy=mlrun.common.schemas.ModelEndpointCreationStrategy.SKIP,
+        function=monitored_step.function,
     )
     if monitored_step.responder:
         monitored_step.responder = False
@@ -745,6 +746,26 @@ async def async_execute_graph(
     return responses


+def _is_inside_asyncio_loop():
+    try:
+        asyncio.get_running_loop()
+        return True
+    except RuntimeError:
+        return False
+
+
+# Workaround for running with local=True in Jupyter (ML-10620)
+def _workaround_asyncio_nesting():
+    try:
+        import nest_asyncio
+    except ImportError:
+        raise mlrun.errors.MLRunRuntimeError(
+            "Cannot execute graph from within an already running asyncio loop. "
+            "Attempt to import nest_asyncio as a workaround failed as well."
+        )
+    nest_asyncio.apply()
+
+
 def execute_graph(
     context: MLClientCtx,
     data: DataItem,
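The same detect-and-patch pattern can be reproduced standalone. `nest_asyncio` is a real third-party package that patches the running loop to allow re-entrant `asyncio.run()` calls, which is exactly the Jupyter situation described above; a minimal sketch:

import asyncio

def inside_running_loop() -> bool:
    # get_running_loop() raises RuntimeError when no loop is active
    try:
        asyncio.get_running_loop()
        return True
    except RuntimeError:
        return False

async def work():
    return 42

if inside_running_loop():
    # e.g. inside Jupyter, where asyncio.run() would otherwise raise
    # "asyncio.run() cannot be called from a running event loop"
    import nest_asyncio
    nest_asyncio.apply()

print(asyncio.run(work()))  # 42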
@@ -770,6 +791,9 @@ def execute_graph(

     :return: A list of responses.
     """
+    if _is_inside_asyncio_loop():
+        _workaround_asyncio_nesting()
+
     return asyncio.run(
         async_execute_graph(
             context,
mlrun/serving/states.py
CHANGED
@@ -24,6 +24,7 @@ import inspect
 import os
 import pathlib
 import traceback
+import warnings
 from abc import ABC
 from copy import copy, deepcopy
 from inspect import getfullargspec, signature
@@ -43,12 +44,16 @@ from mlrun.datastore.datastore_profile import (
     DatastoreProfileV3io,
     datastore_profile_read,
 )
-from mlrun.datastore.model_provider.model_provider import ModelProvider
+from mlrun.datastore.model_provider.model_provider import (
+    InvokeResponseFormat,
+    ModelProvider,
+    UsageResponseKeys,
+)
 from mlrun.datastore.storeytargets import KafkaStoreyTarget, StreamStoreyTarget
-from mlrun.utils import get_data_from_path, logger, split_path
+from mlrun.utils import get_data_from_path, logger, set_data_by_path, split_path

 from ..config import config
-from ..datastore import get_stream_pusher
+from ..datastore import _DummyStream, get_stream_pusher
 from ..datastore.utils import (
     get_kafka_brokers_from_dict,
     parse_kafka_url,
@@ -1206,10 +1211,15 @@ class Model(storey.ParallelExecutionRunnable, ModelObj):

 class LLModel(Model):
     def __init__(
-        self, name: str, input_path: Optional[Union[str, list[str]]] = None, **kwargs
+        self,
+        name: str,
+        input_path: Optional[Union[str, list[str]]] = None,
+        result_path: Optional[Union[str, list[str]]] = None,
+        **kwargs,
     ):
         super().__init__(name, **kwargs)
         self._input_path = split_path(input_path)
+        self._result_path = split_path(result_path)

     def predict(
         self,
@@ -1221,11 +1231,14 @@ class LLModel(Model):
         if isinstance(
             self.invocation_artifact, mlrun.artifacts.LLMPromptArtifact
         ) and isinstance(self.model_provider, ModelProvider):
-            response = self.model_provider.invoke(
+            response_with_stats = self.model_provider.invoke(
                 messages=messages,
+                invoke_response_format=InvokeResponseFormat.USAGE,
                 **(model_configuration or {}),
             )
+            set_data_by_path(
+                path=self._result_path, data=body, value=response_with_stats
+            )
         return body

     async def predict_async(
@@ -1238,11 +1251,14 @@ class LLModel(Model):
         if isinstance(
             self.invocation_artifact, mlrun.artifacts.LLMPromptArtifact
         ) and isinstance(self.model_provider, ModelProvider):
-            response = await self.model_provider.async_invoke(
+            response_with_stats = await self.model_provider.async_invoke(
                 messages=messages,
+                invoke_response_format=InvokeResponseFormat.USAGE,
                 **(model_configuration or {}),
            )
+            set_data_by_path(
+                path=self._result_path, data=body, value=response_with_stats
+            )
         return body

     def run(self, body: Any, path: str, origin_name: Optional[str] = None) -> Any:
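The practical effect of `result_path` is that the LLM response (with usage statistics, due to `InvokeResponseFormat.USAGE`) is written into the event body at a chosen location instead of being discarded. A hypothetical configuration (the artifact wiring is omitted):

from mlrun.serving.states import LLModel

model = LLModel(
    name="my-llm",
    input_path="prompt_fields",   # where prompt variables are read from the event
    result_path="llm.response",   # split_path() turns this into ["llm", "response"]
)
# After predict(), the body is roughly:
# {"prompt_fields": {...}, "llm": {"response": {<answer and usage stats>}}}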
@@ -1287,6 +1303,7 @@ class LLModel(Model):
                 {
                     place_holder: input_data.get(body_map["field"])
                     for place_holder, body_map in prompt_legend.items()
+                    if input_data.get(body_map["field"])
                 }
                 if prompt_legend
                 else {}
@@ -1608,6 +1625,9 @@ class ModelRunnerStep(MonitoredStep):
         :param outputs: list of the model outputs (e.g. labels) ,if provided will override the outputs
                         that been configured in the model artifact, please note that those outputs need to
                         be equal to the model_class predict method outputs (length, and order)
+
+                        When using LLModel, the output will be overridden with UsageResponseKeys.fields().
+
         :param input_path: when specified selects the key/path in the event to use as model monitoring inputs
                         this require that the event body will behave like a dict, expects scopes to be
                         defined by dot notation (e.g "data.d").
@@ -1636,7 +1656,14 @@ class ModelRunnerStep(MonitoredStep):
             raise mlrun.errors.MLRunInvalidArgumentError(
                 "Cannot provide a model object as argument to `model_class` and also provide `model_parameters`."
             )
-
+        if type(model_class) is LLModel or (
+            isinstance(model_class, str) and model_class == LLModel.__name__
+        ):
+            if outputs:
+                warnings.warn(
+                    "LLModel with existing outputs detected, overriding to default"
+                )
+            outputs = UsageResponseKeys.fields()
         model_parameters = model_parameters or (
             model_class.to_dict() if isinstance(model_class, Model) else {}
         )
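Since LLModel responses are monitored on usage statistics rather than user-declared labels, any `outputs` passed alongside an LLModel are discarded with a warning and replaced by the usage fields:

# Sketch: the schema every LLModel run is forced to, per the check above.
from mlrun.datastore.model_provider.model_provider import UsageResponseKeys

print(UsageResponseKeys.fields())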
@@ -1652,8 +1679,6 @@ class ModelRunnerStep(MonitoredStep):
         except mlrun.errors.MLRunNotFoundError:
             raise mlrun.errors.MLRunInvalidArgumentError("Artifact not found.")

-        outputs = outputs or self._get_model_output_schema(model_artifact)
-
         model_artifact = (
             model_artifact.uri
             if isinstance(model_artifact, mlrun.artifacts.Artifact)
@@ -1719,28 +1744,13 @@ class ModelRunnerStep(MonitoredStep):
         self.class_args[schemas.ModelRunnerStepData.MONITORING_DATA] = monitoring_data

     @staticmethod
-    def _get_model_output_schema(
-        model_artifact: Union[ModelArtifact, LLMPromptArtifact],
-    ) -> Optional[list[str]]:
-        if isinstance(
-            model_artifact,
-            ModelArtifact,
-        ):
-            return [feature.name for feature in model_artifact.spec.outputs]
-        elif isinstance(
-            model_artifact,
-            LLMPromptArtifact,
-        ):
-            _model_artifact = model_artifact.model_artifact
-            return [feature.name for feature in _model_artifact.spec.outputs]
-
-    @staticmethod
-    def _get_model_endpoint_output_schema(
+    def _get_model_endpoint_schema(
         name: str,
         project: str,
         uid: str,
-    ) -> list[str]:
+    ) -> tuple[list[str], list[str]]:
         output_schema = None
+        input_schema = None
         try:
             model_endpoint: mlrun.common.schemas.model_monitoring.ModelEndpoint = (
                 mlrun.db.get_run_db().get_model_endpoint(
@@ -1751,6 +1761,7 @@ class ModelRunnerStep(MonitoredStep):
                 )
             )
             output_schema = model_endpoint.spec.label_names
+            input_schema = model_endpoint.spec.feature_names
         except (
             mlrun.errors.MLRunNotFoundError,
             mlrun.errors.MLRunInvalidArgumentError,
@@ -1759,7 +1770,7 @@ class ModelRunnerStep(MonitoredStep):
                 f"Model endpoint not found, using default output schema for model {name}",
                 error=f"{type(ex).__name__}: {ex}",
             )
-        return output_schema
+        return input_schema, output_schema

     def _calculate_monitoring_data(self) -> dict[str, dict[str, str]]:
         monitoring_data = deepcopy(
@@ -1775,6 +1786,36 @@ class ModelRunnerStep(MonitoredStep):
                 monitoring_data[model][schemas.MonitoringData.RESULT_PATH] = split_path(
                     monitoring_data[model][schemas.MonitoringData.RESULT_PATH]
                 )
+
+                mep_output_schema, mep_input_schema = None, None
+
+                output_schema = self.class_args[
+                    mlrun.common.schemas.ModelRunnerStepData.MONITORING_DATA
+                ][model][schemas.MonitoringData.OUTPUTS]
+                input_schema = self.class_args[
+                    mlrun.common.schemas.ModelRunnerStepData.MONITORING_DATA
+                ][model][schemas.MonitoringData.INPUTS]
+                if not output_schema or not input_schema:
+                    # if output or input schema is not provided, try to get it from the model endpoint
+                    mep_input_schema, mep_output_schema = (
+                        self._get_model_endpoint_schema(
+                            model,
+                            self.context.project,
+                            monitoring_data[model].get(
+                                schemas.MonitoringData.MODEL_ENDPOINT_UID, ""
+                            ),
+                        )
+                    )
+                self.class_args[
+                    mlrun.common.schemas.ModelRunnerStepData.MONITORING_DATA
+                ][model][schemas.MonitoringData.OUTPUTS] = (
+                    output_schema or mep_output_schema
+                )
+                self.class_args[
+                    mlrun.common.schemas.ModelRunnerStepData.MONITORING_DATA
+                ][model][schemas.MonitoringData.INPUTS] = (
+                    input_schema or mep_input_schema
+                )
             return monitoring_data
         else:
             raise mlrun.errors.MLRunInvalidArgumentError(
@@ -1802,6 +1843,13 @@ class ModelRunnerStep(MonitoredStep):
                 .get(model_params.get("name"), {})
                 .get(schemas.MonitoringData.INPUT_PATH)
             )
+            model_params[schemas.MonitoringData.RESULT_PATH] = (
+                self.class_args.get(
+                    mlrun.common.schemas.ModelRunnerStepData.MONITORING_DATA, {}
+                )
+                .get(model_params.get("name"), {})
+                .get(schemas.MonitoringData.RESULT_PATH)
+            )
             model = get_class(model, namespace).from_dict(
                 model_params, init_with_params=True
             )
@@ -3099,6 +3147,8 @@ def _init_async_objects(context, steps):
                 context=context,
                 **options,
             )
+        elif stream_path.startswith("dummy://"):
+            step._async_object = _DummyStream(context=context, **options)
         else:
             if stream_path.startswith("v3io://"):
                 endpoint, stream_path = parse_path(step.path)
mlrun/serving/system_steps.py
CHANGED
@@ -13,6 +13,7 @@
 # limitations under the License.

 import random
+from copy import copy
 from datetime import timedelta
 from typing import Any, Optional, Union

@@ -22,6 +23,7 @@ import storey
 import mlrun
 import mlrun.artifacts
 import mlrun.common.schemas.model_monitoring as mm_schemas
+import mlrun.feature_store
 import mlrun.serving
 from mlrun.common.schemas import MonitoringData
 from mlrun.utils import get_data_from_path, logger
@@ -45,33 +47,20 @@ class MonitoringPreProcessor(storey.MapClass):
         result_path = model_monitoring_data.get(MonitoringData.RESULT_PATH)
         input_path = model_monitoring_data.get(MonitoringData.INPUT_PATH)

-        result = get_data_from_path(result_path, event.body.get(model, event.body))
         output_schema = model_monitoring_data.get(MonitoringData.OUTPUTS)
         input_schema = model_monitoring_data.get(MonitoringData.INPUTS)
-        logger.debug(...)
-        ...
-                logger.warn(
-                    "Output schema was not provided using Project:log_model or by ModelRunnerStep:add_model order "
-                    "may not preserved"
-                )
-            else:
-                outputs = result
+        logger.debug(
+            "output and input schema retrieved",
+            output_schema=output_schema,
+            input_schema=input_schema,
+        )

-        ...
-                logger.warn(
-                    "Input schema was not provided using by ModelRunnerStep:add_model, order "
-                    "may not preserved"
-                )
-            else:
-                inputs = event_inputs
+        outputs, new_output_schema = self.get_listed_data(
+            event.body.get(model, event.body), result_path, output_schema
+        )
+        inputs, new_input_schema = self.get_listed_data(
+            event._metadata.get("inputs", {}), input_path, input_schema
+        )

         if outputs and isinstance(outputs[0], list):
             if output_schema and len(output_schema) != len(outputs[0]):
@@ -96,15 +85,43 @@ class MonitoringPreProcessor(storey.MapClass):
                 "outputs and inputs are not in the same length check 'input_path' and "
                 "'output_path' was specified if needed"
             )
-        request = {"inputs": inputs, "id": getattr(event, "id", None)}
-        resp = {"outputs": outputs}
+        request = {
+            "inputs": inputs,
+            "id": getattr(event, "id", None),
+            "input_schema": new_input_schema,
+        }
+        resp = {"outputs": outputs, "output_schema": new_output_schema}

         return request, resp

+    def get_listed_data(
+        self,
+        raw_data: dict,
+        data_path: Optional[Union[list[str], str]] = None,
+        schema: Optional[list[str]] = None,
+    ):
+        """Get data from a path and transpose it by keys if dict is provided."""
+        new_schema = None
+        data_from_path = get_data_from_path(data_path, raw_data)
+        if isinstance(data_from_path, dict):
+            # transpose by key the inputs:
+            listed_data, new_schema = self.transpose_by_key(data_from_path, schema)
+            new_schema = new_schema or schema
+            if not schema:
+                logger.warn(
+                    f"No schema provided through add_model(); the order of {data_from_path} "
+                    "may not be preserved."
+                )
+        elif not isinstance(data_from_path, list):
+            listed_data = [data_from_path]
+        else:
+            listed_data = data_from_path
+        return listed_data, new_schema
+
     @staticmethod
     def transpose_by_key(
         data: dict, schema: Optional[Union[str, list[str]]] = None
-    ) -> Union[list[Any], list[list[Any]]]:
+    ) -> tuple[Union[list[Any], list[list[Any]]], list[str]]:
         """
         Transpose values from a dictionary by keys.

@@ -136,20 +153,27 @@ class MonitoringPreProcessor(storey.MapClass):
         * If result is a matrix, returns a list of lists.

         :raises ValueError: If the values include a mix of scalars and lists, or if the list lengths do not match.
+                 mlrun.MLRunInvalidArgumentError if the schema keys are not contained in the data keys.
         """
-
+        new_schema = None
+        # Normalize keys in data:
+        normalize_data = {
+            mlrun.feature_store.api.norm_column_name(k): copy(v)
+            for k, v in data.items()
+        }
         # Normalize schema to list
         if not schema:
-            keys = list(data.keys())
+            keys = list(normalize_data.keys())
+            new_schema = keys
         elif isinstance(schema, str):
-            keys = [schema]
+            keys = [mlrun.feature_store.api.norm_column_name(schema)]
         else:
-            keys = schema
+            keys = [mlrun.feature_store.api.norm_column_name(key) for key in schema]

-        values = [data[key] for key in keys if key in data]
+        values = [normalize_data[key] for key in keys if key in normalize_data]
         if len(values) != len(keys):
             raise mlrun.MLRunInvalidArgumentError(
-                f"Schema keys {keys} are not contained in the data keys."
+                f"Schema keys {keys} are not contained in the data keys {list(data.keys())}."
             )

         # Detect if all are scalars ie: int,float,str
@@ -168,12 +192,12 @@ class MonitoringPreProcessor(storey.MapClass):
             mat = np.stack(arrays, axis=0)
             transposed = mat.T
         else:
-            return values[0]
+            return values[0], new_schema

         if transposed.shape[1] == 1 and transposed.shape[0] == 1:
             # Transform [[0]] -> [0]:
-            return transposed[:, 0].tolist()
-        return transposed.tolist()
+            return transposed[:, 0].tolist(), new_schema
+        return transposed.tolist(), new_schema

     def do(self, event):
         monitoring_event_list = []
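A concrete example makes the transpose easier to follow; this is a pure-numpy re-enactment of the path through `transpose_by_key` for a dict of columns (assuming `norm_column_name` maps "income level" to "income_level"):

import numpy as np

data = {"Age": [33, 41], "income level": [5, 7]}
normalized = {"Age": data["Age"], "income_level": data["income level"]}
keys = ["Age", "income_level"]  # doubles as the returned new_schema when none is given

arrays = [np.asarray(normalized[k]) for k in keys]
mat = np.stack(arrays, axis=0)  # shape (n_keys, n_samples) = (2, 2)
print(mat.T.tolist())           # [[33, 5], [41, 7]] - one row per sample

# The method now returns (values, new_schema) so callers can recover key order.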
mlrun/utils/helpers.py
CHANGED
@@ -464,17 +464,11 @@ def to_date_str(d):
         return ""


-def normalize_name(name: str, verbose: bool = True):
+def normalize_name(name: str):
     # TODO: Must match
     # [a-z0-9]([-a-z0-9]*[a-z0-9])?(\\.[a-z0-9]([-a-z0-9]*[a-z0-9])?
     name = re.sub(r"\s+", "-", name)
     if "_" in name:
-        if verbose:
-            warnings.warn(
-                "Names with underscore '_' are about to be deprecated, use dashes '-' instead. "
-                f"Replacing '{name}' underscores with dashes.",
-                FutureWarning,
-            )
         name = name.replace("_", "-")
     return name.lower()

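The behavior itself is unchanged; only the `FutureWarning` is gone:

from mlrun.utils.helpers import normalize_name

assert normalize_name("My Model_v2") == "my-model-v2"  # spaces and underscores -> dashes, lower-cased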
@@ -835,7 +829,7 @@ def extend_hub_uri_if_needed(uri) -> tuple[str, bool]:
         raise mlrun.errors.MLRunInvalidArgumentError(
             "Invalid character '/' in function name or source name"
         ) from exc
-    name = normalize_name(name=name, verbose=False)
+    name = normalize_name(name=name)
     if not source_name:
         # Searching item in all sources
         sources = db.list_hub_sources(item_name=name, tag=tag)
@@ -2409,9 +2403,7 @@ def split_path(path: str) -> typing.Union[str, list[str], None]:
     return path


-def get_data_from_path(
-    path: typing.Union[str, list[str], None], data: dict
-) -> dict[str, Any]:
+def get_data_from_path(path: typing.Union[str, list[str], None], data: dict) -> Any:
     if isinstance(path, str):
         output_data = data.get(path)
     elif isinstance(path, list):
@@ -2424,6 +2416,38 @@ def get_data_from_path(
         raise mlrun.errors.MLRunInvalidArgumentError(
             "Expected path be of type str or list of str or None"
         )
-    if isinstance(output_data, (int, float)):
-        output_data = [output_data]
     return output_data


+def is_valid_port(port: int, raise_on_error: bool = False) -> bool:
+    if not port:
+        return False
+    if 0 <= port <= 65535:
+        return True
+    if raise_on_error:
+        raise ValueError("Port must be in the range 0-65535")
+    return False
+
+
+def set_data_by_path(
+    path: typing.Union[str, list[str], None], data: dict, value
+) -> None:
+    if path is None:
+        if not isinstance(value, dict):
+            raise ValueError("When path is None, value must be a dictionary.")
+        data.update(value)
+
+    elif isinstance(path, str):
+        data[path] = value
+
+    elif isinstance(path, list):
+        current = data
+        for key in path[:-1]:
+            if key not in current or not isinstance(current[key], dict):
+                current[key] = {}
+            current = current[key]
+        current[path[-1]] = value
+    else:
+        raise mlrun.errors.MLRunInvalidArgumentError(
+            "Expected path to be of type str or list of str"
+        )