mlrun 1.10.0rc14__py3-none-any.whl → 1.10.0rc16__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (48)
  1. mlrun/artifacts/base.py +0 -31
  2. mlrun/artifacts/llm_prompt.py +6 -0
  3. mlrun/artifacts/manager.py +0 -5
  4. mlrun/common/constants.py +1 -0
  5. mlrun/common/schemas/__init__.py +1 -0
  6. mlrun/common/schemas/model_monitoring/__init__.py +1 -0
  7. mlrun/common/schemas/model_monitoring/functions.py +1 -1
  8. mlrun/common/schemas/model_monitoring/model_endpoints.py +10 -0
  9. mlrun/common/schemas/workflow.py +2 -0
  10. mlrun/config.py +1 -1
  11. mlrun/datastore/model_provider/model_provider.py +42 -14
  12. mlrun/datastore/model_provider/openai_provider.py +96 -15
  13. mlrun/db/base.py +20 -0
  14. mlrun/db/httpdb.py +64 -9
  15. mlrun/db/nopdb.py +13 -0
  16. mlrun/launcher/local.py +13 -0
  17. mlrun/model_monitoring/__init__.py +1 -0
  18. mlrun/model_monitoring/applications/base.py +176 -20
  19. mlrun/model_monitoring/db/_schedules.py +84 -24
  20. mlrun/model_monitoring/db/tsdb/base.py +72 -1
  21. mlrun/model_monitoring/db/tsdb/tdengine/schemas.py +7 -1
  22. mlrun/model_monitoring/db/tsdb/tdengine/tdengine_connector.py +37 -0
  23. mlrun/model_monitoring/db/tsdb/v3io/v3io_connector.py +25 -0
  24. mlrun/model_monitoring/helpers.py +26 -4
  25. mlrun/projects/pipelines.py +44 -24
  26. mlrun/projects/project.py +26 -7
  27. mlrun/runtimes/daskjob.py +6 -0
  28. mlrun/runtimes/mpijob/abstract.py +6 -0
  29. mlrun/runtimes/mpijob/v1.py +6 -0
  30. mlrun/runtimes/nuclio/application/application.py +2 -0
  31. mlrun/runtimes/nuclio/function.py +6 -0
  32. mlrun/runtimes/nuclio/serving.py +12 -11
  33. mlrun/runtimes/pod.py +21 -0
  34. mlrun/runtimes/remotesparkjob.py +6 -0
  35. mlrun/runtimes/sparkjob/spark3job.py +6 -0
  36. mlrun/runtimes/utils.py +0 -2
  37. mlrun/serving/server.py +122 -53
  38. mlrun/serving/states.py +128 -44
  39. mlrun/serving/system_steps.py +84 -58
  40. mlrun/utils/helpers.py +82 -12
  41. mlrun/utils/retryer.py +15 -2
  42. mlrun/utils/version/version.json +2 -2
  43. {mlrun-1.10.0rc14.dist-info → mlrun-1.10.0rc16.dist-info}/METADATA +2 -7
  44. {mlrun-1.10.0rc14.dist-info → mlrun-1.10.0rc16.dist-info}/RECORD +48 -48
  45. {mlrun-1.10.0rc14.dist-info → mlrun-1.10.0rc16.dist-info}/WHEEL +0 -0
  46. {mlrun-1.10.0rc14.dist-info → mlrun-1.10.0rc16.dist-info}/entry_points.txt +0 -0
  47. {mlrun-1.10.0rc14.dist-info → mlrun-1.10.0rc16.dist-info}/licenses/LICENSE +0 -0
  48. {mlrun-1.10.0rc14.dist-info → mlrun-1.10.0rc16.dist-info}/top_level.txt +0 -0
mlrun/serving/system_steps.py CHANGED
@@ -13,10 +13,10 @@
 # limitations under the License.
 
 import random
-from copy import deepcopy
 from datetime import timedelta
 from typing import Any, Optional, Union
 
+import numpy as np
 import storey
 
 import mlrun
@@ -24,7 +24,7 @@ import mlrun.artifacts
 import mlrun.common.schemas.model_monitoring as mm_schemas
 import mlrun.serving
 from mlrun.common.schemas import MonitoringData
-from mlrun.utils import logger
+from mlrun.utils import get_data_from_path, logger
 
 
 class MonitoringPreProcessor(storey.MapClass):
@@ -45,24 +45,13 @@ class MonitoringPreProcessor(storey.MapClass):
         result_path = model_monitoring_data.get(MonitoringData.RESULT_PATH)
         input_path = model_monitoring_data.get(MonitoringData.INPUT_PATH)
 
-        result = self._get_data_from_path(
-            result_path, event.body.get(model, event.body)
-        )
+        result = get_data_from_path(result_path, event.body.get(model, event.body))
         output_schema = model_monitoring_data.get(MonitoringData.OUTPUTS)
         input_schema = model_monitoring_data.get(MonitoringData.INPUTS)
         logger.debug("output schema retrieved", output_schema=output_schema)
         if isinstance(result, dict):
-            if len(result) > 1:
-                # transpose by key the outputs:
-                outputs = self.transpose_by_key(result, output_schema)
-            elif len(result) == 1:
-                outputs = (
-                    result[output_schema[0]]
-                    if output_schema
-                    else list(result.values())[0]
-                )
-            else:
-                outputs = []
+            # transpose by key the outputs:
+            outputs = self.transpose_by_key(result, output_schema)
             if not output_schema:
                 logger.warn(
                     "Output schema was not provided using Project:log_model or by ModelRunnerStep:add_model order "
@@ -72,16 +61,14 @@ class MonitoringPreProcessor(storey.MapClass):
             outputs = result
 
         event_inputs = event._metadata.get("inputs", {})
-        event_inputs = self._get_data_from_path(input_path, event_inputs)
+        event_inputs = get_data_from_path(input_path, event_inputs)
         if isinstance(event_inputs, dict):
-            if len(event_inputs) > 1:
-                # transpose by key the inputs:
-                inputs = self.transpose_by_key(event_inputs, input_schema)
-            else:
-                inputs = (
-                    event_inputs[input_schema[0]]
-                    if input_schema
-                    else list(result.values())[0]
+            # transpose by key the inputs:
+            inputs = self.transpose_by_key(event_inputs, input_schema)
+            if not input_schema:
+                logger.warn(
+                    "Input schema was not provided using by ModelRunnerStep:add_model, order "
+                    "may not preserved"
                 )
         else:
             inputs = event_inputs
@@ -104,6 +91,11 @@ class MonitoringPreProcessor(storey.MapClass):
                 output_len=len(outputs),
                 schema_len=len(output_schema),
             )
+        if len(inputs) != len(outputs):
+            logger.warn(
+                "outputs and inputs are not in the same length check 'input_path' and "
+                "'output_path' was specified if needed"
+            )
         request = {"inputs": inputs, "id": getattr(event, "id", None)}
         resp = {"outputs": outputs}
 
@@ -111,41 +103,73 @@ class MonitoringPreProcessor(storey.MapClass):
 
     @staticmethod
     def transpose_by_key(
-        data_to_transpose, schema: Optional[list[str]] = None
-    ) -> list[list[float]]:
-        values = (
-            list(data_to_transpose.values())
-            if not schema
-            else [data_to_transpose[key] for key in schema]
-        )
-        if values and not isinstance(values[0], list):
-            values = [values]
-        transposed = (
-            list(map(list, zip(*values)))
-            if all(isinstance(v, list) for v in values) and len(values) > 1
-            else values
-        )
-        return transposed
+        data: dict, schema: Optional[Union[str, list[str]]] = None
+    ) -> Union[list[float], list[list[float]]]:
+        """
+        Transpose values from a dictionary by keys.
 
-    @staticmethod
-    def _get_data_from_path(
-        path: Union[str, list[str], None], data: dict
-    ) -> dict[str, Any]:
-        if isinstance(path, str):
-            output_data = data.get(path)
-        elif isinstance(path, list):
-            output_data = deepcopy(data)
-            for key in path:
-                output_data = output_data.get(key, {})
-        elif path is None:
-            output_data = data
+        Given a dictionary and an optional schema (a key or list of keys), this function:
+        - Extracts the values for the specified keys (or all keys if no schema is provided).
+        - Ensures the data is represented as a list of rows, then transposes it (i.e., switches rows to columns).
+        - Handles edge cases:
+          * If a single scalar or single-element list is provided, returns a flat list.
+          * If a single key is provided (as a string or a list with one element), handles it properly.
+          * If only one row with len of one remains after transposition, unwraps it to avoid nested list-of-one.
+
+        Example::
+
+            transpose_by_key({"a": 1})
+            # returns: [1]
+
+            transpose_by_key({"a": [1, 2]})
+            # returns: [1, 2]
+
+            transpose_by_key({"a": [1, 2], "b": [3, 4]})
+            # returns: [[1, 3], [2, 4]]
+
+        :param data:   Dictionary with values that are either scalars or lists.
+        :param schema: Optional key or list of keys to extract. If not provided, all keys are used.
+                       Can be a string (single key) or a list of strings.
+
+        :return: Transposed values:
+                 * If result is a single column or row, returns a flat list.
+                 * If result is a matrix, returns a list of lists.
+
+        :raises ValueError: If the values include a mix of scalars and lists, or if the list lengths do not match.
+        """
+
+        # Normalize schema to list
+        if not schema:
+            keys = list(data.keys())
+        elif isinstance(schema, str):
+            keys = [schema]
         else:
-            raise mlrun.errors.MLRunInvalidArgumentError(
-                "Expected path be of type str or list of str or None"
+            keys = schema
+
+        values = [data[key] for key in keys]
+
+        # Detect if all are scalars ie: int,float,str
+        all_scalars = all(not isinstance(v, (list, tuple, np.ndarray)) for v in values)
+        all_lists = all(isinstance(v, (list, tuple, np.ndarray)) for v in values)
+
+        if not (all_scalars or all_lists):
+            raise ValueError(
+                "All values must be either scalars or lists of equal length."
            )
-        if isinstance(output_data, (int, float)):
-            output_data = [output_data]
-        return output_data
+
+        if all_scalars:
+            transposed = np.array([values])
+        elif all_lists and len(keys) > 1:
+            arrays = [np.array(v) for v in values]
+            mat = np.stack(arrays, axis=0)
+            transposed = mat.T
+        else:
+            return values[0]
+
+        if transposed.shape[1] == 1 and transposed.shape[0] == 1:
+            # Transform [[0]] -> [0]:
+            return transposed[:, 0].tolist()
+        return transposed.tolist()
 
     def do(self, event):
         monitoring_event_list = []
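
The rewritten `transpose_by_key` replaces the ad-hoc list juggling with numpy-based transposition and explicit validation. A minimal sketch of the new semantics, assuming the class path `mlrun.serving.system_steps.MonitoringPreProcessor` (inferred from the file list above); since the method is a `@staticmethod`, it can be exercised directly:

```python
from mlrun.serving.system_steps import MonitoringPreProcessor as MPP

# Single scalar value: np.array([[1]]) has shape (1, 1), so the
# [[0]] -> [0] unwrapping branch returns a flat list.
assert MPP.transpose_by_key({"a": 1}) == [1]

# One key holding a list: the len(keys) == 1 branch returns values[0] as-is.
assert MPP.transpose_by_key({"a": [1, 2]}) == [1, 2]

# Multiple list-valued keys: np.stack(...).T turns per-key columns into rows.
assert MPP.transpose_by_key({"a": [1, 2], "b": [3, 4]}) == [[1, 3], [2, 4]]

# A schema selects and orders the keys; one row of two features remains here.
assert MPP.transpose_by_key({"a": [1], "b": [2]}, schema=["b", "a"]) == [[2, 1]]

# Mixing scalars and lists now raises instead of silently mis-shaping data.
try:
    MPP.transpose_by_key({"a": 1, "b": [2, 3]})
except ValueError:
    pass
```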
@@ -337,7 +361,9 @@ class SamplingStep(storey.MapClass):
             event=event,
             sampling_percentage=self.sampling_percentage,
         )
-        if self.sampling_percentage != 100:
+        if self.sampling_percentage != 100 and not event.get(
+            mm_schemas.StreamProcessingEvent.ERROR
+        ):
             request = event[mm_schemas.StreamProcessingEvent.REQUEST]
             num_of_inputs = len(request["inputs"])
             sampled_requests_indices = self._pick_random_requests(
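
The sampling guard now also checks for an error marker, so failed events bypass sampling and flow through intact. A hedged illustration with plain dicts, using literal `"error"` and `"request"` keys as hypothetical stand-ins for the `mm_schemas.StreamProcessingEvent` enum values:

```python
# Hypothetical stand-in keys; the real code uses the mm_schemas enum values.
event = {"error": "upstream failure", "request": {"inputs": [[1, 2]]}}
sampling_percentage = 50

# Mirrors the new condition: sample only when sampling is partial AND the
# event does not carry an error.
should_sample = sampling_percentage != 100 and not event.get("error")
assert should_sample is False  # error events pass through unsampled
```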
mlrun/utils/helpers.py CHANGED
@@ -29,6 +29,7 @@ import traceback
 import typing
 import uuid
 import warnings
+from copy import deepcopy
 from datetime import datetime, timedelta, timezone
 from importlib import import_module, reload
 from os import path
@@ -162,14 +163,6 @@ def get_artifact_target(item: dict, project=None):
     return item["spec"].get("target_path")
 
 
-# TODO: Remove once data migration v5 is obsolete
-def is_legacy_artifact(artifact):
-    if isinstance(artifact, dict):
-        return "metadata" not in artifact
-    else:
-        return not hasattr(artifact, "metadata")
-
-
 logger = create_logger(config.log_level, config.log_formatter, "mlrun", sys.stdout)
 missing = object()
 
@@ -794,6 +787,22 @@ def generate_artifact_uri(
     return artifact_uri
 
 
+def remove_tag_from_artifact_uri(uri: str) -> Optional[str]:
+    """
+    Remove the `:<tag>` part from a URI with pattern:
+    [store://][<project>/]<key>[#<iter>][:<tag>][@<tree>][^<uid>]
+
+    Returns the URI without the tag section.
+
+    Examples:
+        "store://proj/key:latest" => "store://proj/key"
+        "key#1:dev@tree^uid" => "key#1@tree^uid"
+        "store://key:tag" => "store://key"
+        "store://models/remote-model-project/my_model#0@tree" => unchanged (no tag)
+    """
+    return re.sub(r"(?<=/[^/:]\+):[^@^:\s]+(?=(@|\^|$))", "", uri)
+
+
 def extend_hub_uri_if_needed(uri) -> tuple[str, bool]:
     """
     Retrieve the full uri of the item's yaml in the hub.
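
The lookbehind in the regex above may not have survived the diff rendering intact (as written, it requires a literal `+` before the tag, which contradicts the docstring examples). A hedged re-implementation of the documented behavior, not the package's exact pattern, validated against the four examples:

```python
import re

def strip_artifact_tag(uri: str) -> str:
    # Remove ":<tag>" where the tag runs up to "@<tree>", "^<uid>", or the end
    # of the URI. The char class excludes "/", so the "store://" colon never
    # matches (it is always followed by "/").
    return re.sub(r":[^@^:/\s]+(?=@|\^|$)", "", uri)

assert strip_artifact_tag("store://proj/key:latest") == "store://proj/key"
assert strip_artifact_tag("key#1:dev@tree^uid") == "key#1@tree^uid"
assert strip_artifact_tag("store://key:tag") == "store://key"
assert (
    strip_artifact_tag("store://models/remote-model-project/my_model#0@tree")
    == "store://models/remote-model-project/my_model#0@tree"  # no tag: unchanged
)
```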
@@ -1050,7 +1059,14 @@ def fill_function_hash(function_dict, tag=""):
 
 
 def retry_until_successful(
-    backoff: int, timeout: int, logger, verbose: bool, _function, *args, **kwargs
+    backoff: int,
+    timeout: int,
+    logger,
+    verbose: bool,
+    _function,
+    *args,
+    fatal_exceptions=(),
+    **kwargs,
 ):
     """
     Runs function with given *args and **kwargs.
@@ -1063,14 +1079,31 @@ def retry_until_successful(
     :param verbose: whether to log the failure on each retry
     :param _function: function to run
     :param args: functions args
+    :param fatal_exceptions: exception types that should not be retried
     :param kwargs: functions kwargs
     :return: function result
     """
-    return Retryer(backoff, timeout, logger, verbose, _function, *args, **kwargs).run()
+    return Retryer(
+        backoff,
+        timeout,
+        logger,
+        verbose,
+        _function,
+        *args,
+        fatal_exceptions=fatal_exceptions,
+        **kwargs,
+    ).run()
 
 
 async def retry_until_successful_async(
-    backoff: int, timeout: int, logger, verbose: bool, _function, *args, **kwargs
+    backoff: int,
+    timeout: int,
+    logger,
+    verbose: bool,
+    _function,
+    *args,
+    fatal_exceptions=(),
+    **kwargs,
 ):
     """
     Runs function with given *args and **kwargs.
@@ -1082,12 +1115,20 @@ async def retry_until_successful_async(
     :param logger: a logger so we can log the failures
     :param verbose: whether to log the failure on each retry
     :param _function: function to run
+    :param fatal_exceptions: exception types that should not be retried
     :param args: functions args
     :param kwargs: functions kwargs
     :return: function result
     """
     return await AsyncRetryer(
-        backoff, timeout, logger, verbose, _function, *args, **kwargs
+        backoff,
+        timeout,
+        logger,
+        verbose,
+        _function,
+        *args,
+        fatal_exceptions=fatal_exceptions,
+        **kwargs,
     ).run()
 
 
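Both sync and async helpers now thread `fatal_exceptions` through to the underlying retryer. A hedged usage sketch, assuming `retry_until_successful` and `logger` are re-exported from `mlrun.utils` (the system_steps hunk above imports `logger` from there):

```python
from mlrun.utils import logger, retry_until_successful

def flaky_call():
    # A non-transient failure; retrying it would just burn the timeout.
    raise ValueError("bad request")

# With ValueError marked fatal, the retryer should surface the error on the
# first attempt instead of sleeping and retrying until the 30s timeout.
try:
    retry_until_successful(
        1,        # backoff: seconds between attempts
        30,       # timeout: overall retry budget in seconds
        logger,
        True,     # verbose: log each failure
        flaky_call,
        fatal_exceptions=(ValueError,),
    )
except ValueError:
    pass
```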
@@ -2352,3 +2393,32 @@ def encode_user_code(
            "Consider using `with_source_archive` to add user code as a remote source to the function."
        )
    return encoded
+
+
+def split_path(path: str) -> typing.Union[str, list[str], None]:
+    if path is not None:
+        parsed_path = path.split(".")
+        if len(parsed_path) == 1:
+            parsed_path = parsed_path[0]
+        return parsed_path
+    return path
+
+
+def get_data_from_path(
+    path: typing.Union[str, list[str], None], data: dict
+) -> dict[str, Any]:
+    if isinstance(path, str):
+        output_data = data.get(path)
+    elif isinstance(path, list):
+        output_data = deepcopy(data)
+        for key in path:
+            output_data = output_data.get(key, {})
+    elif path is None:
+        output_data = data
+    else:
+        raise mlrun.errors.MLRunInvalidArgumentError(
+            "Expected path be of type str or list of str or None"
+        )
+    if isinstance(output_data, (int, float)):
+        output_data = [output_data]
+    return output_data
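
These two helpers formalize dotted-path navigation into event payloads; `get_data_from_path` is the former `MonitoringPreProcessor._get_data_from_path` relocated here, as the first file's diff shows. A short sketch, assuming both are importable from `mlrun.utils` (`get_data_from_path` demonstrably is, per the system_steps import; `split_path`'s re-export is an assumption):

```python
from mlrun.utils import get_data_from_path, split_path

event_body = {"model": {"outputs": {"prob": [0.1, 0.9]}}}

# A dotted path splits into a key list; a plain key stays a string.
assert split_path("model.outputs") == ["model", "outputs"]
assert split_path("outputs") == "outputs"

# A key list walks nested dicts level by level.
assert get_data_from_path(["model", "outputs"], event_body) == {"prob": [0.1, 0.9]}

# Scalars are wrapped in a list for downstream length checks.
assert get_data_from_path("x", {"x": 3}) == [3]

# path=None returns the data unchanged.
assert get_data_from_path(None, event_body) is event_body
```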
mlrun/utils/retryer.py CHANGED
@@ -77,7 +77,17 @@ def create_exponential_backoff(base=2, max_value=120, scale_factor=1):
 
 
 class Retryer:
-    def __init__(self, backoff, timeout, logger, verbose, function, *args, **kwargs):
+    def __init__(
+        self,
+        backoff,
+        timeout,
+        logger,
+        verbose,
+        function,
+        *args,
+        fatal_exceptions=(),
+        **kwargs,
+    ):
         """
         Initialize function retryer with given *args and **kwargs.
         Tries to run it until success or timeout reached (timeout is optional)
@@ -89,6 +99,7 @@ class Retryer:
         :param verbose: whether to log the failure on each retry
         :param _function: function to run
         :param args: functions args
+        :param fatal_exceptions: exception types that should not be retried
         :param kwargs: functions kwargs
         """
         self.backoff = backoff
@@ -96,6 +107,7 @@ class Retryer:
         self.logger = logger
         self.verbose = verbose
         self.function = function
+        self.fatal_exceptions = tuple(fatal_exceptions or ())
         self.args = args
         self.kwargs = kwargs
         self.start_time = None
@@ -107,7 +119,8 @@ class Retryer:
         while not self._timeout_exceeded():
             next_interval = self.first_interval or next(self.backoff)
             result, exc, retry = self._perform_call(next_interval)
-            if retry:
+
+            if retry and type(exc) not in self.fatal_exceptions:
                 time.sleep(next_interval)
             elif not exc:
                 return result
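
Note that `type(exc) not in self.fatal_exceptions` is an exact-type check: subclasses of a fatal exception type are still retried, unlike with an `isinstance` check. A plain-Python illustration with hypothetical exception classes:

```python
class Fatal(Exception):
    pass

class MoreSpecificFatal(Fatal):
    pass

fatal_exceptions = (Fatal,)

# Exact type match: aborts retrying.
assert type(Fatal("boom")) in fatal_exceptions

# A subclass does not match, so it would still be retried until timeout.
assert type(MoreSpecificFatal("boom")) not in fatal_exceptions
assert isinstance(MoreSpecificFatal("boom"), Fatal)  # isinstance would match
```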
mlrun/utils/version/version.json CHANGED
@@ -1,4 +1,4 @@
 {
-  "git_commit": "5f421886e871ccc04e021cd67fc4597e39ab890c",
-  "version": "1.10.0-rc14"
+  "git_commit": "78045e1e85e7c81eee93682240c4ebe7b22fa67c",
+  "version": "1.10.0-rc16"
 }
{mlrun-1.10.0rc14.dist-info → mlrun-1.10.0rc16.dist-info}/METADATA CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: mlrun
-Version: 1.10.0rc14
+Version: 1.10.0rc16
 Summary: Tracking and config of machine learning runs
 Home-page: https://github.com/mlrun/mlrun
 Author: Yaron Haviv
@@ -28,7 +28,7 @@ Requires-Dist: aiohttp-retry~=2.9
 Requires-Dist: click~=8.1
 Requires-Dist: nest-asyncio~=1.0
 Requires-Dist: ipython~=8.10
-Requires-Dist: nuclio-jupyter~=0.11.1
+Requires-Dist: nuclio-jupyter~=0.11.2
 Requires-Dist: numpy<1.27.0,>=1.26.4
 Requires-Dist: pandas<2.2,>=1.2
 Requires-Dist: pyarrow<18,>=10.0
@@ -101,8 +101,6 @@ Provides-Extra: tdengine
 Requires-Dist: taos-ws-py==0.3.2; extra == "tdengine"
 Provides-Extra: snowflake
 Requires-Dist: snowflake-connector-python~=3.7; extra == "snowflake"
-Provides-Extra: openai
-Requires-Dist: openai~=1.88; extra == "openai"
 Provides-Extra: dev-postgres
 Requires-Dist: pytest-mock-resources[postgres]~=2.12; extra == "dev-postgres"
 Provides-Extra: kfp18
@@ -148,7 +146,6 @@ Requires-Dist: graphviz~=0.20.0; extra == "all"
 Requires-Dist: kafka-python~=2.1.0; extra == "all"
 Requires-Dist: mlflow~=2.22; extra == "all"
 Requires-Dist: msrest~=0.6.21; extra == "all"
-Requires-Dist: openai~=1.88; extra == "all"
 Requires-Dist: oss2==2.18.1; extra == "all"
 Requires-Dist: ossfs==2023.12.0; extra == "all"
 Requires-Dist: plotly~=5.23; extra == "all"
@@ -180,7 +177,6 @@ Requires-Dist: graphviz~=0.20.0; extra == "complete"
 Requires-Dist: kafka-python~=2.1.0; extra == "complete"
 Requires-Dist: mlflow~=2.22; extra == "complete"
 Requires-Dist: msrest~=0.6.21; extra == "complete"
-Requires-Dist: openai~=1.88; extra == "complete"
 Requires-Dist: oss2==2.18.1; extra == "complete"
 Requires-Dist: ossfs==2023.12.0; extra == "complete"
 Requires-Dist: plotly~=5.23; extra == "complete"
@@ -223,7 +219,6 @@ Requires-Dist: mlflow~=2.22; extra == "complete-api"
 Requires-Dist: mlrun-pipelines-kfp-v1-8~=0.5.7; extra == "complete-api"
 Requires-Dist: msrest~=0.6.21; extra == "complete-api"
 Requires-Dist: objgraph~=3.6; extra == "complete-api"
-Requires-Dist: openai~=1.88; extra == "complete-api"
 Requires-Dist: oss2==2.18.1; extra == "complete-api"
 Requires-Dist: ossfs==2023.12.0; extra == "complete-api"
 Requires-Dist: plotly~=5.23; extra == "complete-api"