PyPI - mlrun - Versions diffs - 1.10.0rc16__py3-none-any.whl → 1.10.1rc4__py3-none-any.whl - Mend

mlrun 1.10.0rc16__py3-none-any.whl → 1.10.1rc4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of mlrun might be problematic. Click here for more details.

Files changed (101) hide show

mlrun/__init__.py +22 -2
mlrun/artifacts/document.py +6 -1
mlrun/artifacts/llm_prompt.py +21 -15
mlrun/artifacts/model.py +3 -3
mlrun/common/constants.py +9 -0
mlrun/common/formatters/artifact.py +1 -0
mlrun/common/model_monitoring/helpers.py +86 -0
mlrun/common/schemas/__init__.py +2 -0
mlrun/common/schemas/auth.py +2 -0
mlrun/common/schemas/function.py +10 -0
mlrun/common/schemas/hub.py +30 -18
mlrun/common/schemas/model_monitoring/__init__.py +2 -0
mlrun/common/schemas/model_monitoring/constants.py +30 -6
mlrun/common/schemas/model_monitoring/functions.py +13 -4
mlrun/common/schemas/model_monitoring/model_endpoints.py +11 -0
mlrun/common/schemas/pipeline.py +1 -1
mlrun/common/schemas/serving.py +3 -0
mlrun/common/schemas/workflow.py +1 -0
mlrun/common/secrets.py +22 -1
mlrun/config.py +34 -21
mlrun/datastore/__init__.py +11 -3
mlrun/datastore/azure_blob.py +162 -47
mlrun/datastore/base.py +265 -7
mlrun/datastore/datastore.py +10 -5
mlrun/datastore/datastore_profile.py +61 -5
mlrun/datastore/model_provider/huggingface_provider.py +367 -0
mlrun/datastore/model_provider/mock_model_provider.py +87 -0
mlrun/datastore/model_provider/model_provider.py +211 -74
mlrun/datastore/model_provider/openai_provider.py +243 -71
mlrun/datastore/s3.py +24 -2
mlrun/datastore/store_resources.py +4 -4
mlrun/datastore/storeytargets.py +2 -3
mlrun/datastore/utils.py +15 -3
mlrun/db/base.py +27 -19
mlrun/db/httpdb.py +57 -48
mlrun/db/nopdb.py +25 -10
mlrun/execution.py +55 -13
mlrun/hub/__init__.py +15 -0
mlrun/hub/module.py +181 -0
mlrun/k8s_utils.py +105 -16
mlrun/launcher/base.py +13 -6
mlrun/launcher/local.py +2 -0
mlrun/model.py +9 -3
mlrun/model_monitoring/api.py +66 -27
mlrun/model_monitoring/applications/__init__.py +1 -1
mlrun/model_monitoring/applications/base.py +388 -138
mlrun/model_monitoring/applications/context.py +2 -4
mlrun/model_monitoring/applications/results.py +4 -7
mlrun/model_monitoring/controller.py +239 -101
mlrun/model_monitoring/db/_schedules.py +36 -13
mlrun/model_monitoring/db/_stats.py +4 -3
mlrun/model_monitoring/db/tsdb/base.py +29 -9
mlrun/model_monitoring/db/tsdb/tdengine/schemas.py +4 -5
mlrun/model_monitoring/db/tsdb/tdengine/tdengine_connector.py +154 -50
mlrun/model_monitoring/db/tsdb/tdengine/writer_graph_steps.py +51 -0
mlrun/model_monitoring/db/tsdb/v3io/stream_graph_steps.py +17 -4
mlrun/model_monitoring/db/tsdb/v3io/v3io_connector.py +245 -51
mlrun/model_monitoring/helpers.py +28 -5
mlrun/model_monitoring/stream_processing.py +45 -14
mlrun/model_monitoring/writer.py +220 -1
mlrun/platforms/__init__.py +3 -2
mlrun/platforms/iguazio.py +7 -3
mlrun/projects/operations.py +16 -11
mlrun/projects/pipelines.py +2 -2
mlrun/projects/project.py +157 -69
mlrun/run.py +97 -20
mlrun/runtimes/__init__.py +18 -0
mlrun/runtimes/base.py +14 -6
mlrun/runtimes/daskjob.py +1 -0
mlrun/runtimes/local.py +5 -2
mlrun/runtimes/mounts.py +20 -2
mlrun/runtimes/nuclio/__init__.py +1 -0
mlrun/runtimes/nuclio/application/application.py +147 -17
mlrun/runtimes/nuclio/function.py +72 -27
mlrun/runtimes/nuclio/serving.py +102 -20
mlrun/runtimes/pod.py +213 -21
mlrun/runtimes/utils.py +49 -9
mlrun/secrets.py +54 -13
mlrun/serving/remote.py +79 -6
mlrun/serving/routers.py +23 -41
mlrun/serving/server.py +230 -40
mlrun/serving/states.py +605 -232
mlrun/serving/steps.py +62 -0
mlrun/serving/system_steps.py +136 -81
mlrun/serving/v2_serving.py +9 -10
mlrun/utils/helpers.py +215 -83
mlrun/utils/logger.py +3 -1
mlrun/utils/notifications/notification/base.py +18 -0
mlrun/utils/notifications/notification/git.py +2 -4
mlrun/utils/notifications/notification/mail.py +38 -15
mlrun/utils/notifications/notification/slack.py +2 -4
mlrun/utils/notifications/notification/webhook.py +2 -5
mlrun/utils/notifications/notification_pusher.py +1 -1
mlrun/utils/version/version.json +2 -2
{mlrun-1.10.0rc16.dist-info → mlrun-1.10.1rc4.dist-info}/METADATA +51 -50
{mlrun-1.10.0rc16.dist-info → mlrun-1.10.1rc4.dist-info}/RECORD +100 -95
mlrun/api/schemas/__init__.py +0 -259
{mlrun-1.10.0rc16.dist-info → mlrun-1.10.1rc4.dist-info}/WHEEL +0 -0
{mlrun-1.10.0rc16.dist-info → mlrun-1.10.1rc4.dist-info}/entry_points.txt +0 -0
{mlrun-1.10.0rc16.dist-info → mlrun-1.10.1rc4.dist-info}/licenses/LICENSE +0 -0
{mlrun-1.10.0rc16.dist-info → mlrun-1.10.1rc4.dist-info}/top_level.txt +0 -0

mlrun/k8s_utils.py CHANGED Viewed

@@ -26,6 +26,10 @@ from .config import config as mlconfig
 _running_inside_kubernetes_cluster = None
+K8sObj = typing.Union[kubernetes.client.V1Affinity, kubernetes.client.V1Toleration]
+SanitizedK8sObj = dict[str, typing.Any]
+K8sObjList = typing.Union[list[K8sObj], list[SanitizedK8sObj]]
 def is_running_inside_kubernetes_cluster():
     global _running_inside_kubernetes_cluster
@@ -232,6 +236,54 @@ def validate_node_selectors(
     return True
+def sanitize_k8s_objects(
+    k8s_objects: typing.Union[None, K8sObjList, SanitizedK8sObj, K8sObj],
+) -> typing.Union[list[SanitizedK8sObj], SanitizedK8sObj]:
+    """Convert K8s objects to dicts. Handles single objects or lists."""
+    api_client = kubernetes.client.ApiClient()
+    if not k8s_objects:
+        return k8s_objects
+    def _sanitize_k8s_object(k8s_obj):
+        return (
+            api_client.sanitize_for_serialization(k8s_obj)
+            if hasattr(k8s_obj, "to_dict")
+            else k8s_obj
+        )
+    return (
+        [_sanitize_k8s_object(k8s_obj) for k8s_obj in k8s_objects]
+        if isinstance(k8s_objects, list)
+        else _sanitize_k8s_object(k8s_objects)
+    )
+def sanitize_scheduling_configuration(
+    tolerations: typing.Optional[list[kubernetes.client.V1Toleration]] = None,
+    affinity: typing.Optional[kubernetes.client.V1Affinity] = None,
+) -> tuple[
+    typing.Optional[list[dict]],
+    typing.Optional[dict],
+]:
+    """
+    Sanitizes pod scheduling configuration for serialization.
+    Takes affinity and tolerations and converts them to
+    JSON-serializable dictionaries using the Kubernetes API client's
+    sanitization method.
+    Args:
+        affinity: Pod affinity/anti-affinity rules
+        tolerations: List of toleration rules
+    Returns:
+        Tuple of (sanitized_affinity, sanitized_tolerations)
+        - affinity: Sanitized dict representation or None
+        - tolerations: List of sanitized dict representations or None
+    """
+    return sanitize_k8s_objects(tolerations), sanitize_k8s_objects(affinity)
 def enrich_preemption_mode(
     preemption_mode: typing.Optional[str],
     node_selector: dict[str, str],
@@ -269,8 +321,8 @@ def enrich_preemption_mode(
         )
     enriched_node_selector = copy.deepcopy(node_selector or {})
-    enriched_tolerations = copy.deepcopy(tolerations or [])
-    enriched_affinity = copy.deepcopy(affinity)
+    enriched_tolerations = _safe_copy_tolerations(tolerations or [])
+    enriched_affinity = _safe_copy_affinity(affinity)
     preemptible_tolerations = generate_preemptible_tolerations()
     if handler := _get_mode_handler(preemption_mode):
@@ -288,6 +340,57 @@ def enrich_preemption_mode(
     )
+def _safe_copy_tolerations(
+    tolerations: list[kubernetes.client.V1Toleration],
+) -> list[kubernetes.client.V1Toleration]:
+    """
+    Safely copy a list of V1Toleration objects without mutating the originals.
+    Explicitly reconstructs V1Toleration objects instead of using deepcopy() to avoid
+    serialization errors with K8s client objects that contain threading primitives
+    and non-copyable elements like RLock objects.
+    Args:
+        tolerations: List of V1Toleration objects to copy
+    Returns:
+        New list containing copied V1Toleration objects with identical field values"""
+    return [
+        kubernetes.client.V1Toleration(
+            effect=toleration.effect,
+            key=toleration.key,
+            value=toleration.value,
+            operator=toleration.operator,
+            toleration_seconds=toleration.toleration_seconds,
+        )
+        for toleration in tolerations
+    ]
+def _safe_copy_affinity(
+    affinity: kubernetes.client.V1Affinity,
+) -> kubernetes.client.V1Affinity:
+    """
+    Safely create a deep copy of a V1Affinity object.
+    Uses K8s API client serialization/deserialization instead of deepcopy() to avoid
+    errors with threading primitives and complex internal structures in K8s objects.
+    Serializes to dict then deserializes back to a clean V1Affinity object.
+    Args:
+        affinity: V1Affinity object to copy, or None
+    Returns:
+        New V1Affinity object with identical field values, or None if input was None
+    """
+    if not affinity:
+        return None
+    api_client = kubernetes.client.ApiClient()
+    # Convert to dict then back to object properly
+    affinity_dict = api_client.sanitize_for_serialization(affinity)
+    return api_client._ApiClient__deserialize(affinity_dict, "V1Affinity")
 def _get_mode_handler(mode: str):
     return {
         mlrun.common.schemas.PreemptionModes.prevent: _handle_prevent_mode,
@@ -367,20 +470,6 @@ def _handle_allow_mode(
     list[kubernetes.client.V1Toleration],
     typing.Optional[kubernetes.client.V1Affinity],
 ]:
-    for op in [
-        mlrun.common.schemas.NodeSelectorOperator.node_selector_op_not_in.value,
-        mlrun.common.schemas.NodeSelectorOperator.node_selector_op_in.value,
-    ]:
-        affinity = _prune_affinity_node_selector_requirement(
-            generate_preemptible_node_selector_requirements(op),
-            affinity=affinity,
-        )
-    node_selector = _prune_node_selector(
-        mlconfig.get_preemptible_node_selector(),
-        enriched_node_selector=node_selector,
-    )
     tolerations = _merge_tolerations(tolerations, preemptible_tolerations)
     return node_selector, tolerations, affinity

mlrun/launcher/base.py CHANGED Viewed

@@ -157,6 +157,19 @@ class BaseLauncher(abc.ABC):
         ]:
             mlrun.utils.helpers.warn_on_deprecated_image(image)
+        # Raise an error if retry is configured for a runtime that doesn't support retries.
+        # For local runs, we intentionally skip this validation and allow the run to proceed, since they are typically
+        # used for debugging purposes, and in such cases we avoid blocking their execution.
+        if (
+            not mlrun.runtimes.RuntimeKinds.is_local_runtime(runtime.kind)
+            and run.spec.retry.count
+            and runtime.kind not in mlrun.runtimes.RuntimeKinds.retriable_runtimes()
+        ):
+            raise mlrun.errors.MLRunInvalidArgumentError(
+                f"Retry is not supported for {runtime.kind} runtime, supported runtimes are: "
+                f"{mlrun.runtimes.RuntimeKinds.retriable_runtimes()}"
+            )
     @staticmethod
     def _validate_output_path(
         runtime: "mlrun.runtimes.BaseRuntime",
@@ -268,12 +281,6 @@ class BaseLauncher(abc.ABC):
         run.metadata.name = mlrun.utils.normalize_name(
             name=name or run.metadata.name or def_name,
-            # if name or runspec.metadata.name are set then it means that is user defined name and we want to warn the
-            # user that the passed name needs to be set without underscore, if its not user defined but rather enriched
-            # from the handler(function) name then we replace the underscore without warning the user.
-            # most of the time handlers will have `_` in the handler name (python convention is to separate function
-            # words with `_`), therefore we don't want to be noisy when normalizing the run name
-            verbose=bool(name or run.metadata.name),
         )
         mlrun.utils.verify_field_regex(
             "run.metadata.name", run.metadata.name, mlrun.utils.regex.run_name

mlrun/launcher/local.py CHANGED Viewed

@@ -243,6 +243,8 @@ class ClientLocalLauncher(launcher.ClientBaseLauncher):
         # if the handler has module prefix force "local" (vs "handler") runtime
         kind = "local" if isinstance(handler, str) and "." in handler else ""
+        # Create temporary local function for execution
         fn = mlrun.new_function(meta.name, command=command, args=args, kind=kind)
         fn.metadata = meta
         setattr(fn, "_is_run_local", True)

mlrun/model.py CHANGED Viewed

@@ -29,6 +29,7 @@ import pydantic.v1.error_wrappers
 import mlrun
 import mlrun.common.constants as mlrun_constants
 import mlrun.common.schemas.notification
+import mlrun.common.secrets
 import mlrun.utils.regex
 from .utils import (
@@ -667,7 +668,7 @@ class ImageBuilder(ModelObj):
         """
         requirements = requirements or []
         self._verify_list(requirements, "requirements")
-        resolved_requirements = self._resolve_requirements(
+        resolved_requirements = self.resolve_requirements(
             requirements, requirements_file
         )
         requirements = self.requirements or [] if not overwrite else []
@@ -680,7 +681,7 @@ class ImageBuilder(ModelObj):
         self.requirements = requirements
     @staticmethod
-    def _resolve_requirements(requirements: list, requirements_file: str = "") -> list:
+    def resolve_requirements(requirements: list, requirements_file: str = "") -> list:
         requirements = requirements or []
         requirements_to_resolve = []
@@ -1616,7 +1617,12 @@ class RunTemplate(ModelObj):
         :returns: The RunTemplate object
         """
+        if kind == "azure_vault" and isinstance(source, dict):
+            candidate_secret_name = (source.get("k8s_secret") or "").strip()
+            if candidate_secret_name:
+                mlrun.common.secrets.validate_not_forbidden_secret(
+                    candidate_secret_name
+                )
         if kind == "vault" and isinstance(source, list):
             source = {"project": self.metadata.project, "secrets": source}

mlrun/model_monitoring/api.py CHANGED Viewed

@@ -18,9 +18,8 @@ from datetime import datetime
 import numpy as np
 import pandas as pd
+from deprecated import deprecated
-import mlrun.artifacts
-import mlrun.common.helpers
 import mlrun.common.schemas.model_monitoring.constants as mm_constants
 import mlrun.datastore.base
 import mlrun.feature_store
@@ -31,8 +30,9 @@ from mlrun.common.schemas.model_monitoring import (
     FunctionURI,
 )
 from mlrun.data_types.infer import InferOptions, get_df_stats
-from mlrun.utils import datetime_now, logger
+from mlrun.utils import check_if_hub_uri, datetime_now, logger, merge_requirements
+from ..common.schemas.hub import HubModuleType
 from .helpers import update_model_endpoint_last_request
 # A union of all supported dataset types:
@@ -47,6 +47,14 @@ DatasetType = typing.Union[
 ]
+# TODO: Remove this in 1.12.0
+@deprecated(
+    version="1.10.0",
+    reason="This function is deprecated and will be removed in 1.12. You can generate a model endpoint by either "
+    "deploying a monitored serving function as a real-time service or running it as an offline job. "
+    "To retrieve model endpoints, use `project.list_model_endpoints()`",
+    category=FutureWarning,
+)
 def get_or_create_model_endpoint(
     project: str,
     model_endpoint_name: str,
@@ -69,8 +77,8 @@ def get_or_create_model_endpoint(
     :param model_endpoint_name:      If a new model endpoint is created, the model endpoint name will be presented
                                      under this endpoint (applicable only to new endpoint_id).
     :param model_path:               The model store path (applicable only to new endpoint_id).
-    :param endpoint_id:              Model endpoint unique ID. If not exist in DB, will generate a new record based
-                                     on the provided `endpoint_id`.
+    :param endpoint_id:              Model endpoint unique ID. If not exist in DB, will generate a new record with a
+                                     newly generated ID.
     :param function_name:            If a new model endpoint is created, use this function name.
     :param function_tag:             If a new model endpoint is created, use this function tag.
     :param context:                  MLRun context. If `function_name` not provided, use the context to generate the
@@ -93,25 +101,26 @@ def get_or_create_model_endpoint(
         function_name = FunctionURI.from_string(
             context.to_dict()["spec"]["function"]
         ).function
-    try:
-        model_endpoint = db_session.get_model_endpoint(
-            project=project,
-            name=model_endpoint_name,
-            endpoint_id=endpoint_id,
-            function_name=function_name,
-            function_tag=function_tag or "latest",
-            feature_analysis=feature_analysis,
-        )
-        # If other fields provided, validate that they are correspond to the existing model endpoint data
-        _model_endpoint_validations(
-            model_endpoint=model_endpoint,
-            model_path=model_path,
-            sample_set_statistics=sample_set_statistics,
-        )
+    if endpoint_id or function_name:
+        try:
+            model_endpoint = db_session.get_model_endpoint(
+                project=project,
+                name=model_endpoint_name,
+                endpoint_id=endpoint_id,
+                function_name=function_name,
+                function_tag=function_tag or "latest",
+                feature_analysis=feature_analysis,
+            )
+            # If other fields provided, validate that they are correspond to the existing model endpoint data
+            _model_endpoint_validations(
+                model_endpoint=model_endpoint,
+                model_path=model_path,
+                sample_set_statistics=sample_set_statistics,
+            )
-    except (mlrun.errors.MLRunNotFoundError, mlrun.errors.MLRunInvalidArgumentError):
-        # Create a new model endpoint with the provided details
-        pass
+        except mlrun.errors.MLRunNotFoundError:
+            # Create a new model endpoint with the provided details
+            pass
     if not model_endpoint:
         model_endpoint = _generate_model_endpoint(
             project=project,
@@ -125,6 +134,13 @@ def get_or_create_model_endpoint(
     return model_endpoint
+# TODO: Remove this in 1.12.0
+@deprecated(
+    version="1.10.0",
+    reason="This function is deprecated and will be removed in 1.12. "
+    "Instead, run a monitored serving function as a job",
+    category=FutureWarning,
+)
 def record_results(
     project: str,
     model_path: str,
@@ -146,8 +162,8 @@ def record_results(
     :param model_path:               The model Store path.
     :param model_endpoint_name:      If a new model endpoint is generated, the model endpoint name will be presented
                                      under this endpoint.
-    :param endpoint_id:              Model endpoint unique ID. If not exist in DB, will generate a new record based
-                                     on the provided `endpoint_id`.
+    :param endpoint_id:              Model endpoint unique ID. If not exist in DB, will generate a new record with a
+                                     newly generated ID.
     :param function_name:            If a new model endpoint is created, use this function name for generating the
                                      function URI.
     :param context:                  MLRun context. Note that the context is required generating the model endpoint.
@@ -238,6 +254,7 @@ def _model_endpoint_validations(
             key=model_obj.key,
             iter=model_obj.iter,
             tree=model_obj.tree,
+            uid=model_obj.uid,
         )
         # Enrich the uri schema with the store prefix
@@ -327,12 +344,15 @@ def _generate_model_endpoint(
     :return `mlrun.common.schemas.ModelEndpoint` object.
     """
     current_time = datetime_now()
     model_endpoint = mlrun.common.schemas.ModelEndpoint(
         metadata=mlrun.common.schemas.ModelEndpointMetadata(
             project=project,
             name=model_endpoint_name,
             endpoint_type=mlrun.common.schemas.model_monitoring.EndpointType.BATCH_EP,
+            # Due to backwards compatibility, this endpoint will be created as a legacy batch endpoint.
+            mode=mlrun.common.schemas.model_monitoring.EndpointMode.BATCH_LEGACY,
         ),
         spec=mlrun.common.schemas.ModelEndpointSpec(
             function_name=function_name or "function",
@@ -529,8 +549,9 @@ def _create_model_monitoring_function_base(
     name: typing.Optional[str] = None,
     image: typing.Optional[str] = None,
     tag: typing.Optional[str] = None,
-    requirements: typing.Union[str, list[str], None] = None,
+    requirements: typing.Union[list[str], None] = None,
     requirements_file: str = "",
+    local_path: typing.Optional[str] = None,
     **application_kwargs,
 ) -> mlrun.runtimes.ServingRuntime:
     """
@@ -538,12 +559,30 @@ def _create_model_monitoring_function_base(
     This function does not set the labels or mounts v3io.
     """
     if name in mm_constants._RESERVED_FUNCTION_NAMES:
-        raise mlrun.errors.MLRunInvalidArgumentError(
+        raise mlrun.errors.MLRunValueError(
             "An application cannot have the following names: "
             f"{mm_constants._RESERVED_FUNCTION_NAMES}"
         )
+    _, has_valid_suffix, suffix = mlrun.utils.helpers.ensure_batch_job_suffix(name)
+    if name and not has_valid_suffix:
+        raise mlrun.errors.MLRunValueError(
+            f"Model monitoring application names cannot end with `{suffix}`"
+        )
     if func is None:
         func = ""
+    if check_if_hub_uri(func):
+        hub_module = mlrun.get_hub_module(url=func, local_path=local_path)
+        if hub_module.kind != HubModuleType.monitoring_app:
+            raise mlrun.errors.MLRunInvalidArgumentError(
+                "The provided module is not a monitoring application"
+            )
+        requirements = mlrun.model.ImageBuilder.resolve_requirements(
+            requirements, requirements_file
+        )
+        requirements = merge_requirements(
+            reqs_priority=requirements, reqs_secondary=hub_module.requirements
+        )
+        func = hub_module.get_module_file_path()
     func_obj = typing.cast(
         mlrun.runtimes.ServingRuntime,
         mlrun.code_to_function(

mlrun/model_monitoring/applications/__init__.py CHANGED Viewed

@@ -12,6 +12,6 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
-from .base import ModelMonitoringApplicationBase
+from .base import ExistingDataHandling, ModelMonitoringApplicationBase
 from .context import MonitoringApplicationContext
 from .results import ModelMonitoringApplicationMetric, ModelMonitoringApplicationResult

mlrun 1.10.0rc16__py3-none-any.whl → 1.10.1rc4__py3-none-any.whl

Potentially problematic release.

mlrun 1.10.0rc16__py3-none-any.whl → 1.10.1rc4__py3-none-any.whl