PyPI - mlrun - Versions diffs - 1.10.0rc38__py3-none-any.whl → 1.10.0rc41__py3-none-any.whl - Mend

mlrun 1.10.0rc38__py3-none-any.whl → 1.10.0rc41__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of mlrun might be problematic. Click here for more details.

Files changed (32) hide show

mlrun/artifacts/document.py +6 -1
mlrun/common/constants.py +6 -0
mlrun/common/model_monitoring/helpers.py +1 -1
mlrun/common/schemas/model_monitoring/constants.py +0 -2
mlrun/common/secrets.py +22 -1
mlrun/launcher/local.py +2 -0
mlrun/model.py +7 -1
mlrun/model_monitoring/api.py +3 -2
mlrun/model_monitoring/applications/base.py +6 -3
mlrun/model_monitoring/applications/context.py +1 -0
mlrun/model_monitoring/db/tsdb/base.py +2 -4
mlrun/model_monitoring/db/tsdb/tdengine/tdengine_connector.py +8 -9
mlrun/model_monitoring/db/tsdb/v3io/v3io_connector.py +154 -76
mlrun/projects/project.py +15 -2
mlrun/run.py +7 -0
mlrun/runtimes/__init__.py +18 -0
mlrun/runtimes/base.py +3 -0
mlrun/runtimes/local.py +5 -2
mlrun/runtimes/mounts.py +5 -0
mlrun/runtimes/nuclio/application/application.py +2 -0
mlrun/runtimes/nuclio/function.py +2 -0
mlrun/runtimes/nuclio/serving.py +67 -4
mlrun/runtimes/pod.py +59 -10
mlrun/serving/states.py +45 -21
mlrun/utils/helpers.py +77 -2
mlrun/utils/version/version.json +2 -2
{mlrun-1.10.0rc38.dist-info → mlrun-1.10.0rc41.dist-info}/METADATA +3 -3
{mlrun-1.10.0rc38.dist-info → mlrun-1.10.0rc41.dist-info}/RECORD +32 -32
{mlrun-1.10.0rc38.dist-info → mlrun-1.10.0rc41.dist-info}/WHEEL +0 -0
{mlrun-1.10.0rc38.dist-info → mlrun-1.10.0rc41.dist-info}/entry_points.txt +0 -0
{mlrun-1.10.0rc38.dist-info → mlrun-1.10.0rc41.dist-info}/licenses/LICENSE +0 -0
{mlrun-1.10.0rc38.dist-info → mlrun-1.10.0rc41.dist-info}/top_level.txt +0 -0

mlrun/runtimes/local.py CHANGED Viewed

@@ -29,12 +29,12 @@ from os import environ, remove
 from pathlib import Path
 from subprocess import PIPE, Popen
 from sys import executable
+from typing import Optional
 from nuclio import Event
 import mlrun
 import mlrun.common.constants as mlrun_constants
-import mlrun.common.runtimes.constants
 from mlrun.lists import RunList
 from ..errors import err_to_str
@@ -201,9 +201,12 @@ class LocalRuntime(BaseRuntime, ParallelRunner):
     kind = "local"
     _is_remote = False
-    def to_job(self, image=""):
+    def to_job(self, image="", func_name: Optional[str] = None):
         struct = self.to_dict()
         obj = KubejobRuntime.from_dict(struct)
+        obj.kind = "job"  # Ensure kind is set to 'job' for KubejobRuntime
+        if func_name:
+            obj.metadata.name = func_name
         if image:
             obj.spec.image = image
         return obj

mlrun/runtimes/mounts.py CHANGED Viewed

@@ -17,6 +17,8 @@ import typing
 import warnings
 from collections import namedtuple
+import mlrun.common.secrets
+import mlrun.errors
 from mlrun.config import config
 from mlrun.config import config as mlconf
 from mlrun.errors import MLRunInvalidArgumentError
@@ -412,6 +414,9 @@ def mount_secret(
                          the specified paths, and unlisted keys will not be
                          present."""
+    if secret_name:
+        mlrun.common.secrets.validate_not_forbidden_secret(secret_name.strip())
     def _mount_secret(runtime: "KubeResource"):
         # Define the secret volume source
         secret_volume_source = {

mlrun/runtimes/nuclio/application/application.py CHANGED Viewed

@@ -400,6 +400,8 @@ class ApplicationRuntime(RemoteRuntime):
         :return: The default API gateway URL if created or True if the function is ready (deployed)
         """
+        mlrun.utils.helpers.validate_function_name(self.metadata.name)
         if (self.requires_build() and not self.spec.image) or force_build:
             self._fill_credentials()
             self._build_application_image(

mlrun/runtimes/nuclio/function.py CHANGED Viewed

@@ -655,6 +655,8 @@ class RemoteRuntime(KubeResource):
         if tag:
             self.metadata.tag = tag
+        mlrun.utils.helpers.validate_function_name(self.metadata.name)
         # Attempt auto-mounting, before sending to remote build
         self.try_auto_mount_based_on_config()
         self._fill_credentials()

mlrun/runtimes/nuclio/serving.py CHANGED Viewed

@@ -23,6 +23,7 @@ from nuclio import KafkaTrigger
 import mlrun
 import mlrun.common.schemas as schemas
+import mlrun.common.secrets
 import mlrun.datastore.datastore_profile as ds_profile
 from mlrun.datastore import get_kafka_brokers_from_dict, parse_kafka_url
 from mlrun.model import ObjectList
@@ -635,7 +636,12 @@ class ServingRuntime(RemoteRuntime):
         :returns: The Runtime (function) object
         """
+        if kind == "azure_vault" and isinstance(source, dict):
+            candidate_secret_name = (source.get("k8s_secret") or "").strip()
+            if candidate_secret_name:
+                mlrun.common.secrets.validate_not_forbidden_secret(
+                    candidate_secret_name
+                )
         if kind == "vault" and isinstance(source, list):
             source = {"project": self.metadata.project, "secrets": source}
@@ -659,6 +665,9 @@ class ServingRuntime(RemoteRuntime):
         :param builder_env: env vars dict for source archive config/credentials e.g. builder_env={"GIT_TOKEN": token}
         :param force_build: set True for force building the image
         """
+        # Validate function name before deploying to k8s
+        mlrun.utils.helpers.validate_function_name(self.metadata.name)
         load_mode = self.spec.load_mode
         if load_mode and load_mode not in ["sync", "async"]:
             raise ValueError(f"illegal model loading mode {load_mode}")
@@ -855,8 +864,20 @@ class ServingRuntime(RemoteRuntime):
         )
         self._mock_server = self.to_mock_server()
-    def to_job(self) -> KubejobRuntime:
-        """Convert this ServingRuntime to a KubejobRuntime, so that the graph can be run as a standalone job."""
+    def to_job(self, func_name: Optional[str] = None) -> KubejobRuntime:
+        """Convert this ServingRuntime to a KubejobRuntime, so that the graph can be run as a standalone job.
+        Args:
+            func_name: Optional custom name for the job function. If not provided, automatically
+                      appends '-batch' suffix to the serving function name to prevent database collision.
+        Returns:
+            KubejobRuntime configured to execute the serving graph as a batch job.
+        Note:
+            The job will have a different name than the serving function to prevent database collision.
+            The original serving function remains unchanged and can still be invoked after running the job.
+        """
         if self.spec.function_refs:
             raise mlrun.errors.MLRunInvalidArgumentError(
                 f"Cannot convert function '{self.metadata.name}' to a job because it has child functions"
@@ -890,8 +911,50 @@ class ServingRuntime(RemoteRuntime):
             parameters=self.spec.parameters,
             graph=self.spec.graph,
         )
+        job_metadata = deepcopy(self.metadata)
+        original_name = job_metadata.name
+        if func_name:
+            # User provided explicit job name
+            job_metadata.name = func_name
+            logger.debug(
+                "Creating job from serving function with custom name",
+                new_name=func_name,
+            )
+        else:
+            job_metadata.name, was_renamed, suffix = (
+                mlrun.utils.helpers.ensure_batch_job_suffix(job_metadata.name)
+            )
+            # Check if the resulting name exceeds Kubernetes length limit
+            if (
+                len(job_metadata.name)
+                > mlrun.common.constants.K8S_DNS_1123_LABEL_MAX_LENGTH
+            ):
+                raise mlrun.errors.MLRunInvalidArgumentError(
+                    f"Cannot convert serving function '{original_name}' to batch job: "
+                    f"the resulting name '{job_metadata.name}' ({len(job_metadata.name)} characters) "
+                    f"exceeds Kubernetes limit of {mlrun.common.constants.K8S_DNS_1123_LABEL_MAX_LENGTH} characters. "
+                    f"Please provide a custom name via the func_name parameter, "
+                    f"with at most {mlrun.common.constants.K8S_DNS_1123_LABEL_MAX_LENGTH} characters."
+                )
+            if was_renamed:
+                logger.info(
+                    "Creating job from serving function (auto-appended suffix to prevent collision)",
+                    new_name=job_metadata.name,
+                    suffix=suffix,
+                )
+            else:
+                logger.debug(
+                    "Creating job from serving function (name already has suffix)",
+                    name=original_name,
+                    suffix=suffix,
+                )
         job = KubejobRuntime(
             spec=spec,
-            metadata=self.metadata,
+            metadata=job_metadata,
         )
         return job

mlrun/runtimes/pod.py CHANGED Viewed

@@ -20,12 +20,14 @@ import typing
 import warnings
 from collections.abc import Iterable
 from enum import Enum
+from typing import Optional
 import dotenv
 import kubernetes.client as k8s_client
 from kubernetes.client import V1Volume, V1VolumeMount
 import mlrun.common.constants
+import mlrun.common.secrets
 import mlrun.errors
 import mlrun.runtimes.mounts
 import mlrun.utils.regex
@@ -708,19 +710,45 @@ class KubeResource(BaseRuntime):
     def spec(self, spec):
         self._spec = self._verify_dict(spec, "spec", KubeResourceSpec)
-    def set_env_from_secret(self, name, secret=None, secret_key=None):
-        """set pod environment var from secret"""
-        secret_key = secret_key or name
+    def set_env_from_secret(
+        self,
+        name: str,
+        secret: Optional[str] = None,
+        secret_key: Optional[str] = None,
+    ):
+        """
+        Set an environment variable from a Kubernetes Secret.
+        Client-side guard forbids MLRun internal auth/project secrets; no-op on API.
+        """
+        mlrun.common.secrets.validate_not_forbidden_secret(secret)
+        key = secret_key or name
         value_from = k8s_client.V1EnvVarSource(
-            secret_key_ref=k8s_client.V1SecretKeySelector(name=secret, key=secret_key)
+            secret_key_ref=k8s_client.V1SecretKeySelector(name=secret, key=key)
         )
-        return self._set_env(name, value_from=value_from)
+        return self._set_env(name=name, value_from=value_from)
-    def set_env(self, name, value=None, value_from=None):
-        """set pod environment var from value"""
-        if value is not None:
-            return self._set_env(name, value=str(value))
-        return self._set_env(name, value_from=value_from)
+    def set_env(
+        self,
+        name: str,
+        value: Optional[str] = None,
+        value_from: Optional[typing.Any] = None,
+    ):
+        """
+        Set an environment variable.
+        If value comes from a Secret, validate on client-side only.
+        """
+        if value_from is not None:
+            secret_name = self._extract_secret_name_from_value_from(
+                value_from=value_from
+            )
+            if secret_name:
+                mlrun.common.secrets.validate_not_forbidden_secret(secret_name)
+            return self._set_env(name=name, value_from=value_from)
+        # Plain literal value path
+        return self._set_env(
+            name=name, value=(str(value) if value is not None else None)
+        )
     def with_annotations(self, annotations: dict):
         """set a key/value annotations in the metadata of the pod"""
@@ -1366,6 +1394,27 @@ class KubeResource(BaseRuntime):
         return self.status.state
+    @staticmethod
+    def _extract_secret_name_from_value_from(
+        value_from: typing.Any,
+    ) -> Optional[str]:
+        """Extract secret name from a V1EnvVarSource or dict representation."""
+        if isinstance(value_from, k8s_client.V1EnvVarSource):
+            if value_from.secret_key_ref:
+                return value_from.secret_key_ref.name
+        elif isinstance(value_from, dict):
+            value_from = (
+                value_from.get("valueFrom")
+                or value_from.get("value_from")
+                or value_from
+            )
+            secret_key_ref = (value_from or {}).get("secretKeyRef") or (
+                value_from or {}
+            ).get("secret_key_ref")
+            if isinstance(secret_key_ref, dict):
+                return secret_key_ref.get("name")
+        return None
 def _resolve_if_type_sanitized(attribute_name, attribute):
     attribute_config = sanitized_attributes[attribute_name]

mlrun/serving/states.py CHANGED Viewed

@@ -591,15 +591,14 @@ class BaseStep(ModelObj):
                 root.get_shared_model_by_artifact_uri(model_artifact_uri)
             )
-            if not shared_runnable_name:
-                if not actual_shared_name:
-                    raise GraphError(
-                        f"Can't find shared model for {name} model endpoint"
-                    )
-                else:
-                    step.class_args[schemas.ModelRunnerStepData.MODELS][name][
-                        schemas.ModelsData.MODEL_PARAMETERS.value
-                    ]["shared_runnable_name"] = actual_shared_name
+            if not actual_shared_name:
+                raise GraphError(
+                    f"Can't find shared model named {shared_runnable_name}"
+                )
+            elif not shared_runnable_name:
+                step.class_args[schemas.ModelRunnerStepData.MODELS][name][
+                    schemas.ModelsData.MODEL_PARAMETERS.value
+                ]["shared_runnable_name"] = actual_shared_name
             elif actual_shared_name != shared_runnable_name:
                 raise GraphError(
                     f"Model endpoint {name} shared runnable name mismatch: "
@@ -1664,6 +1663,8 @@ class ModelRunnerStep(MonitoredStep):
     Note ModelRunnerStep can only be added to a graph that has the flow topology and running with async engine.
+    Note see config_pool_resource method documentation for default number of max threads and max processes.
     :param model_selector: ModelSelector instance whose select() method will be used to select models to run on each
       event. Optional. If not passed, all models will be run.
     :param raise_exception:  If True, an error will be raised when model selection fails or if one of the models raised
@@ -1676,7 +1677,12 @@ class ModelRunnerStep(MonitoredStep):
     """
     kind = "model_runner"
-    _dict_fields = MonitoredStep._dict_fields + ["_shared_proxy_mapping"]
+    _dict_fields = MonitoredStep._dict_fields + [
+        "_shared_proxy_mapping",
+        "max_processes",
+        "max_threads",
+        "pool_factor",
+    ]
     def __init__(
         self,
@@ -1687,6 +1693,10 @@ class ModelRunnerStep(MonitoredStep):
         raise_exception: bool = True,
         **kwargs,
     ):
+        self.max_processes = None
+        self.max_threads = None
+        self.pool_factor = None
         if isinstance(model_selector, ModelSelector) and model_selector_parameters:
             raise mlrun.errors.MLRunInvalidArgumentError(
                 "Cannot provide a model_selector object as argument to `model_selector` and also provide "
@@ -1748,6 +1758,7 @@ class ModelRunnerStep(MonitoredStep):
           2. Create a new model endpoint with the same name and set it to `latest`.
         :param override:            bool allow override existing model on the current ModelRunnerStep.
+        :raise GraphError:  when the shared model is not found in the root flow step shared models.
         """
         model_class, model_params = (
             "mlrun.serving.Model",
@@ -1865,14 +1876,6 @@ class ModelRunnerStep(MonitoredStep):
                 otherwise block the main event loop thread.
             * "asyncio" – To run in an asyncio task. This is appropriate for I/O tasks that use asyncio, allowing the
                 event loop to continue running while waiting for a response.
-            * "shared_executor" – Reuses an external executor (typically managed by the flow or context) to execute the
-                runnable. Should be used only if you have multiply `ParallelExecution` in the same flow and especially
-                useful when:
-                - You want to share a heavy resource like a large model loaded onto a GPU.
-                - You want to centralize task scheduling or coordination for multiple lightweight tasks.
-                - You aim to minimize overhead from creating new executors or processes/threads per runnable.
-                The runnable is expected to be pre-initialized and reused across events, enabling efficient use of
-                memory and hardware accelerators.
             * "naive" – To run in the main event loop. This is appropriate only for trivial computation and/or file I/O.
                 It means that the runnable will not actually be run in parallel to anything else.
@@ -2093,6 +2096,24 @@ class ModelRunnerStep(MonitoredStep):
                 "Monitoring data must be a dictionary."
             )
+    def configure_pool_resource(
+        self,
+        max_processes: Optional[int] = None,
+        max_threads: Optional[int] = None,
+        pool_factor: Optional[int] = None,
+    ) -> None:
+        """
+        Configure the resource limits for the shared models in the graph.
+        :param max_processes: Maximum number of processes to spawn (excluding dedicated processes).
+            Defaults to the number of CPUs or 16 if undetectable.
+        :param max_threads: Maximum number of threads to spawn. Defaults to 32.
+        :param pool_factor: Multiplier to scale the number of process/thread workers per runnable. Defaults to 1.
+        """
+        self.max_processes = max_processes
+        self.max_threads = max_threads
+        self.pool_factor = pool_factor
     def init_object(self, context, namespace, mode="sync", reset=False, **extra_kwargs):
         self.context = context
         if not self._is_local_function(context):
@@ -2141,6 +2162,9 @@ class ModelRunnerStep(MonitoredStep):
             shared_proxy_mapping=self._shared_proxy_mapping or None,
             name=self.name,
             context=context,
+            max_processes=self.max_processes,
+            max_threads=self.max_threads,
+            pool_factor=self.pool_factor,
         )
@@ -2983,7 +3007,7 @@ class RootFlowStep(FlowStep):
     def get_shared_model_by_artifact_uri(
         self, artifact_uri: str
-    ) -> Optional[tuple[str, str, dict]]:
+    ) -> Union[tuple[str, str, dict], tuple[None, None, None]]:
         """
         Get a shared model by its artifact URI.
         :param artifact_uri: The artifact URI of the model.
@@ -2992,9 +3016,9 @@ class RootFlowStep(FlowStep):
         for model_name, (model_class, model_params) in self.shared_models.items():
             if model_params.get("artifact_uri") == artifact_uri:
                 return model_name, model_class, model_params
-        return None
+        return None, None, None
-    def config_pool_resource(
+    def configure_shared_pool_resource(
         self,
         max_processes: Optional[int] = None,
         max_threads: Optional[int] = None,

mlrun/utils/helpers.py CHANGED Viewed

@@ -253,6 +253,40 @@ def verify_field_regex(
         return False
+def validate_function_name(name: str) -> None:
+    """
+    Validate that a function name conforms to Kubernetes DNS-1123 label requirements.
+    Function names for Kubernetes resources must:
+    - Be lowercase alphanumeric characters or '-'
+    - Start and end with an alphanumeric character
+    - Be at most 63 characters long
+    This validation should be called AFTER normalize_name() has been applied.
+    Refer to https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#dns-label-names
+    :param name: The function name to validate (after normalization)
+    :raises MLRunInvalidArgumentError: If the function name is invalid for Kubernetes
+    """
+    if not name:
+        return
+    verify_field_regex(
+        "function.metadata.name",
+        name,
+        mlrun.utils.regex.dns_1123_label,
+        raise_on_failure=True,
+        log_message=(
+            f"Function name '{name}' is invalid. "
+            "Kubernetes function names must be DNS-1123 labels: "
+            "lowercase alphanumeric characters or '-', "
+            "starting and ending with an alphanumeric character, "
+            "and at most 63 characters long."
+        ),
+    )
 def validate_builder_source(
     source: str, pull_at_runtime: bool = False, workdir: Optional[str] = None
 ):
@@ -476,6 +510,40 @@ def normalize_name(name: str):
     return name.lower()
+def ensure_batch_job_suffix(
+    function_name: typing.Optional[str],
+) -> tuple[typing.Optional[str], bool, str]:
+    """
+    Ensure that a function name has the batch job suffix appended to prevent database collision.
+    This helper is used by to_job() methods in runtimes that convert online functions (serving, local)
+    to batch processing jobs. The suffix prevents the job from overwriting the original function in
+    the database when both are stored with the same (project, name) key.
+    :param function_name: The original function name (can be None or empty string)
+    :return: A tuple of (modified_name, was_renamed, suffix) where:
+        - modified_name: The function name with the batch suffix (if not already present),
+          or empty string if input was empty
+        - was_renamed: True if the suffix was added, False if it was already present or if name was empty/None
+        - suffix: The suffix value that was used (or would have been used)
+    """
+    suffix = mlrun_constants.RESERVED_BATCH_JOB_SUFFIX
+    # Handle None or empty string
+    if not function_name:
+        return function_name, False, suffix
+    if not function_name.endswith(suffix):
+        return (
+            f"{function_name}{suffix}",
+            True,
+            suffix,
+        )
+    return function_name, False, suffix
 class LogBatchWriter:
     def __init__(self, func, batch=16, maxtime=5):
         self.batch = batch
@@ -970,8 +1038,15 @@ def enrich_image_url(
         else:
             image_url = "mlrun/mlrun"
-    if is_mlrun_image and tag and ":" not in image_url:
-        image_url = f"{image_url}:{tag}"
+    if is_mlrun_image and tag:
+        if ":" not in image_url:
+            image_url = f"{image_url}:{tag}"
+        elif enrich_kfp_python_version:
+            # For mlrun-kfp >= 1.10.0-rc0, append python suffix to existing tag
+            python_suffix = resolve_image_tag_suffix(
+                mlrun_version, client_python_version
+            )
+            image_url = f"{image_url}{python_suffix}" if python_suffix else image_url
     registry = (
         config.images_registry if is_mlrun_image else config.vendor_images_registry

mlrun/utils/version/version.json CHANGED Viewed

@@ -1,4 +1,4 @@
 {
-  "git_commit": "cc5c5639d721f37d6a1d0d0b7cf9f853f38e4707",
-  "version": "1.10.0-rc38"
+  "git_commit": "09d6e7ada4324bf80961e0d54f9fd9857852fe53",
+  "version": "1.10.0-rc41"
 }

{mlrun-1.10.0rc38.dist-info → mlrun-1.10.0rc41.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: mlrun
-Version: 1.10.0rc38
+Version: 1.10.0rc41
 Summary: Tracking and config of machine learning runs
 Home-page: https://github.com/mlrun/mlrun
 Author: Yaron Haviv
@@ -113,7 +113,7 @@ Requires-Dist: apscheduler<4,>=3.11; extra == "api"
 Requires-Dist: objgraph~=3.6; extra == "api"
 Requires-Dist: igz-mgmt~=0.4.1; extra == "api"
 Requires-Dist: humanfriendly~=10.0; extra == "api"
-Requires-Dist: fastapi~=0.116.0; extra == "api"
+Requires-Dist: fastapi~=0.120.0; extra == "api"
 Requires-Dist: sqlalchemy~=2.0; extra == "api"
 Requires-Dist: sqlalchemy-utils~=0.41.2; extra == "api"
 Requires-Dist: pymysql~=1.1; extra == "api"
@@ -203,7 +203,7 @@ Requires-Dist: dask~=2023.12.1; python_version < "3.11" and extra == "complete-a
 Requires-Dist: databricks-sdk~=0.20.0; extra == "complete-api"
 Requires-Dist: distributed==2024.8; python_version >= "3.11" and extra == "complete-api"
 Requires-Dist: distributed~=2023.12.1; python_version < "3.11" and extra == "complete-api"
-Requires-Dist: fastapi~=0.116.0; extra == "complete-api"
+Requires-Dist: fastapi~=0.120.0; extra == "complete-api"
 Requires-Dist: gcsfs<=2025.7.0,>=2025.5.1; extra == "complete-api"
 Requires-Dist: google-cloud-bigquery-storage~=2.17; extra == "complete-api"
 Requires-Dist: google-cloud-bigquery[bqstorage,pandas]==3.14.1; extra == "complete-api"

mlrun 1.10.0rc38__py3-none-any.whl → 1.10.0rc41__py3-none-any.whl

Potentially problematic release.

mlrun 1.10.0rc38__py3-none-any.whl → 1.10.0rc41__py3-none-any.whl