mlrun 1.10.0rc18__py3-none-any.whl → 1.11.0rc16__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- mlrun/__init__.py +24 -3
- mlrun/__main__.py +0 -4
- mlrun/artifacts/dataset.py +2 -2
- mlrun/artifacts/document.py +6 -1
- mlrun/artifacts/llm_prompt.py +21 -15
- mlrun/artifacts/model.py +3 -3
- mlrun/artifacts/plots.py +1 -1
- mlrun/{model_monitoring/db/tsdb/tdengine → auth}/__init__.py +2 -3
- mlrun/auth/nuclio.py +89 -0
- mlrun/auth/providers.py +429 -0
- mlrun/auth/utils.py +415 -0
- mlrun/common/constants.py +14 -0
- mlrun/common/model_monitoring/helpers.py +123 -0
- mlrun/common/runtimes/constants.py +28 -0
- mlrun/common/schemas/__init__.py +14 -3
- mlrun/common/schemas/alert.py +2 -2
- mlrun/common/schemas/api_gateway.py +3 -0
- mlrun/common/schemas/auth.py +12 -10
- mlrun/common/schemas/client_spec.py +4 -0
- mlrun/common/schemas/constants.py +25 -0
- mlrun/common/schemas/frontend_spec.py +1 -8
- mlrun/common/schemas/function.py +34 -0
- mlrun/common/schemas/hub.py +33 -20
- mlrun/common/schemas/model_monitoring/__init__.py +2 -1
- mlrun/common/schemas/model_monitoring/constants.py +12 -15
- mlrun/common/schemas/model_monitoring/functions.py +13 -4
- mlrun/common/schemas/model_monitoring/model_endpoints.py +11 -0
- mlrun/common/schemas/pipeline.py +1 -1
- mlrun/common/schemas/secret.py +17 -2
- mlrun/common/secrets.py +95 -1
- mlrun/common/types.py +10 -10
- mlrun/config.py +69 -19
- mlrun/data_types/infer.py +2 -2
- mlrun/datastore/__init__.py +12 -5
- mlrun/datastore/azure_blob.py +162 -47
- mlrun/datastore/base.py +274 -10
- mlrun/datastore/datastore.py +7 -2
- mlrun/datastore/datastore_profile.py +84 -22
- mlrun/datastore/model_provider/huggingface_provider.py +225 -41
- mlrun/datastore/model_provider/mock_model_provider.py +87 -0
- mlrun/datastore/model_provider/model_provider.py +206 -74
- mlrun/datastore/model_provider/openai_provider.py +226 -66
- mlrun/datastore/s3.py +39 -18
- mlrun/datastore/sources.py +1 -1
- mlrun/datastore/store_resources.py +4 -4
- mlrun/datastore/storeytargets.py +17 -12
- mlrun/datastore/targets.py +1 -1
- mlrun/datastore/utils.py +25 -6
- mlrun/datastore/v3io.py +1 -1
- mlrun/db/base.py +63 -32
- mlrun/db/httpdb.py +373 -153
- mlrun/db/nopdb.py +54 -21
- mlrun/errors.py +4 -2
- mlrun/execution.py +66 -25
- mlrun/feature_store/api.py +1 -1
- mlrun/feature_store/common.py +1 -1
- mlrun/feature_store/feature_vector_utils.py +1 -1
- mlrun/feature_store/steps.py +8 -6
- mlrun/frameworks/_common/utils.py +3 -3
- mlrun/frameworks/_dl_common/loggers/logger.py +1 -1
- mlrun/frameworks/_dl_common/loggers/tensorboard_logger.py +2 -1
- mlrun/frameworks/_ml_common/loggers/mlrun_logger.py +1 -1
- mlrun/frameworks/_ml_common/utils.py +2 -1
- mlrun/frameworks/auto_mlrun/auto_mlrun.py +4 -3
- mlrun/frameworks/lgbm/mlrun_interfaces/mlrun_interface.py +2 -1
- mlrun/frameworks/onnx/dataset.py +2 -1
- mlrun/frameworks/onnx/mlrun_interface.py +2 -1
- mlrun/frameworks/pytorch/callbacks/logging_callback.py +5 -4
- mlrun/frameworks/pytorch/callbacks/mlrun_logging_callback.py +2 -1
- mlrun/frameworks/pytorch/callbacks/tensorboard_logging_callback.py +2 -1
- mlrun/frameworks/pytorch/utils.py +2 -1
- mlrun/frameworks/sklearn/metric.py +2 -1
- mlrun/frameworks/tf_keras/callbacks/logging_callback.py +5 -4
- mlrun/frameworks/tf_keras/callbacks/mlrun_logging_callback.py +2 -1
- mlrun/frameworks/tf_keras/callbacks/tensorboard_logging_callback.py +2 -1
- mlrun/hub/__init__.py +52 -0
- mlrun/hub/base.py +142 -0
- mlrun/hub/module.py +172 -0
- mlrun/hub/step.py +113 -0
- mlrun/k8s_utils.py +105 -16
- mlrun/launcher/base.py +15 -7
- mlrun/launcher/local.py +4 -1
- mlrun/model.py +14 -4
- mlrun/model_monitoring/__init__.py +0 -1
- mlrun/model_monitoring/api.py +65 -28
- mlrun/model_monitoring/applications/__init__.py +1 -1
- mlrun/model_monitoring/applications/base.py +299 -128
- mlrun/model_monitoring/applications/context.py +2 -4
- mlrun/model_monitoring/controller.py +132 -58
- mlrun/model_monitoring/db/_schedules.py +38 -29
- mlrun/model_monitoring/db/_stats.py +6 -16
- mlrun/model_monitoring/db/tsdb/__init__.py +9 -7
- mlrun/model_monitoring/db/tsdb/base.py +29 -9
- mlrun/model_monitoring/db/tsdb/preaggregate.py +234 -0
- mlrun/model_monitoring/db/tsdb/stream_graph_steps.py +63 -0
- mlrun/model_monitoring/db/tsdb/timescaledb/queries/timescaledb_metrics_queries.py +414 -0
- mlrun/model_monitoring/db/tsdb/timescaledb/queries/timescaledb_predictions_queries.py +376 -0
- mlrun/model_monitoring/db/tsdb/timescaledb/queries/timescaledb_results_queries.py +590 -0
- mlrun/model_monitoring/db/tsdb/timescaledb/timescaledb_connection.py +434 -0
- mlrun/model_monitoring/db/tsdb/timescaledb/timescaledb_connector.py +541 -0
- mlrun/model_monitoring/db/tsdb/timescaledb/timescaledb_operations.py +808 -0
- mlrun/model_monitoring/db/tsdb/timescaledb/timescaledb_schema.py +502 -0
- mlrun/model_monitoring/db/tsdb/timescaledb/timescaledb_stream.py +163 -0
- mlrun/model_monitoring/db/tsdb/timescaledb/timescaledb_stream_graph_steps.py +60 -0
- mlrun/model_monitoring/db/tsdb/timescaledb/utils/timescaledb_dataframe_processor.py +141 -0
- mlrun/model_monitoring/db/tsdb/timescaledb/utils/timescaledb_query_builder.py +585 -0
- mlrun/model_monitoring/db/tsdb/timescaledb/writer_graph_steps.py +73 -0
- mlrun/model_monitoring/db/tsdb/v3io/stream_graph_steps.py +20 -9
- mlrun/model_monitoring/db/tsdb/v3io/v3io_connector.py +235 -51
- mlrun/model_monitoring/features_drift_table.py +2 -1
- mlrun/model_monitoring/helpers.py +30 -6
- mlrun/model_monitoring/stream_processing.py +34 -28
- mlrun/model_monitoring/writer.py +224 -4
- mlrun/package/__init__.py +2 -1
- mlrun/platforms/__init__.py +0 -43
- mlrun/platforms/iguazio.py +8 -4
- mlrun/projects/operations.py +17 -11
- mlrun/projects/pipelines.py +2 -2
- mlrun/projects/project.py +187 -123
- mlrun/run.py +95 -21
- mlrun/runtimes/__init__.py +2 -186
- mlrun/runtimes/base.py +103 -25
- mlrun/runtimes/constants.py +225 -0
- mlrun/runtimes/daskjob.py +5 -2
- mlrun/runtimes/databricks_job/databricks_runtime.py +2 -1
- mlrun/runtimes/local.py +5 -2
- mlrun/runtimes/mounts.py +20 -2
- mlrun/runtimes/nuclio/__init__.py +12 -7
- mlrun/runtimes/nuclio/api_gateway.py +36 -6
- mlrun/runtimes/nuclio/application/application.py +339 -40
- mlrun/runtimes/nuclio/function.py +222 -72
- mlrun/runtimes/nuclio/serving.py +132 -42
- mlrun/runtimes/pod.py +213 -21
- mlrun/runtimes/utils.py +49 -9
- mlrun/secrets.py +99 -14
- mlrun/serving/__init__.py +2 -0
- mlrun/serving/remote.py +84 -11
- mlrun/serving/routers.py +26 -44
- mlrun/serving/server.py +138 -51
- mlrun/serving/serving_wrapper.py +6 -2
- mlrun/serving/states.py +997 -283
- mlrun/serving/steps.py +62 -0
- mlrun/serving/system_steps.py +149 -95
- mlrun/serving/v2_serving.py +9 -10
- mlrun/track/trackers/mlflow_tracker.py +29 -31
- mlrun/utils/helpers.py +292 -94
- mlrun/utils/http.py +9 -2
- mlrun/utils/notifications/notification/base.py +18 -0
- mlrun/utils/notifications/notification/git.py +3 -5
- mlrun/utils/notifications/notification/mail.py +39 -16
- mlrun/utils/notifications/notification/slack.py +2 -4
- mlrun/utils/notifications/notification/webhook.py +2 -5
- mlrun/utils/notifications/notification_pusher.py +3 -3
- mlrun/utils/version/version.json +2 -2
- mlrun/utils/version/version.py +3 -4
- {mlrun-1.10.0rc18.dist-info → mlrun-1.11.0rc16.dist-info}/METADATA +63 -74
- {mlrun-1.10.0rc18.dist-info → mlrun-1.11.0rc16.dist-info}/RECORD +161 -143
- mlrun/api/schemas/__init__.py +0 -259
- mlrun/db/auth_utils.py +0 -152
- mlrun/model_monitoring/db/tsdb/tdengine/schemas.py +0 -344
- mlrun/model_monitoring/db/tsdb/tdengine/stream_graph_steps.py +0 -75
- mlrun/model_monitoring/db/tsdb/tdengine/tdengine_connection.py +0 -281
- mlrun/model_monitoring/db/tsdb/tdengine/tdengine_connector.py +0 -1266
- {mlrun-1.10.0rc18.dist-info → mlrun-1.11.0rc16.dist-info}/WHEEL +0 -0
- {mlrun-1.10.0rc18.dist-info → mlrun-1.11.0rc16.dist-info}/entry_points.txt +0 -0
- {mlrun-1.10.0rc18.dist-info → mlrun-1.11.0rc16.dist-info}/licenses/LICENSE +0 -0
- {mlrun-1.10.0rc18.dist-info → mlrun-1.11.0rc16.dist-info}/top_level.txt +0 -0
mlrun/runtimes/nuclio/serving.py
CHANGED
@@ -13,7 +13,7 @@
 # limitations under the License.
 import json
 import os
-import
+from base64 import b64decode
 from copy import deepcopy
 from typing import Optional, Union

@@ -22,6 +22,11 @@ from nuclio import KafkaTrigger

 import mlrun
 import mlrun.common.schemas as schemas
+import mlrun.common.secrets
+import mlrun.datastore.datastore_profile as ds_profile
+import mlrun.runtimes.kubejob as kubejob_runtime
+import mlrun.runtimes.nuclio.function as nuclio_function
+import mlrun.runtimes.pod as pod_runtime
 from mlrun.datastore import get_kafka_brokers_from_dict, parse_kafka_url
 from mlrun.model import ObjectList
 from mlrun.runtimes.function_reference import FunctionReference
@@ -42,10 +47,6 @@ from mlrun.serving.states import (
 )
 from mlrun.utils import get_caller_globals, logger, set_paths

-from .. import KubejobRuntime
-from ..pod import KubeResourceSpec
-from .function import NuclioSpec, RemoteRuntime, min_nuclio_versions
-
 serving_subkind = "serving_v2"


@@ -83,8 +84,8 @@ def new_v2_model_server(
     return f


-class ServingSpec(NuclioSpec):
-    _dict_fields = NuclioSpec._dict_fields + [
+class ServingSpec(nuclio_function.NuclioSpec):
+    _dict_fields = nuclio_function.NuclioSpec._dict_fields + [
         "graph",
         "load_mode",
         "graph_initializer",
@@ -152,6 +153,7 @@ class ServingSpec(NuclioSpec):
         disable_default_http_trigger=None,
         model_endpoint_creation_task_name=None,
         serving_spec=None,
+        auth=None,
     ):
         super().__init__(
             command=command,
@@ -193,6 +195,7 @@ class ServingSpec(NuclioSpec):
             add_templated_ingress_host_mode=add_templated_ingress_host_mode,
             disable_default_http_trigger=disable_default_http_trigger,
             serving_spec=serving_spec,
+            auth=auth,
         )

         self.models = models or {}
@@ -229,7 +232,7 @@ class ServingSpec(NuclioSpec):
         self._function_refs = ObjectList.from_list(FunctionReference, function_refs)


-class ServingRuntime(RemoteRuntime):
+class ServingRuntime(nuclio_function.RemoteRuntime):
     """MLRun Serving Runtime"""

     kind = "serving"
@@ -248,6 +251,8 @@ class ServingRuntime(RemoteRuntime):
         class_name=None,
         engine=None,
         exist_ok=False,
+        allow_cyclic: bool = False,
+        max_iterations: Optional[int] = None,
         **class_args,
     ) -> Union[RootFlowStep, RouterStep]:
         """set the serving graph topology (router/flow) and root class or params
@@ -278,14 +283,23 @@ class ServingRuntime(RemoteRuntime):
         :param class_name:      - optional for router, router class name/path or router object
         :param engine:          - optional for flow, sync or async engine
         :param exist_ok:        - allow overriding existing topology
+        :param allow_cyclic:    - allow cyclic graphs (only for async flow)
+        :param max_iterations:  - optional, max iterations for cyclic graphs (only for async flow)
         :param class_args:      - optional, router/flow class init args

-        :return graph object (fn.spec.graph)
+        :return: graph object (fn.spec.graph)
         """
         topology = topology or StepKinds.router
         if self.spec.graph and not exist_ok:
             raise mlrun.errors.MLRunInvalidArgumentError(
-                "graph topology is already set,
+                "graph topology is already set, graph was initialized, use exist_ok=True to override"
+            )
+        if allow_cyclic and (
+            topology == StepKinds.router
+            or (topology == StepKinds.flow and engine == "sync")
+        ):
+            raise mlrun.errors.MLRunInvalidArgumentError(
+                "cyclic graphs are only supported in flow topology with async engine"
             )

         if topology == StepKinds.router:
@@ -299,7 +313,11 @@ class ServingRuntime(RemoteRuntime):
             step = RouterStep(class_name=class_name, class_args=class_args)
             self.spec.graph = step
         elif topology == StepKinds.flow:
-            self.spec.graph = RootFlowStep(
+            self.spec.graph = RootFlowStep(
+                engine=engine or "async",
+                allow_cyclic=allow_cyclic,
+                max_iterations=max_iterations,
+            )
             self.spec.graph.track_models = self.spec.track_models
         else:
             raise mlrun.errors.MLRunInvalidArgumentError(
@@ -310,7 +328,6 @@ class ServingRuntime(RemoteRuntime):
     def set_tracking(
         self,
         stream_path: Optional[str] = None,
-        batch: Optional[int] = None,
         sampling_percentage: float = 100,
         stream_args: Optional[dict] = None,
         enable_tracking: bool = True,
@@ -320,7 +337,6 @@ class ServingRuntime(RemoteRuntime):

         :param stream_path:          Path/url of the tracking stream e.g. v3io:///users/mike/mystream
                                      you can use the "dummy://" path for test/simulation.
-        :param batch:                Deprecated. Micro batch size (send micro batches of N records at a time).
         :param sampling_percentage:  Down sampling events that will be pushed to the monitoring stream based on
                                      a specified percentage. e.g. 50 for 50%. By default, all events are pushed.
         :param stream_args:          Stream initialization parameters, e.g. shards, retention_in_hours, ..
@@ -368,13 +384,6 @@ class ServingRuntime(RemoteRuntime):

         if stream_path:
             self.spec.parameters["log_stream"] = stream_path
-        if batch:
-            warnings.warn(
-                "The `batch` size parameter was deprecated in version 1.8.0 and is no longer used. "
-                "It will be removed in 1.11.",
-                # TODO: Remove this in 1.11
-                FutureWarning,
-            )
         if stream_args:
             self.spec.parameters["stream_args"] = stream_args

@@ -393,7 +402,7 @@ class ServingRuntime(RemoteRuntime):
         outputs: Optional[list[str]] = None,
         **class_args,
     ):
-        """
+        """Add ml model and/or route to the function.

         Example, create a function (from the notebook), add a model class, and deploy::

@@ -401,7 +410,7 @@ class ServingRuntime(RemoteRuntime):
             fn.add_model("boost", model_path, model_class="MyClass", my_arg=5)
             fn.deploy()

-
+        Only works with router topology. For nested topologies (model under router under flow)
         need to add router to flow and use router.add_route()

         :param key:             model api key (or name:version), will determine the relative url/path
@@ -414,18 +423,19 @@ class ServingRuntime(RemoteRuntime):
                                 with multiple router steps)
         :param child_function:  child function name, when the model runs in a child function
         :param creation_strategy: Strategy for creating or updating the model endpoint:
-
-
-
-
-
-
-
-
-
-
-
-                                model
+
+            * **overwrite**: If model endpoints with the same name exist, delete the `latest`
+              one. Create a new model endpoint entry and set it as `latest`.
+
+            * **inplace** (default): If model endpoints with the same name exist, update the
+              `latest` entry. Otherwise, create a new entry.
+
+            * **archive**: If model endpoints with the same name exist, preserve them.
+              Create a new model endpoint with the same name and set it to `latest`.
+
+        :param outputs:         list of the model outputs (e.g. labels), if provided will override the outputs that were
+                                configured in the model artifact. Note that those outputs need to be equal to the
+                                model serving function outputs (length, and order).
         :param class_args:      extra kwargs to pass to the model serving class __init__
                                 (can be read in the model using .get_param(key) method)
         """
@@ -518,7 +528,7 @@ class ServingRuntime(RemoteRuntime):
         :param requirements: py package requirements file path OR list of packages
         :param kind:         mlrun function/runtime kind

-        :return function object
+        :return: function object
         """
         function_reference = FunctionReference(
             url,
@@ -633,14 +643,19 @@ class ServingRuntime(RemoteRuntime):

         :returns: The Runtime (function) object
         """
-
+        if kind == "azure_vault" and isinstance(source, dict):
+            candidate_secret_name = (source.get("k8s_secret") or "").strip()
+            if candidate_secret_name:
+                mlrun.common.secrets.validate_not_forbidden_secret(
+                    candidate_secret_name
+                )
         if kind == "vault" and isinstance(source, list):
             source = {"project": self.metadata.project, "secrets": source}

         self.spec.secret_sources.append({"kind": kind, "source": source})
         return self

-    @min_nuclio_versions("1.12.10")
+    @nuclio_function.min_nuclio_versions("1.12.10")
     def deploy(
         self,
         project="",
@@ -657,6 +672,7 @@ class ServingRuntime(RemoteRuntime):
         :param builder_env:  env vars dict for source archive config/credentials e.g. builder_env={"GIT_TOKEN": token}
         :param force_build:  set True for force building the image
         """
+
         load_mode = self.spec.load_mode
         if load_mode and load_mode not in ["sync", "async"]:
             raise ValueError(f"illegal model loading mode {load_mode}")
@@ -677,6 +693,21 @@ class ServingRuntime(RemoteRuntime):
                     f"function {function} is used in steps and is not defined, "
                     "use the .add_child_function() to specify child function attributes"
                 )
+        if (
+            isinstance(self.spec.graph, RootFlowStep)
+            and any(
+                isinstance(step_type, mlrun.serving.states.ModelRunnerStep)
+                for step_type in self.spec.graph.steps.values()
+            )
+            and self.spec.build.functionSourceCode
+        ):
+            # Add import for LLModel
+            decoded_code = b64decode(self.spec.build.functionSourceCode).decode("utf-8")
+            import_llmodel_code = "\nfrom mlrun.serving.states import LLModel\n"
+            if import_llmodel_code not in decoded_code:
+                decoded_code += import_llmodel_code
+                encoded_code = mlrun.utils.helpers.encode_user_code(decoded_code)
+                self.spec.build.functionSourceCode = encoded_code

         # Handle secret processing before handling child functions, since secrets are transferred to them
         if self.spec.secret_sources:
@@ -740,6 +771,7 @@ class ServingRuntime(RemoteRuntime):
         current_function="*",
         track_models=False,
         workdir=None,
+        stream_profile: Optional[ds_profile.DatastoreProfile] = None,
         **kwargs,
     ) -> GraphServer:
         """create mock server object for local testing/emulation
@@ -748,6 +780,7 @@ class ServingRuntime(RemoteRuntime):
         :param current_function: specify if you want to simulate a child function, * for all functions
         :param track_models:     allow model tracking (disabled by default in the mock server)
         :param workdir:          working directory to locate the source code (if not the current one)
+        :param stream_profile:   stream profile to use for the mock server output stream.
         """

         # set the namespaces/modules to look for the steps code in
@@ -787,6 +820,7 @@ class ServingRuntime(RemoteRuntime):
             logger=logger,
             is_mock=True,
             monitoring_mock=self.spec.track_models,
+            stream_profile=stream_profile,
         )

         server.graph = add_system_steps_to_graph(
@@ -835,14 +869,28 @@ class ServingRuntime(RemoteRuntime):
         )
         self._mock_server = self.to_mock_server()

-    def to_job(
-
+    def to_job(
+        self, func_name: Optional[str] = None
+    ) -> "kubejob_runtime.KubejobRuntime":
+        """Convert this ServingRuntime to a KubejobRuntime, so that the graph can be run as a standalone job.
+
+        Args:
+            func_name: Optional custom name for the job function. If not provided, automatically
+                appends '-batch' suffix to the serving function name to prevent database collision.
+
+        Returns:
+            KubejobRuntime configured to execute the serving graph as a batch job.
+
+        Note:
+            The job will have a different name than the serving function to prevent database collision.
+            The original serving function remains unchanged and can still be invoked after running the job.
+        """
         if self.spec.function_refs:
             raise mlrun.errors.MLRunInvalidArgumentError(
                 f"Cannot convert function '{self.metadata.name}' to a job because it has child functions"
             )

-        spec = KubeResourceSpec(
+        spec = pod_runtime.KubeResourceSpec(
             image=self.spec.image,
             mode=self.spec.mode,
             volumes=self.spec.volumes,
@@ -870,8 +918,50 @@ class ServingRuntime(RemoteRuntime):
             parameters=self.spec.parameters,
             graph=self.spec.graph,
         )
-
+
+        job_metadata = deepcopy(self.metadata)
+        original_name = job_metadata.name
+
+        if func_name:
+            # User provided explicit job name
+            job_metadata.name = func_name
+            logger.debug(
+                "Creating job from serving function with custom name",
+                new_name=func_name,
+            )
+        else:
+            job_metadata.name, was_renamed, suffix = (
+                mlrun.utils.helpers.ensure_batch_job_suffix(job_metadata.name)
+            )
+
+            # Check if the resulting name exceeds Kubernetes length limit
+            if (
+                len(job_metadata.name)
+                > mlrun.common.constants.K8S_DNS_1123_LABEL_MAX_LENGTH
+            ):
+                raise mlrun.errors.MLRunInvalidArgumentError(
+                    f"Cannot convert serving function '{original_name}' to batch job: "
+                    f"the resulting name '{job_metadata.name}' ({len(job_metadata.name)} characters) "
+                    f"exceeds Kubernetes limit of {mlrun.common.constants.K8S_DNS_1123_LABEL_MAX_LENGTH} characters. "
+                    f"Please provide a custom name via the func_name parameter, "
+                    f"with at most {mlrun.common.constants.K8S_DNS_1123_LABEL_MAX_LENGTH} characters."
+                )
+
+            if was_renamed:
+                logger.info(
+                    "Creating job from serving function (auto-appended suffix to prevent collision)",
+                    new_name=job_metadata.name,
+                    suffix=suffix,
+                )
+            else:
+                logger.debug(
+                    "Creating job from serving function (name already has suffix)",
+                    name=original_name,
+                    suffix=suffix,
+                )
+
+        job = kubejob_runtime.KubejobRuntime(
             spec=spec,
-            metadata=
+            metadata=job_metadata,
         )
         return job
mlrun/runtimes/pod.py
CHANGED
@@ -17,14 +17,17 @@ import os
 import re
 import time
 import typing
+import warnings
 from collections.abc import Iterable
 from enum import Enum
+from typing import Optional

 import dotenv
 import kubernetes.client as k8s_client
 from kubernetes.client import V1Volume, V1VolumeMount

 import mlrun.common.constants
+import mlrun.common.secrets
 import mlrun.errors
 import mlrun.runtimes.mounts
 import mlrun.utils.regex
@@ -35,6 +38,7 @@ from mlrun.common.schemas import (

 from ..config import config as mlconf
 from ..k8s_utils import (
+    generate_preemptible_nodes_affinity_terms,
     validate_node_selectors,
 )
 from ..utils import logger, update_in
@@ -107,6 +111,7 @@ class KubeResourceSpec(FunctionSpec):
         "track_models",
         "parameters",
         "graph",
+        "filename",
     ]
     _default_fields_to_strip = FunctionSpec._default_fields_to_strip + [
         "volumes",
@@ -705,19 +710,45 @@ class KubeResource(BaseRuntime):
     def spec(self, spec):
         self._spec = self._verify_dict(spec, "spec", KubeResourceSpec)

-    def set_env_from_secret(
-
-
+    def set_env_from_secret(
+        self,
+        name: str,
+        secret: Optional[str] = None,
+        secret_key: Optional[str] = None,
+    ):
+        """
+        Set an environment variable from a Kubernetes Secret.
+        Client-side guard forbids MLRun internal auth/project secrets; no-op on API.
+        """
+        mlrun.common.secrets.validate_not_forbidden_secret(secret)
+        key = secret_key or name
         value_from = k8s_client.V1EnvVarSource(
-            secret_key_ref=k8s_client.V1SecretKeySelector(name=secret, key=
+            secret_key_ref=k8s_client.V1SecretKeySelector(name=secret, key=key)
         )
-        return self._set_env(name, value_from=value_from)
+        return self._set_env(name=name, value_from=value_from)
+
+    def set_env(
+        self,
+        name: str,
+        value: Optional[str] = None,
+        value_from: Optional[typing.Any] = None,
+    ):
+        """
+        Set an environment variable.
+        If value comes from a Secret, validate on client-side only.
+        """
+        if value_from is not None:
+            secret_name = self._extract_secret_name_from_value_from(
+                value_from=value_from
+            )
+            if secret_name:
+                mlrun.common.secrets.validate_not_forbidden_secret(secret_name)
+            return self._set_env(name=name, value_from=value_from)

-
-
-
-
-        return self._set_env(name, value_from=value_from)
+        # Plain literal value path
+        return self._set_env(
+            name=name, value=(str(value) if value is not None else None)
+        )

     def with_annotations(self, annotations: dict):
         """set a key/value annotations in the metadata of the pod"""
@@ -874,6 +905,133 @@ class KubeResource(BaseRuntime):
         """
         self.spec.with_requests(mem, cpu, patch=patch)

+    @staticmethod
+    def detect_preemptible_node_selector(node_selector: dict[str, str]) -> list[str]:
+        """
+        Check whether any provided node selector matches preemptible selectors.
+
+        :param node_selector: User-provided node selector mapping.
+        :return: List of `"key='value'"` strings that match a preemptible selector.
+        """
+        preemptible_node_selector = mlconf.get_preemptible_node_selector()
+
+        return [
+            f"'{key}': '{val}'"
+            for key, val in node_selector.items()
+            if preemptible_node_selector.get(key) == val
+        ]
+
+    def detect_preemptible_tolerations(
+        self, tolerations: list[k8s_client.V1Toleration]
+    ) -> list[str]:
+        """
+        Check whether any provided toleration matches preemptible tolerations.
+
+        :param tolerations: User-provided tolerations.
+        :return: List of formatted toleration strings that are considered preemptible.
+        """
+        preemptible_tolerations = [
+            k8s_client.V1Toleration(
+                key=toleration.get("key"),
+                value=toleration.get("value"),
+                effect=toleration.get("effect"),
+            )
+            for toleration in mlconf.get_preemptible_tolerations()
+        ]
+
+        def _format_toleration(toleration):
+            return f"'{toleration.key}'='{toleration.value}' (effect: '{toleration.effect}')"
+
+        return [
+            _format_toleration(toleration)
+            for toleration in tolerations
+            if toleration in preemptible_tolerations
+        ]
+
+    def detect_preemptible_affinity(self, affinity: k8s_client.V1Affinity) -> list[str]:
+        """
+        Check whether any provided affinity rules match preemptible affinity configs.
+
+        :param affinity: User-provided affinity object.
+        :return: List of formatted expressions that overlap with preemptible terms.
+        """
+        preemptible_affinity_terms = generate_preemptible_nodes_affinity_terms()
+        conflicting_affinities = []
+
+        if (
+            affinity
+            and affinity.node_affinity
+            and affinity.node_affinity.required_during_scheduling_ignored_during_execution
+        ):
+            user_terms = affinity.node_affinity.required_during_scheduling_ignored_during_execution.node_selector_terms
+            for user_term in user_terms:
+                user_expressions = {
+                    (expr.key, expr.operator, tuple(expr.values or []))
+                    for expr in user_term.match_expressions or []
+                }
+
+                for preemptible_term in preemptible_affinity_terms:
+                    preemptible_expressions = {
+                        (expr.key, expr.operator, tuple(expr.values or []))
+                        for expr in preemptible_term.match_expressions or []
+                    }
+
+                    # Ensure operators match and preemptible expressions are present
+                    common_exprs = user_expressions & preemptible_expressions
+                    if common_exprs:
+                        formatted = ", ".join(
+                            f"'{key} {operator} {list(values)}'"
+                            for key, operator, values in common_exprs
+                        )
+                        conflicting_affinities.append(formatted)
+        return conflicting_affinities
+
+    def raise_preemptible_warning(
+        self,
+        node_selector: typing.Optional[dict[str, str]],
+        tolerations: typing.Optional[list[k8s_client.V1Toleration]],
+        affinity: typing.Optional[k8s_client.V1Affinity],
+    ) -> None:
+        """
+        Detect conflicts and emit a single consolidated warning if needed.
+
+        :param node_selector: User-provided node selector.
+        :param tolerations:   User-provided tolerations.
+        :param affinity:      User-provided affinity.
+        :warns: PreemptionWarning - Emitted when any of the provided selectors,
+            tolerations, or affinity terms match the configured preemptible
+            settings. The message lists the conflicting items.
+        """
+        conflict_messages = []
+
+        if node_selector:
+            ns_conflicts = ", ".join(
+                self.detect_preemptible_node_selector(node_selector)
+            )
+            if ns_conflicts:
+                conflict_messages.append(f"Node selectors: {ns_conflicts}")
+
+        if tolerations:
+            tol_conflicts = ", ".join(self.detect_preemptible_tolerations(tolerations))
+            if tol_conflicts:
+                conflict_messages.append(f"Tolerations: {tol_conflicts}")
+
+        if affinity:
+            affinity_conflicts = ", ".join(self.detect_preemptible_affinity(affinity))
+            if affinity_conflicts:
+                conflict_messages.append(f"Affinity: {affinity_conflicts}")
+
+        if conflict_messages:
+            warning_componentes = "; \n".join(conflict_messages)
+            warnings.warn(
+                f"Warning: based on MLRun's preemptible node configuration, the following components \n"
+                f"may be removed or adjusted at runtime:\n"
+                f"{warning_componentes}.\n"
+                "This adjustment depends on the function's preemption mode. \n"
+                "The list of potential adjusted preemptible selectors can be viewed here: "
+                "mlrun.mlconf.get_preemptible_node_selector() and mlrun.mlconf.get_preemptible_tolerations()."
+            )
+
     def with_node_selection(
         self,
         node_name: typing.Optional[str] = None,
@@ -882,18 +1040,26 @@ class KubeResource(BaseRuntime):
         tolerations: typing.Optional[list[k8s_client.V1Toleration]] = None,
     ):
         """
-
-
-
-
-
-
-
-        :param
-
-
-
+        Configure Kubernetes node scheduling for this function.
+
+        Updates one or more scheduling hints: exact node pinning, label-based selection,
+        affinity/anti-affinity rules, and taint tolerations. Passing ``None`` leaves the
+        current value unchanged; pass an empty dict/list (e.g., ``{}``, ``[]``) to clear.
+
+        :param node_name:     Exact Kubernetes node name to pin the pod to.
+        :param node_selector: Mapping of label selectors. Use ``{}`` to clear.
+        :param affinity:      :class:`kubernetes.client.V1Affinity` constraints.
+        :param tolerations:   List of :class:`kubernetes.client.V1Toleration`. Use ``[]`` to clear.
+        :warns: PreemptionWarning - Emitted if provided selectors/tolerations/affinity
+            conflict with the function's preemption mode.
+
+        Example usage:
+            Prefer a GPU pool and allow scheduling on spot nodes::

+                job.with_node_selection(
+                    node_selector={"nodepool": "gpu"},
+                    tolerations=[k8s_client.V1Toleration(key="spot", operator="Exists")],
+                )
         """
         if node_name:
             self.spec.node_name = node_name
@@ -904,6 +1070,11 @@ class KubeResource(BaseRuntime):
             self.spec.affinity = affinity
         if tolerations is not None:
             self.spec.tolerations = tolerations
+        self.raise_preemptible_warning(
+            node_selector=self.spec.node_selector,
+            tolerations=self.spec.tolerations,
+            affinity=self.spec.affinity,
+        )

     def with_priority_class(self, name: typing.Optional[str] = None):
         """
@@ -1223,6 +1394,27 @@ class KubeResource(BaseRuntime):

         return self.status.state

+    @staticmethod
+    def _extract_secret_name_from_value_from(
+        value_from: typing.Any,
+    ) -> Optional[str]:
+        """Extract secret name from a V1EnvVarSource or dict representation."""
+        if isinstance(value_from, k8s_client.V1EnvVarSource):
+            if value_from.secret_key_ref:
+                return value_from.secret_key_ref.name
+        elif isinstance(value_from, dict):
+            value_from = (
+                value_from.get("valueFrom")
+                or value_from.get("value_from")
+                or value_from
+            )
+            secret_key_ref = (value_from or {}).get("secretKeyRef") or (
+                value_from or {}
+            ).get("secret_key_ref")
+            if isinstance(secret_key_ref, dict):
+                return secret_key_ref.get("name")
+        return None
+

 def _resolve_if_type_sanitized(attribute_name, attribute):
     attribute_config = sanitized_attributes[attribute_name]