mlrun 1.10.0rc16__py3-none-any.whl → 1.10.0rc42__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of mlrun might be problematic. Click here for more details.

Files changed (98)
  1. mlrun/__init__.py +22 -2
  2. mlrun/artifacts/document.py +6 -1
  3. mlrun/artifacts/llm_prompt.py +21 -15
  4. mlrun/artifacts/model.py +3 -3
  5. mlrun/common/constants.py +9 -0
  6. mlrun/common/formatters/artifact.py +1 -0
  7. mlrun/common/model_monitoring/helpers.py +86 -0
  8. mlrun/common/schemas/__init__.py +2 -0
  9. mlrun/common/schemas/auth.py +2 -0
  10. mlrun/common/schemas/function.py +10 -0
  11. mlrun/common/schemas/hub.py +30 -18
  12. mlrun/common/schemas/model_monitoring/__init__.py +2 -0
  13. mlrun/common/schemas/model_monitoring/constants.py +30 -6
  14. mlrun/common/schemas/model_monitoring/functions.py +13 -4
  15. mlrun/common/schemas/model_monitoring/model_endpoints.py +11 -0
  16. mlrun/common/schemas/pipeline.py +1 -1
  17. mlrun/common/schemas/serving.py +3 -0
  18. mlrun/common/schemas/workflow.py +1 -0
  19. mlrun/common/secrets.py +22 -1
  20. mlrun/config.py +32 -10
  21. mlrun/datastore/__init__.py +11 -3
  22. mlrun/datastore/azure_blob.py +162 -47
  23. mlrun/datastore/datastore.py +9 -4
  24. mlrun/datastore/datastore_profile.py +61 -5
  25. mlrun/datastore/model_provider/huggingface_provider.py +363 -0
  26. mlrun/datastore/model_provider/mock_model_provider.py +87 -0
  27. mlrun/datastore/model_provider/model_provider.py +211 -74
  28. mlrun/datastore/model_provider/openai_provider.py +243 -71
  29. mlrun/datastore/s3.py +24 -2
  30. mlrun/datastore/storeytargets.py +2 -3
  31. mlrun/datastore/utils.py +15 -3
  32. mlrun/db/base.py +27 -19
  33. mlrun/db/httpdb.py +57 -48
  34. mlrun/db/nopdb.py +25 -10
  35. mlrun/execution.py +55 -13
  36. mlrun/hub/__init__.py +15 -0
  37. mlrun/hub/module.py +181 -0
  38. mlrun/k8s_utils.py +105 -16
  39. mlrun/launcher/base.py +13 -6
  40. mlrun/launcher/local.py +2 -0
  41. mlrun/model.py +9 -3
  42. mlrun/model_monitoring/api.py +66 -27
  43. mlrun/model_monitoring/applications/__init__.py +1 -1
  44. mlrun/model_monitoring/applications/base.py +372 -136
  45. mlrun/model_monitoring/applications/context.py +2 -4
  46. mlrun/model_monitoring/applications/results.py +4 -7
  47. mlrun/model_monitoring/controller.py +239 -101
  48. mlrun/model_monitoring/db/_schedules.py +36 -13
  49. mlrun/model_monitoring/db/_stats.py +4 -3
  50. mlrun/model_monitoring/db/tsdb/base.py +29 -9
  51. mlrun/model_monitoring/db/tsdb/tdengine/schemas.py +4 -5
  52. mlrun/model_monitoring/db/tsdb/tdengine/tdengine_connector.py +154 -50
  53. mlrun/model_monitoring/db/tsdb/tdengine/writer_graph_steps.py +51 -0
  54. mlrun/model_monitoring/db/tsdb/v3io/stream_graph_steps.py +17 -4
  55. mlrun/model_monitoring/db/tsdb/v3io/v3io_connector.py +245 -51
  56. mlrun/model_monitoring/helpers.py +28 -5
  57. mlrun/model_monitoring/stream_processing.py +45 -14
  58. mlrun/model_monitoring/writer.py +220 -1
  59. mlrun/platforms/__init__.py +3 -2
  60. mlrun/platforms/iguazio.py +7 -3
  61. mlrun/projects/operations.py +6 -1
  62. mlrun/projects/pipelines.py +2 -2
  63. mlrun/projects/project.py +128 -45
  64. mlrun/run.py +94 -17
  65. mlrun/runtimes/__init__.py +18 -0
  66. mlrun/runtimes/base.py +14 -6
  67. mlrun/runtimes/daskjob.py +1 -0
  68. mlrun/runtimes/local.py +5 -2
  69. mlrun/runtimes/mounts.py +20 -2
  70. mlrun/runtimes/nuclio/__init__.py +1 -0
  71. mlrun/runtimes/nuclio/application/application.py +147 -17
  72. mlrun/runtimes/nuclio/function.py +70 -27
  73. mlrun/runtimes/nuclio/serving.py +85 -4
  74. mlrun/runtimes/pod.py +213 -21
  75. mlrun/runtimes/utils.py +49 -9
  76. mlrun/secrets.py +54 -13
  77. mlrun/serving/remote.py +79 -6
  78. mlrun/serving/routers.py +23 -41
  79. mlrun/serving/server.py +211 -40
  80. mlrun/serving/states.py +536 -156
  81. mlrun/serving/steps.py +62 -0
  82. mlrun/serving/system_steps.py +136 -81
  83. mlrun/serving/v2_serving.py +9 -10
  84. mlrun/utils/helpers.py +212 -82
  85. mlrun/utils/logger.py +3 -1
  86. mlrun/utils/notifications/notification/base.py +18 -0
  87. mlrun/utils/notifications/notification/git.py +2 -4
  88. mlrun/utils/notifications/notification/slack.py +2 -4
  89. mlrun/utils/notifications/notification/webhook.py +2 -5
  90. mlrun/utils/notifications/notification_pusher.py +1 -1
  91. mlrun/utils/version/version.json +2 -2
  92. {mlrun-1.10.0rc16.dist-info → mlrun-1.10.0rc42.dist-info}/METADATA +44 -45
  93. {mlrun-1.10.0rc16.dist-info → mlrun-1.10.0rc42.dist-info}/RECORD +97 -92
  94. mlrun/api/schemas/__init__.py +0 -259
  95. {mlrun-1.10.0rc16.dist-info → mlrun-1.10.0rc42.dist-info}/WHEEL +0 -0
  96. {mlrun-1.10.0rc16.dist-info → mlrun-1.10.0rc42.dist-info}/entry_points.txt +0 -0
  97. {mlrun-1.10.0rc16.dist-info → mlrun-1.10.0rc42.dist-info}/licenses/LICENSE +0 -0
  98. {mlrun-1.10.0rc16.dist-info → mlrun-1.10.0rc42.dist-info}/top_level.txt +0 -0
mlrun/projects/project.py CHANGED
@@ -45,6 +45,7 @@ import mlrun.common.runtimes.constants
45
45
  import mlrun.common.schemas.alert
46
46
  import mlrun.common.schemas.artifact
47
47
  import mlrun.common.schemas.model_monitoring.constants as mm_constants
48
+ import mlrun.common.secrets
48
49
  import mlrun.datastore.datastore_profile
49
50
  import mlrun.db
50
51
  import mlrun.errors
@@ -1888,7 +1889,7 @@ class MlrunProject(ModelObj):
1888
1889
  prompt_path: Optional[str] = None,
1889
1890
  prompt_legend: Optional[dict] = None,
1890
1891
  model_artifact: Union[ModelArtifact, str] = None,
1891
- model_configuration: Optional[dict] = None,
1892
+ invocation_config: Optional[dict] = None,
1892
1893
  description: Optional[str] = None,
1893
1894
  target_path: Optional[str] = None,
1894
1895
  artifact_path: Optional[str] = None,
@@ -1908,13 +1909,51 @@ class MlrunProject(ModelObj):
1908
1909
 
1909
1910
  Examples::
1910
1911
 
1912
+ # Log directly with an inline prompt template
1913
+ project.log_llm_prompt(
1914
+ key="customer_support_prompt",
1915
+ prompt_template=[
1916
+ {
1917
+ "role": "system",
1918
+ "content": "You are a helpful customer support assistant.",
1919
+ },
1920
+ {
1921
+ "role": "user",
1922
+ "content": "The customer reports: {issue_description}",
1923
+ },
1924
+ ],
1925
+ prompt_legend={
1926
+ "issue_description": {
1927
+ "field": "user_issue",
1928
+ "description": "Detailed description of the customer's issue",
1929
+ },
1930
+ "solution": {
1931
+ "field": "proposed_solution",
1932
+ "description": "Suggested fix for the customer's issue",
1933
+ },
1934
+ },
1935
+ model_artifact=model,
1936
+ invocation_config={"temperature": 0.5, "max_tokens": 200},
1937
+ description="Prompt for handling customer support queries",
1938
+ tag="support-v1",
1939
+ labels={"domain": "support"},
1940
+ )
1941
+
1911
1942
  # Log a prompt from file
1912
1943
  project.log_llm_prompt(
1913
- key="qa-prompt",
1914
- prompt_path="prompts/qa_template.txt",
1915
- prompt_legend={"question": "user_question"},
1944
+ key="qa_prompt",
1945
+ prompt_path="prompts/template.json",
1946
+ prompt_legend={
1947
+ "question": {
1948
+ "field": "user_question",
1949
+ "description": "The actual question asked by the user",
1950
+ }
1951
+ },
1916
1952
  model_artifact=model,
1953
+ invocation_config={"temperature": 0.7, "max_tokens": 256},
1954
+ description="Q&A prompt template with user-provided question",
1917
1955
  tag="v2",
1956
+ labels={"task": "qa", "stage": "experiment"},
1918
1957
  )
1919
1958
 
1920
1959
  :param key: Unique key for the prompt artifact.
@@ -1923,18 +1962,23 @@ class MlrunProject(ModelObj):
1923
1962
  "role": "user", "content": "I need your help with {profession}"]. only "role" and "content" keys allow in any
1924
1963
  str format (upper/lower case), keys will be modified to lower case.
1925
1964
  Cannot be used with `prompt_path`.
1926
- :param prompt_path: Path to a file containing the prompt. Mutually exclusive with `prompt_string`.
1965
+ :param prompt_path: Path to a JSON file containing the prompt template.
1966
+ Cannot be used together with `prompt_template`.
1967
+ The file should define a list of dictionaries in the same format
1968
+ supported by `prompt_template`.
1927
1969
  :param prompt_legend: A dictionary where each key is a placeholder in the prompt (e.g., ``{user_name}``)
1928
1970
  and the value is a dictionary holding two keys, "field", "description". "field" points to the field in
1929
1971
  the event where the value of the place-holder inside the event, if None or not exist will be replaced
1930
1972
  with the place-holder name. "description" will point to explanation of what that placeholder represents.
1931
1973
  Useful for documenting and clarifying dynamic parts of the prompt.
1932
1974
  :param model_artifact: Reference to the parent model (either `ModelArtifact` or model URI string).
1933
- :param model_configuration: Configuration dictionary for model generation parameters
1975
+ :param invocation_config: Configuration dictionary for model generation parameters
1934
1976
  (e.g., temperature, max tokens).
1935
- :param description: Optional description of the prompt.
1936
- :param target_path: Optional local target path for saving prompt content.
1937
- :param artifact_path: Storage path for the logged artifact.
1977
+ :param description: Optional description of the prompt.
1978
+ :param target_path: Absolute target path (instead of using artifact_path + local_path)
1979
+ :param artifact_path: Target artifact path (when not using the default)
1980
+ To define a subpath under the default location use:
1981
+ `artifact_path=context.artifact_subpath('data')`
1938
1982
  :param tag: Version tag for the artifact (e.g., "v1", "latest").
1939
1983
  :param labels: Labels to tag the artifact for filtering and organization.
1940
1984
  :param upload: Whether to upload the artifact to a remote datastore. Defaults to True.
@@ -1955,7 +1999,7 @@ class MlrunProject(ModelObj):
1955
1999
  prompt_path=prompt_path,
1956
2000
  prompt_legend=prompt_legend,
1957
2001
  model_artifact=model_artifact,
1958
- model_configuration=model_configuration,
2002
+ invocation_config=invocation_config,
1959
2003
  target_path=target_path,
1960
2004
  description=description,
1961
2005
  **kwargs,
@@ -2343,8 +2387,9 @@ class MlrunProject(ModelObj):
2343
2387
  handler: Optional[str] = None,
2344
2388
  with_repo: Optional[bool] = None,
2345
2389
  tag: Optional[str] = None,
2346
- requirements: Optional[typing.Union[str, list[str]]] = None,
2390
+ requirements: Optional[list[str]] = None,
2347
2391
  requirements_file: str = "",
2392
+ local_path: Optional[str] = None,
2348
2393
  **application_kwargs,
2349
2394
  ) -> mlrun.runtimes.RemoteRuntime:
2350
2395
  """
@@ -2359,7 +2404,8 @@ class MlrunProject(ModelObj):
2359
2404
  )
2360
2405
 
2361
2406
  :param func: Remote function object or spec/code URL. :code:`None` refers to the current
2362
- notebook.
2407
+ notebook. May also be a hub URL of a module of kind model-monitoring-app in the
2408
+ format: hub://[{source}/]{name}[:{tag}].
2363
2409
  :param name: Name of the function (under the project), can be specified with a tag to support
2364
2410
  versions (e.g. myfunc:v1).
2365
2411
  :param image: Docker image to be used, can also be specified in
@@ -2374,6 +2420,8 @@ class MlrunProject(ModelObj):
2374
2420
  :param application_class: Name or an Instance of a class that implements the monitoring application.
2375
2421
  :param application_kwargs: Additional keyword arguments to be passed to the
2376
2422
  monitoring application's constructor.
2423
+ :param local_path: Path to a local directory to save the downloaded monitoring-app code files in,
2424
+ in case 'func' is a hub URL (defaults to current working directory).
2377
2425
  :returns: The model monitoring remote function object.
2378
2426
  """
2379
2427
  (
@@ -2390,6 +2438,7 @@ class MlrunProject(ModelObj):
2390
2438
  tag,
2391
2439
  requirements,
2392
2440
  requirements_file,
2441
+ local_path,
2393
2442
  **application_kwargs,
2394
2443
  )
2395
2444
  # save to project spec
@@ -2468,8 +2517,9 @@ class MlrunProject(ModelObj):
2468
2517
  handler: typing.Optional[str] = None,
2469
2518
  with_repo: typing.Optional[bool] = None,
2470
2519
  tag: typing.Optional[str] = None,
2471
- requirements: typing.Union[str, list[str], None] = None,
2520
+ requirements: typing.Union[list[str], None] = None,
2472
2521
  requirements_file: str = "",
2522
+ local_path: typing.Optional[str] = None,
2473
2523
  **application_kwargs,
2474
2524
  ) -> tuple[str, mlrun.runtimes.RemoteRuntime, dict]:
2475
2525
  import mlrun.model_monitoring.api
@@ -2486,6 +2536,7 @@ class MlrunProject(ModelObj):
2486
2536
  tag=tag,
2487
2537
  requirements=requirements,
2488
2538
  requirements_file=requirements_file,
2539
+ local_path=local_path,
2489
2540
  **application_kwargs,
2490
2541
  )
2491
2542
  elif isinstance(func, str) and isinstance(handler, str):
@@ -2531,7 +2582,7 @@ class MlrunProject(ModelObj):
2531
2582
  *,
2532
2583
  deploy_histogram_data_drift_app: bool = True,
2533
2584
  wait_for_deployment: bool = False,
2534
- fetch_credentials_from_sys_config: bool = False,
2585
+ fetch_credentials_from_sys_config: bool = False, # deprecated
2535
2586
  ) -> None:
2536
2587
  """
2537
2588
  Deploy model monitoring application controller, writer and stream functions.
@@ -2566,14 +2617,20 @@ class MlrunProject(ModelObj):
2566
2617
  :param wait_for_deployment: If true, return only after the deployment is done on the backend.
2567
2618
  Otherwise, deploy the model monitoring infrastructure on the
2568
2619
  background, including the histogram data drift app if selected.
2569
- :param fetch_credentials_from_sys_config: If true, fetch the credentials from the system configuration.
2620
+ :param fetch_credentials_from_sys_config: Deprecated. If true, fetch the credentials from the project
2621
+ configuration.
2570
2622
  """
2623
+ if fetch_credentials_from_sys_config:
2624
+ warnings.warn(
2625
+ "`fetch_credentials_from_sys_config` is deprecated in 1.10.0 and will be removed in 1.12.0.",
2626
+ # TODO: Remove this in 1.12.0
2627
+ FutureWarning,
2628
+ )
2571
2629
  if base_period < 10:
2572
2630
  logger.warn(
2573
2631
  "enable_model_monitoring: 'base_period' < 10 minutes is not supported in production environments",
2574
2632
  project=self.name,
2575
2633
  )
2576
-
2577
2634
  db = mlrun.db.get_run_db(secrets=self._secrets)
2578
2635
  db.enable_model_monitoring(
2579
2636
  project=self.name,
@@ -2706,16 +2763,18 @@ class MlrunProject(ModelObj):
2706
2763
  | Creating a function with non project source is done by specifying a module ``handler`` and on the
2707
2764
  returned function set the source with ``function.with_source_archive(<source>)``.
2708
2765
 
2709
- Support URL prefixes:
2766
+ Supported URL prefixes:
2710
2767
 
2711
- | Object (s3://, v3io://, ..)
2712
- | MLRun DB e.g. db://project/func:ver
2713
- | Functions hub/market: e.g. hub://auto-trainer:master
2768
+ - Object: s3://, v3io://, etc.
2769
+ - MLRun DB: e.g db://project/func:ver
2770
+ - Function hub/market: e.g. hub://auto-trainer:master
2714
2771
 
2715
2772
  Examples::
2716
2773
 
2717
2774
  proj.set_function(func_object)
2718
- proj.set_function("http://.../mynb.ipynb", "train")
2775
+ proj.set_function(
2776
+ "http://.../mynb.ipynb", "train", kind="job", image="mlrun/mlrun"
2777
+ )
2719
2778
  proj.set_function("./func.yaml")
2720
2779
  proj.set_function("hub://get_toy_data", "getdata")
2721
2780
 
@@ -2742,18 +2801,6 @@ class MlrunProject(ModelObj):
2742
2801
  # By providing a path to a pip requirements file
2743
2802
  proj.set_function("my.py", requirements="requirements.txt")
2744
2803
 
2745
- One of the most important parameters is 'kind', used to specify the chosen runtime. The options are:
2746
- - local: execute a local python or shell script
2747
- - job: insert the code into a Kubernetes pod and execute it
2748
- - nuclio: insert the code into a real-time serverless nuclio function
2749
- - serving: insert code into orchestrated nuclio function(s) forming a DAG
2750
- - dask: run the specified python code / script as Dask Distributed job
2751
- - mpijob: run distributed Horovod jobs over the MPI job operator
2752
- - spark: run distributed Spark job using Spark Kubernetes Operator
2753
- - remote-spark: run distributed Spark job on remote Spark service
2754
- - databricks: run code on Databricks cluster (python scripts, Spark etc.)
2755
- - application: run a long living application (e.g. a web server, UI, etc.)
2756
-
2757
2804
  Learn more about :doc:`../../concepts/functions-overview`.
2758
2805
 
2759
2806
  :param func: Function object or spec/code url, None refers to current Notebook
@@ -2761,8 +2808,20 @@ class MlrunProject(ModelObj):
2761
2808
  Versions (e.g. myfunc:v1). If the `tag` parameter is provided, the tag in the name
2762
2809
  must match the tag parameter.
2763
2810
  Specifying a tag in the name will update the project's tagged function (myfunc:v1)
2764
- :param kind: Runtime kind e.g. job, nuclio, spark, dask, mpijob
2765
- Default: job
2811
+ :param kind: Default: job. One of
2812
+
2813
+ - local: execute a local python or shell script
2814
+ - job: insert the code into a Kubernetes pod and execute it
2815
+ - nuclio: insert the code into a real-time serverless nuclio function
2816
+ - serving: insert code into orchestrated nuclio function(s) forming a DAG
2817
+ - dask: run the specified python code / script as Dask Distributed job
2818
+ - mpijob: run distributed Horovod jobs over the MPI job operator
2819
+ - spark: run distributed Spark job using Spark Kubernetes Operator
2820
+ - remote-spark: run distributed Spark job on remote Spark service
2821
+ - databricks: run code on Databricks cluster (python scripts, Spark etc.)
2822
+ - application: run a long living application (e.g. a web server, UI, etc.)
2823
+ - handler: execute a python handler (used automatically in notebooks or for debug)
2824
+
2766
2825
  :param image: Docker image to be used, can also be specified in the function object/yaml
2767
2826
  :param handler: Default function handler to invoke (can only be set with .py/.ipynb files)
2768
2827
  :param with_repo: Add (clone) the current repo to the build source - use when the function code is in
@@ -3360,7 +3419,12 @@ class MlrunProject(ModelObj):
3360
3419
  self._initialized = True
3361
3420
  return self.spec._function_objects
3362
3421
 
3363
- def with_secrets(self, kind, source, prefix=""):
3422
+ def with_secrets(
3423
+ self,
3424
+ kind,
3425
+ source,
3426
+ prefix="",
3427
+ ):
3364
3428
  """register a secrets source (file, env or dict)
3365
3429
 
3366
3430
  read secrets from a source provider to be used in workflows, example::
@@ -3382,12 +3446,19 @@ class MlrunProject(ModelObj):
3382
3446
 
3383
3447
  This will enable access to all secrets in vault registered to the current project.
3384
3448
 
3385
- :param kind: secret type (file, inline, env, vault)
3449
+ :param kind: secret type (file, inline, env, vault, azure_vault)
3386
3450
  :param source: secret data or link (see example)
3387
3451
  :param prefix: add a prefix to the keys in this source
3388
3452
 
3389
3453
  :returns: project object
3390
3454
  """
3455
+ # Block using mlrun-auth-secrets.* via azure_vault's k8s_secret param (client-side only)
3456
+ if kind == "azure_vault" and isinstance(source, dict):
3457
+ candidate_secret_name = (source.get("k8s_secret") or "").strip()
3458
+ if candidate_secret_name:
3459
+ mlrun.common.secrets.validate_not_forbidden_secret(
3460
+ candidate_secret_name
3461
+ )
3391
3462
 
3392
3463
  if kind == "vault" and isinstance(source, list):
3393
3464
  source = {"project": self.metadata.name, "secrets": source}
@@ -3771,7 +3842,7 @@ class MlrunProject(ModelObj):
3771
3842
 
3772
3843
  import mlrun
3773
3844
  from mlrun.datastore.datastore_profile import (
3774
- DatastoreProfileKafkaSource,
3845
+ DatastoreProfileKafkaStream,
3775
3846
  DatastoreProfileTDEngine,
3776
3847
  )
3777
3848
 
@@ -3788,7 +3859,7 @@ class MlrunProject(ModelObj):
3788
3859
  project.register_datastore_profile(tsdb_profile)
3789
3860
 
3790
3861
  # Create and register stream profile
3791
- stream_profile = DatastoreProfileKafkaSource(
3862
+ stream_profile = DatastoreProfileKafkaStream(
3792
3863
  name="my-kafka",
3793
3864
  brokers=["<kafka-broker-ip-address>:9094"],
3794
3865
  topics=[], # Keep the topics list empty
@@ -3830,9 +3901,9 @@ class MlrunProject(ModelObj):
3830
3901
 
3831
3902
  .. code-block:: python
3832
3903
 
3833
- from mlrun.datastore.datastore_profile import DatastoreProfileKafkaSource
3904
+ from mlrun.datastore.datastore_profile import DatastoreProfileKafkaStream
3834
3905
 
3835
- stream_profile = DatastoreProfileKafkaSource(
3906
+ stream_profile = DatastoreProfileKafkaStream(
3836
3907
  name="confluent-kafka",
3837
3908
  brokers=["<server-domain-start>.confluent.cloud:9092"],
3838
3909
  topics=[],
@@ -3861,7 +3932,7 @@ class MlrunProject(ModelObj):
3861
3932
  The supported profiles are:
3862
3933
 
3863
3934
  * :py:class:`~mlrun.datastore.datastore_profile.DatastoreProfileV3io`
3864
- * :py:class:`~mlrun.datastore.datastore_profile.DatastoreProfileKafkaSource`
3935
+ * :py:class:`~mlrun.datastore.datastore_profile.DatastoreProfileKafkaStream`
3865
3936
 
3866
3937
  You need to register one of them, and pass the profile's name.
3867
3938
  :param replace_creds: If ``True`` - override the existing credentials.
@@ -3901,6 +3972,9 @@ class MlrunProject(ModelObj):
3901
3972
  start: Optional[datetime.datetime] = None,
3902
3973
  end: Optional[datetime.datetime] = None,
3903
3974
  top_level: bool = False,
3975
+ modes: Optional[
3976
+ Union[mm_constants.EndpointMode, list[mm_constants.EndpointMode]]
3977
+ ] = None,
3904
3978
  uids: Optional[list[str]] = None,
3905
3979
  latest_only: bool = False,
3906
3980
  tsdb_metrics: bool = False,
@@ -3916,8 +3990,9 @@ class MlrunProject(ModelObj):
3916
3990
  5) function_tag
3917
3991
  6) labels
3918
3992
  7) top level
3919
- 8) uids
3920
- 9) start and end time, corresponding to the `created` field.
3993
+ 8) modes
3994
+ 9) uids
3995
+ 10) start and end time, corresponding to the `created` field.
3921
3996
  By default, when no filters are applied, all available endpoints for the given project will be listed.
3922
3997
 
3923
3998
  In addition, this functions provides a facade for listing endpoint related metrics. This facade is time-based
@@ -3937,6 +4012,8 @@ class MlrunProject(ModelObj):
3937
4012
  :param start: The start time to filter by.Corresponding to the `created` field.
3938
4013
  :param end: The end time to filter by. Corresponding to the `created` field.
3939
4014
  :param top_level: If true will return only routers and endpoint that are NOT children of any router.
4015
+ :param modes: Specifies the mode of the model endpoint. Can be "real-time" (0), "batch" (1),
4016
+ "batch_legacy" (2). If set to None, all are included.
3940
4017
  :param uids: If passed will return a list `ModelEndpoint` object with uid in uids.
3941
4018
  :param tsdb_metrics: When True, the time series metrics will be added to the output
3942
4019
  of the resulting.
@@ -3958,6 +4035,7 @@ class MlrunProject(ModelObj):
3958
4035
  start=start,
3959
4036
  end=end,
3960
4037
  top_level=top_level,
4038
+ modes=modes,
3961
4039
  uids=uids,
3962
4040
  latest_only=latest_only,
3963
4041
  tsdb_metrics=tsdb_metrics,
@@ -4052,7 +4130,12 @@ class MlrunProject(ModelObj):
4052
4130
  This ensures latest code changes are executed. This argument must be used in
4053
4131
  conjunction with the local=True argument.
4054
4132
  :param output_path: path to store artifacts, when running in a workflow this will be set automatically
4055
- :param retry: Retry configuration for the run, can be a dict or an instance of mlrun.model.Retry.
4133
+ :param retry: Retry configuration for the run, can be a dict or an instance of
4134
+ :py:class:`~mlrun.model.Retry`.
4135
+ The `count` field in the `Retry` object specifies the number of retry attempts.
4136
+ If `count=0`, the run will not be retried.
4137
+ The `backoff` field specifies the retry backoff strategy between retry attempts.
4138
+ If not provided, the default backoff delay is 30 seconds.
4056
4139
  :return: MLRun RunObject or PipelineNodeWrapper
4057
4140
  """
4058
4141
  if artifact_path:
mlrun/run.py CHANGED
@@ -17,6 +17,7 @@ import json
17
17
  import os
18
18
  import pathlib
19
19
  import socket
20
+ import sys
20
21
  import tempfile
21
22
  import time
22
23
  import typing
@@ -117,14 +118,31 @@ def function_to_module(code="", workdir=None, secrets=None, silent=False):
117
118
  raise ValueError("nothing to run, specify command or function")
118
119
 
119
120
  command = os.path.join(workdir or "", command)
120
- path = Path(command)
121
- mod_name = path.name
122
- if path.suffix:
123
- mod_name = mod_name[: -len(path.suffix)]
121
+
122
+ source_file_path_object, working_dir_path_object = (
123
+ mlrun.utils.helpers.get_source_and_working_dir_paths(command)
124
+ )
125
+ if source_file_path_object.is_relative_to(working_dir_path_object):
126
+ mod_name = mlrun.utils.helpers.get_relative_module_name_from_path(
127
+ source_file_path_object, working_dir_path_object
128
+ )
129
+ elif source_file_path_object.is_relative_to(
130
+ pathlib.Path(tempfile.gettempdir()).resolve()
131
+ ):
132
+ mod_name = Path(command).stem
133
+ else:
134
+ raise mlrun.errors.MLRunRuntimeError(
135
+ f"Cannot run source file '{command}': it must be located either under the current working "
136
+ f"directory ('{working_dir_path_object}') or the system temporary directory ('{tempfile.gettempdir()}'). "
137
+ f"This is required when running with local=True."
138
+ )
139
+
124
140
  spec = imputil.spec_from_file_location(mod_name, command)
125
141
  if spec is None:
126
142
  raise OSError(f"cannot import from {command!r}")
127
143
  mod = imputil.module_from_spec(spec)
144
+ # add to system modules, which can be necessary when running in a MockServer (ML-10937)
145
+ sys.modules[mod_name] = mod
128
146
  spec.loader.exec_module(mod)
129
147
 
130
148
  return mod
@@ -141,7 +159,7 @@ def load_func_code(command="", workdir=None, secrets=None, name="name"):
141
159
  else:
142
160
  is_remote = "://" in command
143
161
  data = get_object(command, secrets)
144
- runtime = yaml.load(data, Loader=yaml.FullLoader)
162
+ runtime = yaml.safe_load(data)
145
163
  runtime = new_function(runtime=runtime)
146
164
 
147
165
  command = runtime.spec.command or ""
@@ -222,7 +240,8 @@ def get_or_create_ctx(
222
240
  :param spec: dictionary holding run spec
223
241
  :param with_env: look for context in environment vars, default True
224
242
  :param rundb: path/url to the metadata and artifact database
225
- :param project: project to initiate the context in (by default `mlrun.mlconf.active_project`)
243
+ :param project: project to initiate the context in (by default `mlrun.mlconf.active_project`).
244
+ If not set, an active project must exist.
226
245
  :param upload_artifacts: when using local context (not as part of a job/run), upload artifacts to the
227
246
  system default artifact path location
228
247
  :return: execution context
@@ -277,6 +296,16 @@ def get_or_create_ctx(
277
296
  if newspec and not isinstance(newspec, dict):
278
297
  newspec = json.loads(newspec)
279
298
 
299
+ if (
300
+ not newspec.get("metadata", {}).get("project")
301
+ and not project
302
+ and not mlconf.active_project
303
+ ):
304
+ raise mlrun.errors.MLRunMissingProjectError(
305
+ """No active project found. Make sure to set an active project using: mlrun.get_or_create_project()
306
+ You can verify the active project with: mlrun.mlconf.active_project"""
307
+ )
308
+
280
309
  if not newspec:
281
310
  newspec = {}
282
311
  if upload_artifacts:
@@ -362,10 +391,13 @@ def import_function(url="", secrets=None, db="", project=None, new_name=None):
362
391
  return function
363
392
 
364
393
 
365
- def import_function_to_dict(url, secrets=None):
394
+ def import_function_to_dict(
395
+ url: str,
396
+ secrets: Optional[dict] = None,
397
+ ) -> dict:
366
398
  """Load function spec from local/remote YAML file"""
367
399
  obj = get_object(url, secrets)
368
- runtime = yaml.load(obj, Loader=yaml.FullLoader)
400
+ runtime = yaml.safe_load(obj)
369
401
  remote = "://" in url
370
402
 
371
403
  code = get_in(runtime, "spec.build.functionSourceCode")
@@ -388,20 +420,40 @@ def import_function_to_dict(url, secrets=None):
388
420
  raise ValueError("exec path (spec.command) must be relative")
389
421
  url = url[: url.rfind("/") + 1] + code_file
390
422
  code = get_object(url, secrets)
423
+ code_file = _ensure_path_confined_to_base_dir(
424
+ base_directory=".",
425
+ relative_path=code_file,
426
+ error_message_on_escape="Path traversal detected in spec.command",
427
+ )
391
428
  dir = path.dirname(code_file)
392
429
  if dir:
393
430
  makedirs(dir, exist_ok=True)
394
431
  with open(code_file, "wb") as fp:
395
432
  fp.write(code)
396
433
  elif cmd:
397
- if not path.isfile(code_file):
398
- # look for the file in a relative path to the yaml
399
- slash = url.rfind("/")
400
- if slash >= 0 and path.isfile(url[: url.rfind("/") + 1] + code_file):
401
- raise ValueError(
402
- f"exec file spec.command={code_file} is relative, change working dir"
403
- )
434
+ slash_index = url.rfind("/")
435
+ if slash_index < 0:
436
+ raise ValueError(f"no file in exec path (spec.command={code_file})")
437
+ base_dir = os.path.normpath(url[: slash_index + 1])
438
+
439
+ # Validate and resolve the candidate path before checking existence
440
+ candidate_path = _ensure_path_confined_to_base_dir(
441
+ base_directory=base_dir,
442
+ relative_path=code_file,
443
+ error_message_on_escape=(
444
+ f"exec file spec.command={code_file} is outside of allowed directory"
445
+ ),
446
+ )
447
+
448
+ # Only now it's safe to check file existence
449
+ if not path.isfile(candidate_path):
404
450
  raise ValueError(f"no file in exec path (spec.command={code_file})")
451
+
452
+ # Check that the path is absolute
453
+ if not os.path.isabs(code_file):
454
+ raise ValueError(
455
+ f"exec file spec.command={code_file} is relative, it must be absolute. Change working dir"
456
+ )
405
457
  else:
406
458
  raise ValueError("command or code not specified in function spec")
407
459
 
@@ -503,6 +555,7 @@ def new_function(
503
555
 
504
556
  # make sure function name is valid
505
557
  name = mlrun.utils.helpers.normalize_name(name)
558
+ mlrun.utils.helpers.validate_function_name(name)
506
559
 
507
560
  runner.metadata.name = name
508
561
  runner.metadata.project = (
@@ -542,6 +595,7 @@ def new_function(
542
595
  )
543
596
 
544
597
  runner.prepare_image_for_deploy()
598
+
545
599
  return runner
546
600
 
547
601
 
@@ -575,7 +629,7 @@ def code_to_function(
575
629
  code_output: Optional[str] = "",
576
630
  embed_code: bool = True,
577
631
  description: Optional[str] = "",
578
- requirements: Optional[Union[str, list[str]]] = None,
632
+ requirements: Optional[list[str]] = None,
579
633
  categories: Optional[list[str]] = None,
580
634
  labels: Optional[dict[str, str]] = None,
581
635
  with_doc: Optional[bool] = True,
@@ -746,6 +800,7 @@ def code_to_function(
746
800
  kind=sub_kind,
747
801
  ignored_tags=ignored_tags,
748
802
  )
803
+
749
804
  spec["spec"]["env"].append(
750
805
  {
751
806
  "name": "MLRUN_HTTPDB__NUCLIO__EXPLICIT_ACK",
@@ -798,6 +853,7 @@ def code_to_function(
798
853
  runtime.spec.build.code_origin = code_origin
799
854
  runtime.spec.build.origin_filename = filename or (name + ".ipynb")
800
855
  update_common(runtime, spec)
856
+
801
857
  return runtime
802
858
 
803
859
  if kind is None or kind in ["", "Function"]:
@@ -811,6 +867,7 @@ def code_to_function(
811
867
 
812
868
  if not name:
813
869
  raise ValueError("name must be specified")
870
+
814
871
  h = get_in(spec, "spec.handler", "").split(":")
815
872
  runtime.handler = h[0] if len(h) <= 1 else h[1]
816
873
  runtime.metadata = get_in(spec, "spec.metadata")
@@ -1184,11 +1241,13 @@ def get_model_provider(
1184
1241
  raise_missing_schema_exception=True,
1185
1242
  ) -> ModelProvider:
1186
1243
  """get mlrun dataitem object (from path/url)"""
1187
- store_manager.set(secrets, db=db)
1244
+ # without caching secrets
1245
+ store_manager.set(db=db)
1188
1246
  return store_manager.model_provider_object(
1189
1247
  url=url,
1190
1248
  default_invoke_kwargs=default_invoke_kwargs,
1191
1249
  raise_missing_schema_exception=raise_missing_schema_exception,
1250
+ secrets=secrets,
1192
1251
  )
1193
1252
 
1194
1253
 
@@ -1256,3 +1315,21 @@ def wait_for_runs_completion(
1256
1315
  runs = running
1257
1316
 
1258
1317
  return completed
1318
+
1319
+
1320
+ def _ensure_path_confined_to_base_dir(
1321
+ base_directory: str,
1322
+ relative_path: str,
1323
+ error_message_on_escape: str,
1324
+ ) -> str:
1325
+ """
1326
+ Join `user_supplied_relative_path` to `allowed_base_directory`, normalise the result,
1327
+ and guarantee it stays inside `allowed_base_directory`.
1328
+ """
1329
+ absolute_base_directory = path.abspath(base_directory)
1330
+ absolute_candidate_path = path.abspath(
1331
+ path.join(absolute_base_directory, relative_path)
1332
+ )
1333
+ if not absolute_candidate_path.startswith(absolute_base_directory + path.sep):
1334
+ raise ValueError(error_message_on_escape)
1335
+ return absolute_candidate_path
@@ -221,6 +221,24 @@ class RuntimeKinds:
221
221
  return True
222
222
  return False
223
223
 
224
+ @staticmethod
225
+ def requires_k8s_name_validation(kind: str) -> bool:
226
+ """
227
+ Returns True if the runtime kind creates Kubernetes resources that use the function name.
228
+
229
+ Function names for k8s-deployed runtimes must conform to DNS-1123 label requirements:
230
+ - Lowercase alphanumeric characters or '-'
231
+ - Start and end with an alphanumeric character
232
+ - Maximum 63 characters
233
+
234
+ Local runtimes (local, handler) run on the local machine and don't create k8s resources,
235
+ so they don't require k8s naming validation.
236
+
237
+ :param kind: Runtime kind string (job, spark, serving, local, etc.)
238
+ :return: True if function name needs k8s DNS-1123 validation, False otherwise
239
+ """
240
+ return not RuntimeKinds.is_local_runtime(kind)
241
+
224
242
  @staticmethod
225
243
  def requires_absolute_artifacts_path(kind):
226
244
  """