mlrun 1.10.0rc37__py3-none-any.whl → 1.10.0rc41__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of mlrun might be problematic.

Files changed (33)
  1. mlrun/artifacts/document.py +6 -1
  2. mlrun/common/constants.py +6 -0
  3. mlrun/common/model_monitoring/helpers.py +1 -1
  4. mlrun/common/schemas/model_monitoring/constants.py +0 -2
  5. mlrun/common/secrets.py +22 -1
  6. mlrun/launcher/local.py +2 -0
  7. mlrun/model.py +7 -1
  8. mlrun/model_monitoring/api.py +3 -2
  9. mlrun/model_monitoring/applications/base.py +6 -3
  10. mlrun/model_monitoring/applications/context.py +1 -0
  11. mlrun/model_monitoring/db/tsdb/base.py +2 -4
  12. mlrun/model_monitoring/db/tsdb/tdengine/tdengine_connector.py +17 -11
  13. mlrun/model_monitoring/db/tsdb/v3io/v3io_connector.py +154 -76
  14. mlrun/projects/project.py +15 -2
  15. mlrun/run.py +26 -1
  16. mlrun/runtimes/__init__.py +18 -0
  17. mlrun/runtimes/base.py +3 -0
  18. mlrun/runtimes/local.py +5 -2
  19. mlrun/runtimes/mounts.py +5 -0
  20. mlrun/runtimes/nuclio/application/application.py +2 -0
  21. mlrun/runtimes/nuclio/function.py +14 -0
  22. mlrun/runtimes/nuclio/serving.py +67 -4
  23. mlrun/runtimes/pod.py +59 -10
  24. mlrun/serving/server.py +42 -10
  25. mlrun/serving/states.py +75 -26
  26. mlrun/utils/helpers.py +86 -10
  27. mlrun/utils/version/version.json +2 -2
  28. {mlrun-1.10.0rc37.dist-info → mlrun-1.10.0rc41.dist-info}/METADATA +3 -3
  29. {mlrun-1.10.0rc37.dist-info → mlrun-1.10.0rc41.dist-info}/RECORD +33 -33
  30. {mlrun-1.10.0rc37.dist-info → mlrun-1.10.0rc41.dist-info}/WHEEL +0 -0
  31. {mlrun-1.10.0rc37.dist-info → mlrun-1.10.0rc41.dist-info}/entry_points.txt +0 -0
  32. {mlrun-1.10.0rc37.dist-info → mlrun-1.10.0rc41.dist-info}/licenses/LICENSE +0 -0
  33. {mlrun-1.10.0rc37.dist-info → mlrun-1.10.0rc41.dist-info}/top_level.txt +0 -0
mlrun/artifacts/document.py CHANGED
@@ -359,7 +359,12 @@ class DocumentArtifact(Artifact):
         self,
         splitter: Optional["TextSplitter"] = None,  # noqa: F821
     ) -> list["Document"]:  # noqa: F821
-        from langchain.schema import Document
+        # Try new langchain 1.0+ import path first
+        try:
+            from langchain_core.documents import Document
+        except ImportError:
+            # Fall back to old langchain <1.0 import path
+            from langchain.schema import Document

        """
        Create LC documents from the artifact
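
Note: in langchain 1.0 the Document class is no longer re-exported from langchain.schema and lives in the separate langchain-core package, so the artifact now tries the new location first and falls back to the legacy import. A minimal standalone sketch of the same pattern (the make_document helper is illustrative, not part of mlrun):

    # Prefer the langchain>=1.0 location, fall back to the pre-1.0 one.
    try:
        from langchain_core.documents import Document  # langchain-core (langchain 1.0+)
    except ImportError:
        from langchain.schema import Document  # legacy langchain < 1.0

    def make_document(text: str, **metadata) -> "Document":
        # Document(page_content=..., metadata=...) works the same under both import paths.
        return Document(page_content=text, metadata=metadata)
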
mlrun/common/constants.py CHANGED
@@ -27,6 +27,12 @@ DASK_LABEL_PREFIX = "dask.org/"
 NUCLIO_LABEL_PREFIX = "nuclio.io/"
 RESERVED_TAG_NAME_LATEST = "latest"

+# Kubernetes DNS-1123 label name length limit
+K8S_DNS_1123_LABEL_MAX_LENGTH = 63
+
+
+RESERVED_BATCH_JOB_SUFFIX = "-batch"
+
 JOB_TYPE_WORKFLOW_RUNNER = "workflow-runner"
 JOB_TYPE_PROJECT_LOADER = "project-loader"
 JOB_TYPE_RERUN_WORKFLOW_RUNNER = "rerun-workflow-runner"
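
Note: K8S_DNS_1123_LABEL_MAX_LENGTH backs the validate_function_name call added in mlrun/run.py further down, and RESERVED_BATCH_JOB_SUFFIX replaces the private _RESERVED_EVALUATE_FUNCTION_SUFFIX removed from the model-monitoring constants. The validator itself lives in mlrun/utils/helpers.py, whose hunk is not included in this diff; a hedged sketch of what a DNS-1123 label check could look like:

    import re

    from mlrun.common.constants import K8S_DNS_1123_LABEL_MAX_LENGTH

    # Sketch only: the real validate_function_name in mlrun/utils/helpers.py may raise an
    # mlrun-specific error type and cover more cases than this minimal check.
    _DNS_1123_LABEL = re.compile(r"^[a-z0-9]([-a-z0-9]*[a-z0-9])?$")

    def validate_function_name(name: str) -> None:
        if len(name) > K8S_DNS_1123_LABEL_MAX_LENGTH or not _DNS_1123_LABEL.match(name):
            raise ValueError(
                f"'{name}' is not a valid DNS-1123 label "
                f"(max {K8S_DNS_1123_LABEL_MAX_LENGTH} chars, lowercase alphanumerics and '-')"
            )
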
mlrun/common/model_monitoring/helpers.py CHANGED
@@ -170,6 +170,6 @@ def log_background_task_state(
         f"Model endpoint creation task is still in progress with the current state: "
         f"{background_task_state}. Events will not be monitored for the next "
         f"{mlrun.mlconf.model_endpoint_monitoring.model_endpoint_creation_check_period} seconds",
-        function_name=server.function.name,
+        function_name=server.function_name,
         background_task_check_timestamp=background_task_check_timestamp.isoformat(),
     )
mlrun/common/schemas/model_monitoring/constants.py CHANGED
@@ -486,8 +486,6 @@ class ModelMonitoringLabels:

 _RESERVED_FUNCTION_NAMES = MonitoringFunctionNames.list() + [SpecialApps.MLRUN_INFRA]

-_RESERVED_EVALUATE_FUNCTION_SUFFIX = "-batch"
-

 class ModelEndpointMonitoringMetricType(StrEnum):
     RESULT = "result"
mlrun/common/secrets.py CHANGED
@@ -11,10 +11,31 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-
+import re
 from abc import ABC, abstractmethod

 import mlrun.common.schemas
+from mlrun.config import config as mlconf
+
+_AUTH_SECRET_NAME_TEMPLATE = re.escape(
+    mlconf.secret_stores.kubernetes.auth_secret_name.format(
+        hashed_access_key="",
+    )
+)
+AUTH_SECRET_PATTERN = re.compile(f"^{_AUTH_SECRET_NAME_TEMPLATE}.*")
+
+
+def validate_not_forbidden_secret(secret_name: str) -> None:
+    """
+    Forbid client-supplied references to internal MLRun auth/project secrets.
+    No-op when running inside the API server (API enrichments are allowed).
+    """
+    if not secret_name or mlrun.config.is_running_as_api():
+        return
+    if AUTH_SECRET_PATTERN.match(secret_name):
+        raise mlrun.errors.MLRunInvalidArgumentError(
+            f"Forbidden secret '{secret_name}' matches MLRun auth-secret pattern."
+        )


 class SecretProviderInterface(ABC):
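
Note: a short client-side usage sketch of the new guard; the first call passes, the second is rejected because the name matches the MLRun auth-secret prefix (the concrete secret names below are illustrative):

    import mlrun.common.secrets
    import mlrun.errors

    # A user-managed secret name passes silently.
    mlrun.common.secrets.validate_not_forbidden_secret("my-azure-key-vault-creds")

    # A name matching the internal auth-secret pattern (e.g. "mlrun-auth-secrets.<hash>")
    # raises before any request leaves the client.
    try:
        mlrun.common.secrets.validate_not_forbidden_secret("mlrun-auth-secrets.0a1b2c3d")
    except mlrun.errors.MLRunInvalidArgumentError as exc:
        print(exc)
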
mlrun/launcher/local.py CHANGED
@@ -243,6 +243,8 @@ class ClientLocalLauncher(launcher.ClientBaseLauncher):

         # if the handler has module prefix force "local" (vs "handler") runtime
         kind = "local" if isinstance(handler, str) and "." in handler else ""
+
+        # Create temporary local function for execution
         fn = mlrun.new_function(meta.name, command=command, args=args, kind=kind)
         fn.metadata = meta
         setattr(fn, "_is_run_local", True)
mlrun/model.py CHANGED
@@ -29,6 +29,7 @@ import pydantic.v1.error_wrappers
 import mlrun
 import mlrun.common.constants as mlrun_constants
 import mlrun.common.schemas.notification
+import mlrun.common.secrets
 import mlrun.utils.regex

 from .utils import (
@@ -1616,7 +1617,12 @@ class RunTemplate(ModelObj):

        :returns: The RunTemplate object
        """
-
+        if kind == "azure_vault" and isinstance(source, dict):
+            candidate_secret_name = (source.get("k8s_secret") or "").strip()
+            if candidate_secret_name:
+                mlrun.common.secrets.validate_not_forbidden_secret(
+                    candidate_secret_name
+                )
         if kind == "vault" and isinstance(source, list):
             source = {"project": self.metadata.project, "secrets": source}

mlrun/model_monitoring/api.py CHANGED
@@ -563,9 +563,10 @@ def _create_model_monitoring_function_base(
             "An application cannot have the following names: "
             f"{mm_constants._RESERVED_FUNCTION_NAMES}"
         )
-    if name and name.endswith(mm_constants._RESERVED_EVALUATE_FUNCTION_SUFFIX):
+    _, has_valid_suffix, suffix = mlrun.utils.helpers.ensure_batch_job_suffix(name)
+    if name and not has_valid_suffix:
         raise mlrun.errors.MLRunValueError(
-            "Model monitoring application names cannot end with `-batch`"
+            f"Model monitoring application names cannot end with `{suffix}`"
         )
    if func is None:
        func = ""
mlrun/model_monitoring/applications/base.py CHANGED
@@ -799,10 +799,13 @@ class ModelMonitoringApplicationBase(MonitoringApplicationToDict, ABC):
                 f"`{mm_constants.APP_NAME_REGEX.pattern}`. "
                 "Please choose another `func_name`."
             )
-        if not job_name.endswith(mm_constants._RESERVED_EVALUATE_FUNCTION_SUFFIX):
-            job_name += mm_constants._RESERVED_EVALUATE_FUNCTION_SUFFIX
+        job_name, was_renamed, suffix = mlrun.utils.helpers.ensure_batch_job_suffix(
+            job_name
+        )
+        if was_renamed:
             mlrun.utils.logger.info(
-                'Changing function name - adding `"-batch"` suffix', func_name=job_name
+                f'Changing function name - adding `"{suffix}"` suffix',
+                func_name=job_name,
             )

        return job_name
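
Note: both call sites above rely on a new ensure_batch_job_suffix helper in mlrun/utils/helpers.py (changed in this release but its hunk is not shown here). From its usage it returns a (name, suffix_was_added, suffix) triple; a hedged sketch consistent with both callers, using the RESERVED_BATCH_JOB_SUFFIX constant added in mlrun/common/constants.py:

    from mlrun.common.constants import RESERVED_BATCH_JOB_SUFFIX

    def ensure_batch_job_suffix(name: str) -> tuple[str, bool, str]:
        # Sketch only: the actual implementation lives in mlrun/utils/helpers.py.
        # Returns the (possibly renamed) name, whether the suffix was appended, and the suffix.
        if name and not name.endswith(RESERVED_BATCH_JOB_SUFFIX):
            return name + RESERVED_BATCH_JOB_SUFFIX, True, RESERVED_BATCH_JOB_SUFFIX
        return name, False, RESERVED_BATCH_JOB_SUFFIX

Under that reading, _create_model_monitoring_function_base rejects application names that already end with `-batch`, while the evaluate job path silently appends the suffix and logs the rename.
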
mlrun/model_monitoring/applications/context.py CHANGED
@@ -249,6 +249,7 @@ class MonitoringApplicationContext:
                 project=self.project_name,
                 endpoint_id=self.endpoint_id,
                 feature_analysis=True,
+                tsdb_metrics=False,
             )
             return self._model_endpoint

mlrun/model_monitoring/db/tsdb/base.py CHANGED
@@ -14,7 +14,7 @@

 from abc import ABC, abstractmethod
 from datetime import datetime, timedelta
-from typing import Callable, ClassVar, Literal, Optional, Union
+from typing import ClassVar, Literal, Optional, Union

 import pandas as pd
 import pydantic.v1
@@ -444,11 +444,9 @@ class TSDBConnector(ABC):
         ]
         """

-    async def add_basic_metrics(
+    def add_basic_metrics(
         self,
         model_endpoint_objects: list[mlrun.common.schemas.ModelEndpoint],
-        project: str,
-        run_in_threadpool: Callable,
         metric_list: Optional[list[str]] = None,
     ) -> list[mlrun.common.schemas.ModelEndpoint]:
         raise NotImplementedError()
mlrun/model_monitoring/db/tsdb/tdengine/tdengine_connector.py CHANGED
@@ -14,7 +14,7 @@

 import threading
 from datetime import datetime, timedelta
-from typing import Callable, Final, Literal, Optional, Union
+from typing import Final, Literal, Optional, Union

 import pandas as pd
 import taosws
@@ -55,14 +55,12 @@ class TDEngineConnector(TSDBConnector):
     """

     type: str = mm_schemas.TSDBTarget.TDEngine
-    database = f"{tdengine_schemas._MODEL_MONITORING_DATABASE}_{mlrun.mlconf.system_id}"

     def __init__(
         self,
         project: str,
         profile: DatastoreProfile,
         timestamp_precision: TDEngineTimestampPrecision = TDEngineTimestampPrecision.MICROSECOND,
-        **kwargs,
     ):
         super().__init__(project=project)

@@ -72,6 +70,15 @@ class TDEngineConnector(TSDBConnector):
             timestamp_precision
         )

+        if not mlrun.mlconf.system_id:
+            raise mlrun.errors.MLRunInvalidArgumentError(
+                "system_id is not set in mlrun.mlconf. "
+                "TDEngineConnector requires system_id to be configured for database name construction. "
+                "Please ensure MLRun configuration is properly loaded before creating TDEngineConnector."
+            )
+        self.database = (
+            f"{tdengine_schemas._MODEL_MONITORING_DATABASE}_{mlrun.mlconf.system_id}"
+        )
         self._init_super_tables()

     @property
@@ -1050,7 +1057,6 @@ class TDEngineConnector(TSDBConnector):
         ]
     ):
         metric_objects = []
-
         if not df_results.empty:
             df_results.rename(
                 columns={
@@ -1062,7 +1068,9 @@
                 metric_objects.append(
                     mm_schemas.ApplicationResultRecord(
                         time=datetime.fromisoformat(
-                            row[mm_schemas.WriterEvent.END_INFER_TIME]
+                            row[mm_schemas.WriterEvent.END_INFER_TIME].replace(
+                                " +", "+"
+                            )
                         ),
                         result_name=row[mm_schemas.ResultData.RESULT_NAME],
                         kind=row[mm_schemas.ResultData.RESULT_KIND],
@@ -1082,7 +1090,9 @@
                 metric_objects.append(
                     mm_schemas.ApplicationMetricRecord(
                         time=datetime.fromisoformat(
-                            row[mm_schemas.WriterEvent.END_INFER_TIME]
+                            row[mm_schemas.WriterEvent.END_INFER_TIME].replace(
+                                " +", "+"
+                            )
                         ),
                         metric_name=row[mm_schemas.MetricData.METRIC_NAME],
                         value=row[mm_schemas.MetricData.METRIC_VALUE],
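
Note: the .replace(" +", "+") normalization in the two hunks above is needed because datetime.fromisoformat rejects a space between the time and the UTC offset; the timestamp strings returned here can carry that space, and collapsing it turns them into valid ISO-8601 input. A minimal illustration (the sample string is made up):

    from datetime import datetime

    raw = "2025-01-01 12:00:00.000000 +00:00"  # offset separated by a space: fromisoformat raises ValueError
    fixed = raw.replace(" +", "+")             # "2025-01-01 12:00:00.000000+00:00"
    print(datetime.fromisoformat(fixed))
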
@@ -1241,11 +1251,9 @@ class TDEngineConnector(TSDBConnector):
         df.dropna(inplace=True)
         return df

-    async def add_basic_metrics(
+    def add_basic_metrics(
         self,
         model_endpoint_objects: list[mlrun.common.schemas.ModelEndpoint],
-        project: str,
-        run_in_threadpool: Callable,
         metric_list: Optional[list[str]] = None,
     ) -> list[mlrun.common.schemas.ModelEndpoint]:
         """
@@ -1253,8 +1261,6 @@ class TDEngineConnector(TSDBConnector):

         :param model_endpoint_objects: A list of `ModelEndpoint` objects that will
                                        be filled with the relevant basic metrics.
-        :param project: The name of the project.
-        :param run_in_threadpool: A function that runs another function in a thread pool.
         :param metric_list: List of metrics to include from the time series DB. Defaults to all metrics.

         :return: A list of `ModelEndpointMonitoringMetric` objects.
mlrun/model_monitoring/db/tsdb/v3io/v3io_connector.py CHANGED
@@ -12,9 +12,9 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 import math
-from datetime import datetime, timedelta
+from datetime import datetime, timedelta, timezone
 from io import StringIO
-from typing import Callable, Literal, Optional, Union
+from typing import Literal, Optional, Union

 import pandas as pd
 import v3io_frames
@@ -1230,11 +1230,9 @@ class V3IOTSDBConnector(TSDBConnector):
         )
         return df.reset_index(drop=True)

-    async def add_basic_metrics(
+    def add_basic_metrics(
         self,
         model_endpoint_objects: list[mlrun.common.schemas.ModelEndpoint],
-        project: str,
-        run_in_threadpool: Callable,
         metric_list: Optional[list[str]] = None,
     ) -> list[mlrun.common.schemas.ModelEndpoint]:
         """
@@ -1242,8 +1240,6 @@

         :param model_endpoint_objects: A list of `ModelEndpoint` objects that will
                                        be filled with the relevant basic metrics.
-        :param project: The name of the project.
-        :param run_in_threadpool: A function that runs another function in a thread pool.
         :param metric_list: List of metrics to include from the time series DB. Defaults to all metrics.

         :return: A list of `ModelEndpointMonitoringMetric` objects.
@@ -1272,8 +1268,7 @@
             function,
             _,
         ) in metric_name_to_function_and_column_name.items():
-            metric_name_to_result[metric_name] = await run_in_threadpool(
-                function,
+            metric_name_to_result[metric_name] = function(
                 endpoint_ids=uids,
                 get_raw=True,
             )
@@ -1344,7 +1339,7 @@
         else:
             filter_query = app_filter_query

-        df = self._get_records(
+        raw_frames: list[v3io_frames.client.RawFrame] = self._get_records(
             table=mm_schemas.V3IOTSDBTables.APP_RESULTS,
             start=start,
             end=end,
@@ -1353,39 +1348,33 @@
                 mm_schemas.ResultData.RESULT_STATUS,
             ],
             filter_query=filter_query,
+            get_raw=True,
         )

-        # filter result status
-        if result_status_list and not df.empty:
-            df = df[df[mm_schemas.ResultData.RESULT_STATUS].isin(result_status_list)]
-
-        if df.empty:
+        if not raw_frames:
             return {}
-        else:
-            # convert application name to lower case
-            df[mm_schemas.ApplicationEvent.APPLICATION_NAME] = df[
-                mm_schemas.ApplicationEvent.APPLICATION_NAME
-            ].str.lower()
-
-            df = (
-                df[
-                    [
-                        mm_schemas.ApplicationEvent.APPLICATION_NAME,
-                        mm_schemas.ResultData.RESULT_STATUS,
-                        mm_schemas.ResultData.RESULT_VALUE,
-                    ]
-                ]
-                .groupby(
-                    [
-                        mm_schemas.ApplicationEvent.APPLICATION_NAME,
-                        mm_schemas.ResultData.RESULT_STATUS,
-                    ],
-                    observed=True,
-                )
-                .count()
-            )

-        return df[mm_schemas.ResultData.RESULT_VALUE].to_dict()
+        # Count occurrences by (application_name, result_status) from RawFrame objects
+        count_dict = {}
+
+        for frame in raw_frames:
+            # Extract column data from each RawFrame
+            app_name = frame.column_data(mm_schemas.ApplicationEvent.APPLICATION_NAME)[
+                0
+            ]
+            statuses = frame.column_data(mm_schemas.ResultData.RESULT_STATUS)
+
+            for status in statuses:
+                # Filter by result status if specified
+                if result_status_list and status not in result_status_list:
+                    continue
+
+                # Convert application name to lower case
+                key = (app_name.lower(), status)
+
+                # Update the count in the dictionary
+                count_dict[key] = count_dict.get(key, 0) + 1
+        return count_dict

     def count_processed_model_endpoints(
         self,
@@ -1543,51 +1532,140 @@
     ) -> mm_schemas.ModelEndpointDriftValues:
         table = mm_schemas.V3IOTSDBTables.APP_RESULTS
         start, end, interval = self._prepare_aligned_start_end(start, end)
-        df = self._get_records(
+        raw_frames: list[v3io_frames.client.RawFrame] = self._get_records(
             table=table,
             start=start,
             end=end,
             columns=[mm_schemas.ResultData.RESULT_STATUS],
+            get_raw=True,
         )
-        df = self._aggregate_raw_drift_data(df, start, end, interval)
-        if df.empty:
+
+        if not raw_frames:
+            return mm_schemas.ModelEndpointDriftValues(values=[])
+
+        aggregated_data = self._aggregate_raw_drift_data(
+            raw_frames=raw_frames, start=start, end=end, interval=interval
+        )
+        if not aggregated_data:
+            return mm_schemas.ModelEndpointDriftValues(values=[])
+
+        # Filter to only include entries with max result_status >= 1
+        filtered_data = [
+            (endpoint_id, timestamp, max_status)
+            for endpoint_id, timestamp, max_status in aggregated_data
+            if max_status >= 1
+        ]
+
+        if not filtered_data:
             return mm_schemas.ModelEndpointDriftValues(values=[])
-        df = df[df[f"max({mm_schemas.ResultData.RESULT_STATUS})"] >= 1]
-        return self._df_to_drift_data(df)
+
+        return self._convert_drift_data_to_values(aggregated_data=filtered_data)

     @staticmethod
     def _aggregate_raw_drift_data(
-        df: pd.DataFrame, start: datetime, end: datetime, interval: str
-    ) -> pd.DataFrame:
-        if df.empty:
-            return df
-        if not isinstance(df.index, pd.DatetimeIndex):
-            raise TypeError("Expected a DatetimeIndex on the DataFrame (time index).")
-        df[EventFieldType.ENDPOINT_ID] = (
-            df[EventFieldType.ENDPOINT_ID].astype("string").str.strip()
-        )  # remove extra data carried by the category dtype
-        window = df.loc[
-            (df.index >= start) & (df.index < end),
-            [mm_schemas.ResultData.RESULT_STATUS, EventFieldType.ENDPOINT_ID],
-        ]
-        out = (
-            window.groupby(
-                [
-                    EventFieldType.ENDPOINT_ID,
-                    pd.Grouper(
-                        freq=interval, origin=start, label="left", closed="left"
-                    ),
-                ]
-                # align to start, [start, end) intervals
-            )[mm_schemas.ResultData.RESULT_STATUS]
-            .max()
-            .reset_index()
-            .rename(
-                columns={
-                    mm_schemas.ResultData.RESULT_STATUS: f"max({mm_schemas.ResultData.RESULT_STATUS})"
-                }
-            )
-        )
-        return out.rename(
-            columns={"time": "_wstart"}
-        )  # rename datetime column to _wstart to align with the tdengine result
+        raw_frames: list[v3io_frames.client.RawFrame],
+        start: datetime,
+        end: datetime,
+        interval: str,
+    ) -> list[tuple[str, datetime, float]]:
+        """
+        Aggregate raw drift data from RawFrame objects.
+
+        :param raw_frames: List of RawFrame objects containing drift data.
+        :param start:      Start datetime for filtering data.
+        :param end:        End datetime for filtering data.
+        :param interval:   Time interval string (e.g., '5min') for aggregation.
+
+        :returns: list of tuples: (endpoint_id, timestamp, max_result_status)
+        """
+        if not raw_frames:
+            return []
+
+        # Parse interval to get timedelta
+        interval_td = pd.Timedelta(interval)
+
+        # Collect all data points from RawFrame objects
+        data_points = []
+        for frame in raw_frames:
+            endpoint_id = frame.column_data(EventFieldType.ENDPOINT_ID)[0]
+            result_statuses = frame.column_data(mm_schemas.ResultData.RESULT_STATUS)
+            timestamps = frame.indices()[0].times
+
+            # Combine data from this frame
+            for i, (status, timestamp) in enumerate(zip(result_statuses, timestamps)):
+                # V3IO TSDB returns timestamps in nanoseconds
+                timestamp_dt = pd.Timestamp(
+                    timestamp, unit="ns", tzinfo=timezone.utc
+                ).to_pydatetime()
+
+                # Filter by time window
+                if start <= timestamp_dt < end:
+                    data_points.append((endpoint_id, timestamp_dt, status))
+
+        if not data_points:
+            return []
+
+        # Group by endpoint_id and time intervals, then find max status
+        # Create time buckets aligned to start
+        grouped_data = {}
+        for endpoint_id, timestamp, status in data_points:
+            # Calculate which interval bucket this timestamp falls into
+            time_diff = timestamp - start
+            bucket_index = int(time_diff / interval_td)
+            bucket_start = start + (bucket_index * interval_td)
+
+            key = (endpoint_id, bucket_start)
+            if key not in grouped_data:
+                grouped_data[key] = status
+            else:
+                # Keep the maximum status value
+                grouped_data[key] = max(grouped_data[key], status)
+
+        # Convert to list of tuples
+        result = [
+            (endpoint_id, timestamp, max_status)
+            for (endpoint_id, timestamp), max_status in grouped_data.items()
+        ]
+
+        return result
+
+    @staticmethod
+    def _convert_drift_data_to_values(
+        aggregated_data: list[tuple[str, datetime, float]],
+    ) -> mm_schemas.ModelEndpointDriftValues:
+        """
+        Convert aggregated drift data to ModelEndpointDriftValues format.
+
+        :param aggregated_data: List of tuples (endpoint_id, timestamp, max_result_status)
+        :return: ModelEndpointDriftValues with counts of suspected and detected per timestamp
+        """
+        suspected_val = mm_schemas.constants.ResultStatusApp.potential_detection.value
+        detected_val = mm_schemas.constants.ResultStatusApp.detected.value
+
+        # Group by timestamp and result status, then count occurrences
+        timestamp_status_counts = {}
+        for _, timestamp, max_status in aggregated_data:
+            key = (timestamp, max_status)
+            timestamp_status_counts[key] = timestamp_status_counts.get(key, 0) + 1
+
+        # Organize by timestamp with counts for suspected and detected
+        timestamp_counts = {}
+        for (timestamp, status), count in timestamp_status_counts.items():
+            if timestamp not in timestamp_counts:
+                timestamp_counts[timestamp] = {
+                    "count_suspected": 0,
+                    "count_detected": 0,
+                }
+
+            if status == suspected_val:
+                timestamp_counts[timestamp]["count_suspected"] = count
+            elif status == detected_val:
+                timestamp_counts[timestamp]["count_detected"] = count
+
+        # Convert to the expected format: list of (timestamp, count_suspected, count_detected)
+        values = [
+            (timestamp, counts["count_suspected"], counts["count_detected"])
+            for timestamp, counts in sorted(timestamp_counts.items())
+        ]
+
+        return mm_schemas.ModelEndpointDriftValues(values=values)
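
Note: the bucketing in _aggregate_raw_drift_data replaces the previous pandas Grouper: each timestamp is assigned to a fixed-width window aligned to start, and the maximum result status per (endpoint, window) is kept. A small worked example of the bucket arithmetic (values are illustrative):

    from datetime import datetime, timezone
    import pandas as pd

    start = datetime(2025, 1, 1, 0, 0, tzinfo=timezone.utc)
    interval_td = pd.Timedelta("5min")

    # A point at 00:12:30 falls into bucket index 2, i.e. the [00:10, 00:15) window.
    timestamp = datetime(2025, 1, 1, 0, 12, 30, tzinfo=timezone.utc)
    bucket_index = int((timestamp - start) / interval_td)   # -> 2
    bucket_start = start + bucket_index * interval_td       # -> 2025-01-01 00:10:00+00:00
    print(bucket_index, bucket_start)
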
mlrun/projects/project.py CHANGED
@@ -45,6 +45,7 @@ import mlrun.common.runtimes.constants
 import mlrun.common.schemas.alert
 import mlrun.common.schemas.artifact
 import mlrun.common.schemas.model_monitoring.constants as mm_constants
+import mlrun.common.secrets
 import mlrun.datastore.datastore_profile
 import mlrun.db
 import mlrun.errors
@@ -3418,7 +3419,12 @@ class MlrunProject(ModelObj):
         self._initialized = True
         return self.spec._function_objects

-    def with_secrets(self, kind, source, prefix=""):
+    def with_secrets(
+        self,
+        kind,
+        source,
+        prefix="",
+    ):
         """register a secrets source (file, env or dict)

         read secrets from a source provider to be used in workflows, example::
@@ -3440,12 +3446,19 @@

         This will enable access to all secrets in vault registered to the current project.

-        :param kind: secret type (file, inline, env, vault)
+        :param kind: secret type (file, inline, env, vault, azure_vault)
         :param source: secret data or link (see example)
         :param prefix: add a prefix to the keys in this source

         :returns: project object
         """
+        # Block using mlrun-auth-secrets.* via azure_vault's k8s_secret param (client-side only)
+        if kind == "azure_vault" and isinstance(source, dict):
+            candidate_secret_name = (source.get("k8s_secret") or "").strip()
+            if candidate_secret_name:
+                mlrun.common.secrets.validate_not_forbidden_secret(
+                    candidate_secret_name
+                )

         if kind == "vault" and isinstance(source, list):
             source = {"project": self.metadata.name, "secrets": source}
mlrun/run.py CHANGED
@@ -118,7 +118,25 @@ def function_to_module(code="", workdir=None, secrets=None, silent=False):
         raise ValueError("nothing to run, specify command or function")

     command = os.path.join(workdir or "", command)
-    mod_name = mlrun.utils.helpers.get_module_name_from_path(command)
+
+    source_file_path_object, working_dir_path_object = (
+        mlrun.utils.helpers.get_source_and_working_dir_paths(command)
+    )
+    if source_file_path_object.is_relative_to(working_dir_path_object):
+        mod_name = mlrun.utils.helpers.get_relative_module_name_from_path(
+            source_file_path_object, working_dir_path_object
+        )
+    elif source_file_path_object.is_relative_to(
+        pathlib.Path(tempfile.gettempdir()).resolve()
+    ):
+        mod_name = Path(command).stem
+    else:
+        raise mlrun.errors.MLRunRuntimeError(
+            f"Cannot run source file '{command}': it must be located either under the current working "
+            f"directory ('{working_dir_path_object}') or the system temporary directory ('{tempfile.gettempdir()}'). "
+            f"This is required when running with local=True."
+        )
+
     spec = imputil.spec_from_file_location(mod_name, command)
     if spec is None:
         raise OSError(f"cannot import from {command!r}")
@@ -537,6 +555,7 @@

     # make sure function name is valid
     name = mlrun.utils.helpers.normalize_name(name)
+    mlrun.utils.helpers.validate_function_name(name)

     runner.metadata.name = name
     runner.metadata.project = (
@@ -576,6 +595,7 @@
     )
     runner.prepare_image_for_deploy()
+
     return runner


@@ -780,6 +800,9 @@ def code_to_function(
             kind=sub_kind,
             ignored_tags=ignored_tags,
         )
+
+        mlrun.utils.helpers.validate_function_name(name)
+
         spec["spec"]["env"].append(
             {
                 "name": "MLRUN_HTTPDB__NUCLIO__EXPLICIT_ACK",
@@ -832,6 +855,7 @@
         runtime.spec.build.code_origin = code_origin
         runtime.spec.build.origin_filename = filename or (name + ".ipynb")
         update_common(runtime, spec)
+
         return runtime

     if kind is None or kind in ["", "Function"]:
@@ -845,6 +869,7 @@

     if not name:
         raise ValueError("name must be specified")
+
     h = get_in(spec, "spec.handler", "").split(":")
     runtime.handler = h[0] if len(h) <= 1 else h[1]
     runtime.metadata = get_in(spec, "spec.metadata")