PyPI - mlrun - Versions diffs - 1.10.0rc18__py3-none-any.whl → 1.11.0rc16__py3-none-any.whl - Mend

mlrun 1.10.0rc18__py3-none-any.whl → 1.11.0rc16__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of mlrun might be problematic. Click here for more details.

Files changed (167) hide show

mlrun/__init__.py +24 -3
mlrun/__main__.py +0 -4
mlrun/artifacts/dataset.py +2 -2
mlrun/artifacts/document.py +6 -1
mlrun/artifacts/llm_prompt.py +21 -15
mlrun/artifacts/model.py +3 -3
mlrun/artifacts/plots.py +1 -1
mlrun/{model_monitoring/db/tsdb/tdengine → auth}/__init__.py +2 -3
mlrun/auth/nuclio.py +89 -0
mlrun/auth/providers.py +429 -0
mlrun/auth/utils.py +415 -0
mlrun/common/constants.py +14 -0
mlrun/common/model_monitoring/helpers.py +123 -0
mlrun/common/runtimes/constants.py +28 -0
mlrun/common/schemas/__init__.py +14 -3
mlrun/common/schemas/alert.py +2 -2
mlrun/common/schemas/api_gateway.py +3 -0
mlrun/common/schemas/auth.py +12 -10
mlrun/common/schemas/client_spec.py +4 -0
mlrun/common/schemas/constants.py +25 -0
mlrun/common/schemas/frontend_spec.py +1 -8
mlrun/common/schemas/function.py +34 -0
mlrun/common/schemas/hub.py +33 -20
mlrun/common/schemas/model_monitoring/__init__.py +2 -1
mlrun/common/schemas/model_monitoring/constants.py +12 -15
mlrun/common/schemas/model_monitoring/functions.py +13 -4
mlrun/common/schemas/model_monitoring/model_endpoints.py +11 -0
mlrun/common/schemas/pipeline.py +1 -1
mlrun/common/schemas/secret.py +17 -2
mlrun/common/secrets.py +95 -1
mlrun/common/types.py +10 -10
mlrun/config.py +69 -19
mlrun/data_types/infer.py +2 -2
mlrun/datastore/__init__.py +12 -5
mlrun/datastore/azure_blob.py +162 -47
mlrun/datastore/base.py +274 -10
mlrun/datastore/datastore.py +7 -2
mlrun/datastore/datastore_profile.py +84 -22
mlrun/datastore/model_provider/huggingface_provider.py +225 -41
mlrun/datastore/model_provider/mock_model_provider.py +87 -0
mlrun/datastore/model_provider/model_provider.py +206 -74
mlrun/datastore/model_provider/openai_provider.py +226 -66
mlrun/datastore/s3.py +39 -18
mlrun/datastore/sources.py +1 -1
mlrun/datastore/store_resources.py +4 -4
mlrun/datastore/storeytargets.py +17 -12
mlrun/datastore/targets.py +1 -1
mlrun/datastore/utils.py +25 -6
mlrun/datastore/v3io.py +1 -1
mlrun/db/base.py +63 -32
mlrun/db/httpdb.py +373 -153
mlrun/db/nopdb.py +54 -21
mlrun/errors.py +4 -2
mlrun/execution.py +66 -25
mlrun/feature_store/api.py +1 -1
mlrun/feature_store/common.py +1 -1
mlrun/feature_store/feature_vector_utils.py +1 -1
mlrun/feature_store/steps.py +8 -6
mlrun/frameworks/_common/utils.py +3 -3
mlrun/frameworks/_dl_common/loggers/logger.py +1 -1
mlrun/frameworks/_dl_common/loggers/tensorboard_logger.py +2 -1
mlrun/frameworks/_ml_common/loggers/mlrun_logger.py +1 -1
mlrun/frameworks/_ml_common/utils.py +2 -1
mlrun/frameworks/auto_mlrun/auto_mlrun.py +4 -3
mlrun/frameworks/lgbm/mlrun_interfaces/mlrun_interface.py +2 -1
mlrun/frameworks/onnx/dataset.py +2 -1
mlrun/frameworks/onnx/mlrun_interface.py +2 -1
mlrun/frameworks/pytorch/callbacks/logging_callback.py +5 -4
mlrun/frameworks/pytorch/callbacks/mlrun_logging_callback.py +2 -1
mlrun/frameworks/pytorch/callbacks/tensorboard_logging_callback.py +2 -1
mlrun/frameworks/pytorch/utils.py +2 -1
mlrun/frameworks/sklearn/metric.py +2 -1
mlrun/frameworks/tf_keras/callbacks/logging_callback.py +5 -4
mlrun/frameworks/tf_keras/callbacks/mlrun_logging_callback.py +2 -1
mlrun/frameworks/tf_keras/callbacks/tensorboard_logging_callback.py +2 -1
mlrun/hub/__init__.py +52 -0
mlrun/hub/base.py +142 -0
mlrun/hub/module.py +172 -0
mlrun/hub/step.py +113 -0
mlrun/k8s_utils.py +105 -16
mlrun/launcher/base.py +15 -7
mlrun/launcher/local.py +4 -1
mlrun/model.py +14 -4
mlrun/model_monitoring/__init__.py +0 -1
mlrun/model_monitoring/api.py +65 -28
mlrun/model_monitoring/applications/__init__.py +1 -1
mlrun/model_monitoring/applications/base.py +299 -128
mlrun/model_monitoring/applications/context.py +2 -4
mlrun/model_monitoring/controller.py +132 -58
mlrun/model_monitoring/db/_schedules.py +38 -29
mlrun/model_monitoring/db/_stats.py +6 -16
mlrun/model_monitoring/db/tsdb/__init__.py +9 -7
mlrun/model_monitoring/db/tsdb/base.py +29 -9
mlrun/model_monitoring/db/tsdb/preaggregate.py +234 -0
mlrun/model_monitoring/db/tsdb/stream_graph_steps.py +63 -0
mlrun/model_monitoring/db/tsdb/timescaledb/queries/timescaledb_metrics_queries.py +414 -0
mlrun/model_monitoring/db/tsdb/timescaledb/queries/timescaledb_predictions_queries.py +376 -0
mlrun/model_monitoring/db/tsdb/timescaledb/queries/timescaledb_results_queries.py +590 -0
mlrun/model_monitoring/db/tsdb/timescaledb/timescaledb_connection.py +434 -0
mlrun/model_monitoring/db/tsdb/timescaledb/timescaledb_connector.py +541 -0
mlrun/model_monitoring/db/tsdb/timescaledb/timescaledb_operations.py +808 -0
mlrun/model_monitoring/db/tsdb/timescaledb/timescaledb_schema.py +502 -0
mlrun/model_monitoring/db/tsdb/timescaledb/timescaledb_stream.py +163 -0
mlrun/model_monitoring/db/tsdb/timescaledb/timescaledb_stream_graph_steps.py +60 -0
mlrun/model_monitoring/db/tsdb/timescaledb/utils/timescaledb_dataframe_processor.py +141 -0
mlrun/model_monitoring/db/tsdb/timescaledb/utils/timescaledb_query_builder.py +585 -0
mlrun/model_monitoring/db/tsdb/timescaledb/writer_graph_steps.py +73 -0
mlrun/model_monitoring/db/tsdb/v3io/stream_graph_steps.py +20 -9
mlrun/model_monitoring/db/tsdb/v3io/v3io_connector.py +235 -51
mlrun/model_monitoring/features_drift_table.py +2 -1
mlrun/model_monitoring/helpers.py +30 -6
mlrun/model_monitoring/stream_processing.py +34 -28
mlrun/model_monitoring/writer.py +224 -4
mlrun/package/__init__.py +2 -1
mlrun/platforms/__init__.py +0 -43
mlrun/platforms/iguazio.py +8 -4
mlrun/projects/operations.py +17 -11
mlrun/projects/pipelines.py +2 -2
mlrun/projects/project.py +187 -123
mlrun/run.py +95 -21
mlrun/runtimes/__init__.py +2 -186
mlrun/runtimes/base.py +103 -25
mlrun/runtimes/constants.py +225 -0
mlrun/runtimes/daskjob.py +5 -2
mlrun/runtimes/databricks_job/databricks_runtime.py +2 -1
mlrun/runtimes/local.py +5 -2
mlrun/runtimes/mounts.py +20 -2
mlrun/runtimes/nuclio/__init__.py +12 -7
mlrun/runtimes/nuclio/api_gateway.py +36 -6
mlrun/runtimes/nuclio/application/application.py +339 -40
mlrun/runtimes/nuclio/function.py +222 -72
mlrun/runtimes/nuclio/serving.py +132 -42
mlrun/runtimes/pod.py +213 -21
mlrun/runtimes/utils.py +49 -9
mlrun/secrets.py +99 -14
mlrun/serving/__init__.py +2 -0
mlrun/serving/remote.py +84 -11
mlrun/serving/routers.py +26 -44
mlrun/serving/server.py +138 -51
mlrun/serving/serving_wrapper.py +6 -2
mlrun/serving/states.py +997 -283
mlrun/serving/steps.py +62 -0
mlrun/serving/system_steps.py +149 -95
mlrun/serving/v2_serving.py +9 -10
mlrun/track/trackers/mlflow_tracker.py +29 -31
mlrun/utils/helpers.py +292 -94
mlrun/utils/http.py +9 -2
mlrun/utils/notifications/notification/base.py +18 -0
mlrun/utils/notifications/notification/git.py +3 -5
mlrun/utils/notifications/notification/mail.py +39 -16
mlrun/utils/notifications/notification/slack.py +2 -4
mlrun/utils/notifications/notification/webhook.py +2 -5
mlrun/utils/notifications/notification_pusher.py +3 -3
mlrun/utils/version/version.json +2 -2
mlrun/utils/version/version.py +3 -4
{mlrun-1.10.0rc18.dist-info → mlrun-1.11.0rc16.dist-info}/METADATA +63 -74
{mlrun-1.10.0rc18.dist-info → mlrun-1.11.0rc16.dist-info}/RECORD +161 -143
mlrun/api/schemas/__init__.py +0 -259
mlrun/db/auth_utils.py +0 -152
mlrun/model_monitoring/db/tsdb/tdengine/schemas.py +0 -344
mlrun/model_monitoring/db/tsdb/tdengine/stream_graph_steps.py +0 -75
mlrun/model_monitoring/db/tsdb/tdengine/tdengine_connection.py +0 -281
mlrun/model_monitoring/db/tsdb/tdengine/tdengine_connector.py +0 -1266
{mlrun-1.10.0rc18.dist-info → mlrun-1.11.0rc16.dist-info}/WHEEL +0 -0
{mlrun-1.10.0rc18.dist-info → mlrun-1.11.0rc16.dist-info}/entry_points.txt +0 -0
{mlrun-1.10.0rc18.dist-info → mlrun-1.11.0rc16.dist-info}/licenses/LICENSE +0 -0
{mlrun-1.10.0rc18.dist-info → mlrun-1.11.0rc16.dist-info}/top_level.txt +0 -0

mlrun/auth/utils.py ADDED Viewed

@@ -0,0 +1,415 @@
+# Copyright 2025 Iguazio
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import os
+import time
+import typing
+import jwt
+import yaml
+import mlrun.common.constants
+import mlrun.common.schemas
+import mlrun.utils.helpers
+from mlrun.config import config as mlconf
+if typing.TYPE_CHECKING:
+    import mlrun.db
+class Claims:
+    """
+    Names of the JWT claims read by the token helpers in this module.
+    """
+    # Subject claim; the helpers in this module treat its value as the user ID.
+    SUBJECT = "sub"
+    # Expiration claim; compared against time.time() when checking expiry.
+    EXPIRATION = "exp"
+def load_offline_token(raise_on_error=True) -> typing.Optional[str]:
+    """
+    Load the offline token from the environment variable or YAML file.
+    The function first attempts to retrieve the offline token from the environment variable.
+    If not found, it tries to load the token from a YAML file. If both methods fail, it either
+    raises an error or logs a warning based on the `raise_on_error` parameter.
+    :param raise_on_error: If True, raises an error when the offline token cannot be resolved.
+                           If False, logs a warning instead.
+    :return: The offline token if found, otherwise None.
+    """
+    if token_env := get_offline_token_from_env():
+        return token_env
+    return get_offline_token_from_file(raise_on_error=raise_on_error)
+def get_offline_token_from_file(raise_on_error: bool = True) -> typing.Optional[str]:
+    """
+    Retrieve the offline token from a configured file.
+    This function reads the token file specified in the configuration, parses its content,
+    and extracts the offline token. If the file does not exist or cannot be parsed, it either
+    raises an error or logs a warning based on the `raise_on_error` parameter.
+    :param raise_on_error: Whether to raise an error or log a warning on failure.
+    :return: The offline token if found, otherwise None.
+    """
+    tokens = load_secret_tokens_from_file(raise_on_error=raise_on_error)
+    if not tokens:
+        return None
+    return parse_offline_token_data(tokens=tokens, raise_on_error=raise_on_error)
+def load_secret_tokens_from_file(
+    raise_on_error: bool = True,
+) -> list[dict]:
+    """
+    Load and parse secret tokens from a configured file.
+    This function reads the secret tokens file (specified in
+    ``mlrun.mlconf.auth_with_oauth_token.token_file``) and returns the raw list
+    of token dictionaries under the ``secretTokens`` key. It does NOT validate
+    the tokens.
+    If the file is missing, empty, or malformed, the behavior depends on
+    ``raise_on_error``. In such cases, the function will either raise/log an
+    error and return an empty list.
+    :param raise_on_error: Whether to raise exceptions on read/parse failure.
+    :return: List of token dictionaries from ``secretTokens``.
+             Returns an empty list if parsing fails or no tokens exist.
+    :rtype: list[dict[str, Any]]
+    """
+    token_file = os.path.expanduser(mlconf.auth_with_oauth_token.token_file)
+    data = read_secret_tokens_file(raise_on_error=raise_on_error)
+    if not data:
+        mlrun.utils.helpers.raise_or_log_error(
+            f"Token file is empty or could not be parsed: {token_file}",
+            raise_on_error,
+        )
+        return []
+    tokens_list = data.get("secretTokens")
+    if not isinstance(tokens_list, list) or not tokens_list:
+        mlrun.utils.helpers.raise_or_log_error(
+            f"Invalid token file: 'secretTokens' must be a non-empty list in {token_file}",
+            raise_on_error,
+        )
+        return []
+    return tokens_list
+def read_secret_tokens_file(
+    raise_on_error: bool = True,
+) -> typing.Optional[dict[str, typing.Any]]:
+    """
+    Read and parse the secret tokens file.
+    This function attempts to read the token file specified in the configuration and parse its content as YAML.
+    If the file does not exist or cannot be parsed, it either raises an error or logs a warning based on the
+    `raise_on_error` parameter.
+    - The configured path may use ``~`` to represent the user’s home directory, which
+      will be expanded automatically.
+    :param raise_on_error: Whether to raise an error or log a warning on failure.
+    :return: The parsed content of the token file as a dictionary, or None if an error occurs.
+    """
+    token_file = os.path.expanduser(mlconf.auth_with_oauth_token.token_file)
+    if not os.path.exists(token_file):
+        mlrun.utils.helpers.raise_or_log_error(
+            f"Configured token file not found: {token_file}", raise_on_error
+        )
+        return None
+    try:
+        with open(token_file) as token_file_io:
+            data = yaml.safe_load(token_file_io)
+        if not data:
+            mlrun.utils.helpers.raise_or_log_error(
+                f"Token file {token_file} is empty or invalid",
+                raise_on_error,
+            )
+            return None
+        if not isinstance(data, dict):
+            mlrun.utils.helpers.raise_or_log_error(
+                f"Token file {token_file} must contain a YAML mapping (dictionary)",
+                raise_on_error,
+            )
+            return None
+        return data
+    except yaml.YAMLError as exc:
+        mlrun.utils.helpers.raise_or_log_error(
+            f"Failed to parse token file {token_file}: {exc}", raise_on_error
+        )
+        return None
+def parse_offline_token_data(
+    tokens: list[dict[str, typing.Any]], raise_on_error: bool = True
+) -> typing.Optional[str]:
+    """
+    Select the offline token entry from the parsed tokens list and return its token.
+    Logic:
+    1. Identify the target token entry using `mlrun.mlconf.auth_with_oauth_token.token_name`:
+       - If the value is set (non-empty):
+         - Look for entries where `name == <TOKEN_NAME>`.
+         - If no match is found, resolution fails.
+       - If the value is not set (empty):
+         - Look for entries named "default".
+         - If none is found, fall back to the first token in the list.
+       - If more than one entry carries the looked-up name, resolution fails.
+    2. Check the selected entry:
+       - Its `token` field must be present and non-empty (only truthiness is checked here).
+       - If so, that value is returned as the resolved offline token.
+    3. If any of the above steps fail, raise an error or log a warning.
+    :param tokens: List of token dictionaries loaded from the YAML file.
+    :param raise_on_error: Whether to raise an error or log a warning on failure.
+    :return: The resolved offline token, or None if resolution fails.
+    """
+    if not isinstance(tokens, list) or not tokens:
+        mlrun.utils.helpers.raise_or_log_error(
+            "Invalid token file: 'secretTokens' must be a non-empty list",
+            raise_on_error,
+        )
+        return None
+    # An unset token_name means "look for the entry called 'default'"
+    name = mlconf.auth_with_oauth_token.token_name or "default"
+    # Name matches win; the first-entry fallback applies only when no token_name was
+    # configured. With an explicit token_name and no match the list stays empty.
+    matches = [t for t in tokens if t.get("name") == name] or (
+        [tokens[0]] if not mlconf.auth_with_oauth_token.token_name else []
+    )
+    # Zero matches and duplicate names are both treated as a resolution failure
+    if len(matches) != 1:
+        mlrun.utils.helpers.raise_or_log_error(
+            f"Failed to resolve a unique token. Found {len(matches)} entries for name '{name}'",
+            raise_on_error,
+        )
+        return None
+    token_value = matches[0].get("token")
+    if not token_value:
+        mlrun.utils.helpers.raise_or_log_error(
+            "Resolved token entry missing 'token' field",
+            raise_on_error,
+        )
+        return None
+    return token_value
+def get_offline_token_from_env() -> typing.Optional[str]:
+    """
+    Retrieve the offline token stored under the `MLRUN_AUTH_OFFLINE_TOKEN` key.
+    The lookup is delegated to `mlrun.secrets.get_secret_or_env`; going by its name it
+    may consult secrets in addition to environment variables (confirm against that helper).
+    :return: The value returned by the lookup; None is expected when nothing is set.
+    """
+    # NOTE(review): mlrun.secrets is not among this module's explicit imports; this call
+    # relies on the submodule having been loaded elsewhere - confirm.
+    return mlrun.secrets.get_secret_or_env("MLRUN_AUTH_OFFLINE_TOKEN")
+def load_and_prepare_secret_tokens(
+    auth_user_id: str | None = None,
+    raise_on_error: bool = True,
+) -> list[mlrun.common.schemas.SecretToken]:
+    """
+    Load, validate, and translate secret tokens from a file into SecretToken objects.
+    Steps performed:
+      1. Load the secret tokens from the configured file.
+      2. Validate each token for required fields and uniqueness.
+      3. Translate validated token dictionaries into SecretToken objects.
+    :param auth_user_id: The user ID to filter the tokens by.
+    :param raise_on_error: Whether to raise exceptions or log warnings on failure
+                           in any of the steps (loading, validation, translation).
+    :return: List of SecretToken objects.
+    :rtype: list[mlrun.common.schemas.SecretToken]
+    """
+    tokens_list = load_secret_tokens_from_file(raise_on_error=raise_on_error)
+    validated_tokens = extract_and_validate_tokens_info(
+        secret_tokens=[
+            mlrun.common.schemas.SecretToken(
+                name=token["name"],
+                token=token["token"],
+            )
+            for token in tokens_list
+        ],
+        authenticated_id=auth_user_id,
+        filter_by_authenticated_id=True,
+    )
+    secret_tokens = _translate_secret_tokens(
+        validated_tokens, raise_on_error=raise_on_error
+    )
+    return secret_tokens
+def extract_and_validate_tokens_info(
+    secret_tokens: list[mlrun.common.schemas.SecretToken],
+    authenticated_id: str,
+    filter_by_authenticated_id: bool = False,
+) -> dict[str, dict[str, typing.Any]]:
+    """
+    Extract and validate tokens info from a list of SecretToken objects.
+    Each token is decoded without signature verification and must carry both the
+    expiration and subject claims. Token names must be non-empty and unique.
+    :param secret_tokens: List of SecretToken objects.
+    :param authenticated_id: The authenticated user ID the token subject is compared to.
+    :param filter_by_authenticated_id: When True, tokens whose subject differs from
+                                       ``authenticated_id`` are silently skipped; when False,
+                                       such a token is logged and raises an error.
+    :return: Dictionary keyed by token name; each value is a dictionary with the keys
+             ``token_exp`` (the expiration claim) and ``token`` (the token string).
+    :raises MLRunInvalidArgumentError: On an empty or duplicate token name, a token that cannot
+                                       be decoded, a missing expiration or subject claim, or
+                                       (when not filtering) a subject mismatch.
+    """
+    token_values = {}
+    for secret_token in secret_tokens:
+        token_name = secret_token.name
+        # Validate name is provided and not duplicate
+        if secret_token.name and secret_token.name not in token_values:
+            # The token is expected to be a refresh token which we cannot verify ourselves, we verify it separately
+            # via orca when exchanging it for an access token. We decode it here without verification to extract its
+            # claims.
+            decoded_token = _decode_token_unverified(secret_token.token)
+            # Validate token expiration existence
+            if not decoded_token.get(Claims.EXPIRATION):
+                raise mlrun.errors.MLRunInvalidArgumentError(
+                    f"Offline token '{token_name}' is missing the 'exp' (expiration) claim"
+                )
+            # Validate token subject existence
+            if not decoded_token.get(Claims.SUBJECT):
+                raise mlrun.errors.MLRunInvalidArgumentError(
+                    f"Offline token '{token_name}' is missing the 'sub' (subject) claim"
+                )
+            # Validate token belongs to the authenticated user
+            token_sub = decoded_token.get(Claims.SUBJECT)
+            if token_sub != authenticated_id:
+                # just ignore the token as it doesn't belong to the authenticated user
+                # (a skipped token's name is not recorded, so a later token may reuse it)
+                if filter_by_authenticated_id:
+                    continue
+                mlrun.utils.logger.warning(
+                    "Offline token subject does not match the authenticated user",
+                    token_name=token_name,
+                    token_sub=token_sub,
+                    user_id=authenticated_id,
+                )
+                raise mlrun.errors.MLRunInvalidArgumentError(
+                    f"Offline token '{token_name}' does not match the authenticated user ID. "
+                    "Stored tokens can only belong to the authenticated user."
+                )
+            # Store token info
+            token_values[secret_token.name] = {
+                "token_exp": decoded_token.get(Claims.EXPIRATION),
+                "token": secret_token.token,
+            }
+        else:
+            # Reached for an empty name as well as for a name already stored above
+            raise mlrun.errors.MLRunInvalidArgumentError(
+                f"Invalid or duplicate token name '{secret_token.name}' found in request payload"
+            )
+    return token_values
+def resolve_jwt_subject(
+    token: str, raise_on_error: bool = True
+) -> typing.Optional[str]:
+    """
+    Extract the 'sub' (subject/user ID) claim from a JWT token.
+    The token is decoded without signature verification since it has already
+    been verified earlier during the authentication process.
+    :param token: The JWT token string.
+    :param raise_on_error: Whether to raise an error or log a warning on failure.
+    :return: The 'sub' claim value, or None if extraction fails.
+    """
+    try:
+        # This method is used from the client side after receiving this token from the server, there's no need or
+        # ability to verify its signature here.
+        return _decode_token_unverified(token).get(Claims.SUBJECT)
+    except jwt.PyJWTError as exc:
+        mlrun.utils.helpers.raise_or_log_error(
+            f"Failed to decode JWT token: {exc}", raise_on_error
+        )
+        return None
+def is_token_expired(token: str, buffer_seconds: int = 0) -> bool:
+    """
+    Check if a JWT token is expired based on its 'exp' claim.
+    :param token: The JWT token string.
+    :param buffer_seconds: Number of seconds to subtract from the expiration time
+    :return: True if the token is expired, False otherwise.
+    """
+    # This method is used for caching and/or extra validation purposes in addition to the main verification flow,
+    # so we decode without signature verification here.
+    decoded_token = _decode_token_unverified(token)
+    expiration = decoded_token.get(Claims.EXPIRATION)
+    if not expiration:
+        raise mlrun.errors.MLRunInvalidArgumentError(
+            "Token is missing the 'exp' (expiration) claim"
+        )
+    now = time.time()
+    return now >= expiration - buffer_seconds
+def _decode_token_unverified(token: str) -> dict:
+    """
+    Decode a JWT and return its claims without verifying the signature.
+    :param token: The JWT token string.
+    :return: The decoded claims.
+    :raises MLRunInvalidArgumentError: On any failure; the original exception is chained
+                                       as the cause.
+    """
+    try:
+        # Signature verification is switched off explicitly
+        return jwt.decode(token, options={"verify_signature": False})
+    except jwt.DecodeError as exc:
+        raise mlrun.errors.MLRunInvalidArgumentError(
+            "Failed to decode offline token"
+        ) from exc
+    # Catch-all: every other failure surfaces as the same error type, so callers never
+    # see a raw jwt exception from this helper
+    except Exception as exc:
+        raise mlrun.errors.MLRunInvalidArgumentError(
+            "Unexpected error decoding token"
+        ) from exc
+def _translate_secret_tokens(
+    tokens_dict: dict[str, dict[str, typing.Any]], raise_on_error: bool = True
+) -> list[mlrun.common.schemas.SecretToken]:
+    """
+    Translate a dictionary of validated token data into SecretToken objects.
+    The dictionary is keyed by token name, with values containing token data
+    (including the token string). If an entry fails to translate, behavior depends
+    on ``raise_on_error``: raise an exception or log a warning.
+    :param tokens_dict: Dictionary of validated token data, keyed by token name.
+    :param raise_on_error: Whether to raise exceptions on translation errors.
+    :return: List of SecretToken objects created from the input dictionary.
+    :rtype: list[mlrun.common.schemas.SecretToken]
+    """
+    token_file = os.path.expanduser(mlconf.auth_with_oauth_token.token_file)
+    tokens = []
+    for token_name, token_data in tokens_dict.items():
+        try:
+            tokens.append(
+                mlrun.common.schemas.SecretToken(
+                    name=token_name,
+                    token=token_data["token"],
+                )
+            )
+        except Exception as exc:
+            mlrun.utils.helpers.raise_or_log_error(
+                f"Failed to create SecretToken from entry in {token_file}: {exc}",
+                raise_on_error,
+            )
+    return tokens

mlrun/common/constants.py CHANGED Viewed

@@ -27,9 +27,20 @@ DASK_LABEL_PREFIX = "dask.org/"
 NUCLIO_LABEL_PREFIX = "nuclio.io/"
 RESERVED_TAG_NAME_LATEST = "latest"
+# Kubernetes DNS-1123 label name length limit
+K8S_DNS_1123_LABEL_MAX_LENGTH = 63
+RESERVED_BATCH_JOB_SUFFIX = "-batch"
 JOB_TYPE_WORKFLOW_RUNNER = "workflow-runner"
 JOB_TYPE_PROJECT_LOADER = "project-loader"
 JOB_TYPE_RERUN_WORKFLOW_RUNNER = "rerun-workflow-runner"
+MLRUN_ACTIVE_PROJECT = "MLRUN_ACTIVE_PROJECT"
+MLRUN_JOB_AUTH_SECRET_PATH = "/var/mlrun-secrets/auth"
+MLRUN_JOB_AUTH_SECRET_FILE = ".igz.yml"
+MLRUN_RUNTIME_AUTH_DEFAULT_TOKEN_NAME = "default"
 class MLRunInternalLabels:
@@ -92,6 +103,9 @@ class MLRunInternalLabels:
     workflow = "workflow"
     feature_vector = "feature-vector"
+    auth_username = f"{MLRUN_LABEL_PREFIX}user"
+    auth_token_name = f"{MLRUN_LABEL_PREFIX}token"
     @classmethod
     def all(cls):
         return [

mlrun/common/model_monitoring/helpers.py CHANGED Viewed

@@ -14,6 +14,7 @@
 import sys
 import typing
+from datetime import datetime
 import mlrun.common
 import mlrun.common.schemas.model_monitoring.constants as mm_constants
@@ -24,6 +25,7 @@ BinCounts = typing.NewType("BinCounts", list[int])
 BinEdges = typing.NewType("BinEdges", list[float])
 _MAX_FLOAT = sys.float_info.max
+logger = mlrun.utils.create_logger(level="info", name="mm_helpers")
 def parse_model_endpoint_project_prefix(path: str, project_name: str):
@@ -50,6 +52,44 @@ def get_kafka_topic(project: str, function_name: typing.Optional[str] = None) ->
     )
+# Constants for TimescaleDB database naming
+TIMESCALEDB_DEFAULT_DB_PREFIX = "mlrun_mm"
+def get_tsdb_database_name(profile_database: str) -> str:
+    """
+    Determine the TimescaleDB database name based on configuration.
+    When auto_create_database is enabled (default), generates a database name
+    using the system_id: 'mlrun_mm_{system_id}'.
+    When disabled, uses the database from the profile as-is.
+    This function is used by both TimescaleDBConnector (API server side) and
+    TimescaleDBStoreyTarget (stream side) to ensure consistent database naming.
+    :param profile_database: The database name from the PostgreSQL profile.
+    :return: The database name to use for TimescaleDB connections.
+    :raises MLRunInvalidArgumentError: If auto_create_database is enabled but
+                                       system_id is not set.
+    """
+    auto_create = mlrun.mlconf.model_endpoint_monitoring.tsdb.auto_create_database
+    if not auto_create:
+        return profile_database
+    # Auto-create mode: generate database name using system_id
+    if not mlrun.mlconf.system_id:
+        raise mlrun.errors.MLRunInvalidArgumentError(
+            "system_id is not set in mlrun.mlconf. "
+            "TimescaleDB requires system_id for auto-generating database name "
+            "when auto_create_database is enabled. "
+            "Either set system_id in MLRun configuration or disable auto_create_database "
+            "and provide an explicit database in the PostgreSQL connection string."
+        )
+    return f"{TIMESCALEDB_DEFAULT_DB_PREFIX}_{mlrun.mlconf.system_id}"
 def _get_counts(hist: Histogram) -> BinCounts:
     """Return the histogram counts"""
     return BinCounts(hist[0])
@@ -87,3 +127,86 @@ def pad_features_hist(feature_stats: FeatureStats) -> None:
     for feature in feature_stats.values():
         if hist_key in feature:
             pad_hist(Histogram(feature[hist_key]))
+def get_model_endpoints_creation_task_status(
+    server,
+) -> tuple[
+    mlrun.common.schemas.BackgroundTaskState,
+    typing.Optional[datetime],
+    typing.Optional[set[str]],
+]:
+    """
+    Report the state of the model endpoint creation background task for a server.
+    The task is looked up in the run DB. When it is not found, the model endpoints of
+    the server's function are listed instead and the state is derived from whether
+    that listing is truthy.
+    :param server: Object exposing ``project``, ``model_endpoint_creation_task_name``,
+                   ``function_name`` and ``function_tag``.
+    :return: A tuple of (task state, timestamp of the task check or None when the task
+             was not found, set of model endpoint UIDs or None when the listing was not
+             performed or was falsy).
+    """
+    background_task = None
+    # Default state; kept if the lookup below raises anything other than not-found
+    # is not applicable here since such errors propagate to the caller
+    background_task_state = mlrun.common.schemas.BackgroundTaskState.running
+    background_task_check_timestamp = None
+    model_endpoint_uids = None
+    try:
+        background_task = mlrun.get_run_db().get_project_background_task(
+            server.project, server.model_endpoint_creation_task_name
+        )
+        background_task_check_timestamp = mlrun.utils.now_date()
+        log_background_task_state(
+            server, background_task.status.state, background_task_check_timestamp
+        )
+        background_task_state = background_task.status.state
+    except mlrun.errors.MLRunNotFoundError:
+        logger.warning(
+            "Model endpoint creation task not found listing model endpoints",
+            project=server.project,
+            task_name=server.model_endpoint_creation_task_name,
+        )
+    # Task not found: fall back to checking whether the endpoints already exist
+    if background_task is None:
+        # NOTE(review): the branch depends on the truthiness of the returned object,
+        # not on whether its ``endpoints`` collection is empty - confirm the return
+        # type evaluates falsy when there are no endpoints.
+        if model_endpoints := mlrun.get_run_db().list_model_endpoints(
+            project=server.project,
+            function_name=server.function_name,
+            function_tag=server.function_tag,
+            tsdb_metrics=False,
+        ):
+            model_endpoint_uids = {
+                endpoint.metadata.uid for endpoint in model_endpoints.endpoints
+            }
+            logger.info(
+                "Model endpoints found after background task not found, model monitoring will monitor "
+                "events",
+                project=server.project,
+                function_name=server.function_name,
+                function_tag=server.function_tag,
+                uids=model_endpoint_uids,
+            )
+            background_task_state = mlrun.common.schemas.BackgroundTaskState.succeeded
+        else:
+            logger.warning(
+                "Model endpoints not found after background task not found, model monitoring will not "
+                "monitor events",
+                project=server.project,
+                function_name=server.function_name,
+                function_tag=server.function_tag,
+            )
+            background_task_state = mlrun.common.schemas.BackgroundTaskState.failed
+    return background_task_state, background_task_check_timestamp, model_endpoint_uids
+def log_background_task_state(
+    server,
+    background_task_state: mlrun.common.schemas.BackgroundTaskState,
+    background_task_check_timestamp: typing.Optional[datetime],
+):
+    logger.info(
+        "Checking model endpoint creation task status",
+        task_name=server.model_endpoint_creation_task_name,
+    )
+    if (
+        background_task_state
+        in mlrun.common.schemas.BackgroundTaskState.terminal_states()
+    ):
+        logger.info(
+            f"Model endpoint creation task completed with state {background_task_state}"
+        )
+    else:  # in progress
+        logger.info(
+            f"Model endpoint creation task is still in progress with the current state: "
+            f"{background_task_state}. Events will not be monitored for the next "
+            f"{mlrun.mlconf.model_endpoint_monitoring.model_endpoint_creation_check_period} seconds",
+            function_name=server.function_name,
+            background_task_check_timestamp=background_task_check_timestamp.isoformat(),
+        )

mlrun/common/runtimes/constants.py CHANGED Viewed

@@ -16,6 +16,7 @@ import typing
 import mlrun.common.constants as mlrun_constants
 import mlrun_pipelines.common.models
+from mlrun.common.types import StrEnum
 class PodPhases:
@@ -365,3 +366,30 @@ class NuclioIngressAddTemplatedIngressModes:
 class FunctionEnvironmentVariables:
     _env_prefix = "MLRUN_"
     auth_session = f"{_env_prefix}AUTH_SESSION"
+# Kubernetes probe types
+class ProbeType(StrEnum):
+    """Kubernetes probe types; each member's value is the lowercase probe name."""
+    READINESS = "readiness"
+    LIVENESS = "liveness"
+    STARTUP = "startup"
+    @property
+    def key(self):
+        """Return the member value with a ``Probe`` suffix, e.g. ``readinessProbe``."""
+        return f"{self.value}Probe"
+    @classmethod
+    def is_valid(cls, value: str, raise_on_error: bool = False) -> bool:
+        """
+        Check whether ``value`` is the value of one of the enum members.
+        :param value: The probe type string to check.
+        :param raise_on_error: When True, an invalid value raises instead of returning False.
+        :return: True if ``value`` matches a member value, False otherwise.
+        :raises ValueError: If ``value`` is invalid and ``raise_on_error`` is True.
+        """
+        # _value2member_map_ is the enum's value-to-member lookup table
+        valid_value = value in cls._value2member_map_
+        if not valid_value and raise_on_error:
+            raise ValueError(
+                f"Invalid probe type: {value}. Must be one of: {[p.value for p in ProbeType]}"
+            )
+        return valid_value
+class ProbeTimeConfig(StrEnum):
+    """Names of probe timing settings, spelled in camelCase."""
+    # NOTE(review): presumably these match the field names of a Kubernetes probe spec - confirm.
+    INITIAL_DELAY_SECONDS = "initialDelaySeconds"
+    PERIOD_SECONDS = "periodSeconds"
+    TIMEOUT_SECONDS = "timeoutSeconds"
+    FAILURE_THRESHOLD = "failureThreshold"

mlrun/common/schemas/__init__.py CHANGED Viewed

@@ -43,6 +43,7 @@ from .artifact import (
 from .auth import (
     AuthInfo,
     AuthorizationAction,
+    AuthorizationResourceNamespace,
     AuthorizationResourceTypes,
     AuthorizationVerificationInput,
     Credentials,
@@ -65,7 +66,9 @@ from .common import ImageBuilder
 from .constants import (
     APIStates,
     ArtifactPartitionByField,
+    AuthorizationHeaderPrefixes,
     ClusterizationRole,
+    CookieNames,
     DeletionStrategy,
     FeatureStorePartitionByField,
     HeaderNames,
@@ -111,14 +114,18 @@ from .feature_store import (
 )
 from .frontend_spec import (
     ArtifactLimits,
-    AuthenticationFeatureFlag,
     FeatureFlags,
     FrontendSpec,
     NuclioStreamsFeatureFlag,
     PreemptionNodesFeatureFlag,
     ProjectMembershipFeatureFlag,
 )
-from .function import FunctionState, PreemptionModes, SecurityContextEnrichmentModes
+from .function import (
+    BatchingSpec,
+    FunctionState,
+    PreemptionModes,
+    SecurityContextEnrichmentModes,
+)
 from .http import HTTPSessionRetryMode
 from .hub import (
     HubCatalog,
@@ -154,6 +161,7 @@ from .model_monitoring import (
     ModelEndpointSchema,
     ModelEndpointSpec,
     ModelEndpointStatus,
+    ModelMonitoringInfraLabel,
     ModelMonitoringMode,
     MonitoringFunctionNames,
     TSDBTarget,
@@ -211,10 +219,13 @@ from .schedule import (
 )
 from .secret import (
     AuthSecretData,
+    ListSecretTokensResponse,
     SecretKeysData,
     SecretProviderName,
     SecretsData,
-    UserSecretCreationRequest,
+    SecretToken,
+    SecretTokenInfo,
+    StoreSecretTokensResponse,
 )
 from .serving import ModelRunnerStepData, ModelsData, MonitoringData
 from .tag import Tag, TagObjects

mlrun 1.10.0rc18__py3-none-any.whl → 1.11.0rc16__py3-none-any.whl

Potentially problematic release.

mlrun 1.10.0rc18__py3-none-any.whl → 1.11.0rc16__py3-none-any.whl