PyPI - mlrun - Versions diffs - 1.8.0rc30__py3-none-any.whl → 1.8.0rc31__py3-none-any.whl - Mend

mlrun 1.8.0rc30__py3-none-any.whl → 1.8.0rc31__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of mlrun might be problematic. Click here for more details.

Files changed (33)

mlrun/__init__.py +2 -35
mlrun/api/schemas/__init__.py +1 -6
mlrun/common/runtimes/constants.py +4 -0
mlrun/common/schemas/__init__.py +0 -2
mlrun/common/schemas/model_monitoring/__init__.py +0 -2
mlrun/common/schemas/model_monitoring/constants.py +1 -6
mlrun/common/schemas/model_monitoring/grafana.py +17 -11
mlrun/config.py +9 -36
mlrun/datastore/storeytargets.py +20 -3
mlrun/model_monitoring/applications/base.py +55 -40
mlrun/model_monitoring/applications/results.py +2 -2
mlrun/model_monitoring/controller.py +4 -3
mlrun/model_monitoring/db/tsdb/__init__.py +9 -5
mlrun/model_monitoring/db/tsdb/base.py +60 -39
mlrun/model_monitoring/db/tsdb/tdengine/tdengine_connector.py +117 -52
mlrun/model_monitoring/db/tsdb/v3io/v3io_connector.py +140 -14
mlrun/model_monitoring/helpers.py +16 -15
mlrun/model_monitoring/stream_processing.py +6 -13
mlrun/projects/pipelines.py +11 -3
mlrun/projects/project.py +84 -107
mlrun/serving/states.py +1 -1
mlrun/serving/v2_serving.py +20 -10
mlrun/utils/helpers.py +1 -1
mlrun/utils/logger.py +13 -10
mlrun/utils/notifications/notification_pusher.py +24 -0
mlrun/utils/regex.py +1 -0
mlrun/utils/version/version.json +2 -2
{mlrun-1.8.0rc30.dist-info → mlrun-1.8.0rc31.dist-info}/METADATA +2 -2
{mlrun-1.8.0rc30.dist-info → mlrun-1.8.0rc31.dist-info}/RECORD +33 -33
{mlrun-1.8.0rc30.dist-info → mlrun-1.8.0rc31.dist-info}/LICENSE +0 -0
{mlrun-1.8.0rc30.dist-info → mlrun-1.8.0rc31.dist-info}/WHEEL +0 -0
{mlrun-1.8.0rc30.dist-info → mlrun-1.8.0rc31.dist-info}/entry_points.txt +0 -0
{mlrun-1.8.0rc30.dist-info → mlrun-1.8.0rc31.dist-info}/top_level.txt +0 -0

mlrun/__init__.py CHANGED Viewed

@@ -26,7 +26,6 @@ __all__ = [
     "VolumeMount",
 ]
-import collections
 from os import environ, path
 from typing import Optional
@@ -215,40 +214,8 @@ def set_env_from_file(env_file: str, return_dict: bool = False) -> Optional[dict
     if None in env_vars.values():
         raise MLRunInvalidArgumentError("env file lines must be in the form key=value")
-    ordered_env_vars = order_env_vars(env_vars)
-    for key, value in ordered_env_vars.items():
+    for key, value in env_vars.items():
         environ[key] = value
     mlconf.reload()  # reload mlrun configuration
-    return ordered_env_vars if return_dict else None
-def order_env_vars(env_vars: dict[str, str]) -> dict[str, str]:
-    """
-    Order and process environment variables by first handling specific ordered keys,
-    then processing the remaining keys in the given dictionary.
-    The function ensures that environment variables defined in the `ordered_keys` list
-    are added to the result dictionary first. Any other environment variables from
-    `env_vars` are then added in the order they appear in the input dictionary.
-    :param env_vars: A dictionary where each key is the name of an environment variable (str),
-                      and each value is the corresponding environment variable value (str).
-    :return: A dictionary with the processed environment variables, ordered with the specific
-             keys first, followed by the rest in their original order.
-    """
-    ordered_keys = mlconf.get_ordered_keys()
-    ordered_env_vars = collections.OrderedDict()
-    # First, add the ordered keys to the dictionary
-    for key in ordered_keys:
-        if key in env_vars:
-            ordered_env_vars[key] = env_vars[key]
-    # Then, add the remaining keys (those not in ordered_keys)
-    for key, value in env_vars.items():
-        if key not in ordered_keys:
-            ordered_env_vars[key] = value
-    return ordered_env_vars
+    return env_vars if return_dict else None

mlrun/api/schemas/__init__.py CHANGED Viewed

@@ -193,9 +193,7 @@ FeatureValues = DeprecationHelper(mlrun.common.schemas.FeatureValues)
 GrafanaColumn = DeprecationHelper(
     mlrun.common.schemas.model_monitoring.grafana.GrafanaColumn
 )
-GrafanaDataPoint = DeprecationHelper(
-    mlrun.common.schemas.model_monitoring.grafana.GrafanaDataPoint
-)
 GrafanaNumberColumn = DeprecationHelper(
     mlrun.common.schemas.model_monitoring.grafana.GrafanaNumberColumn
 )
@@ -205,9 +203,6 @@ GrafanaStringColumn = DeprecationHelper(
 GrafanaTable = DeprecationHelper(
     mlrun.common.schemas.model_monitoring.grafana.GrafanaTable
 )
-GrafanaTimeSeriesTarget = DeprecationHelper(
-    mlrun.common.schemas.model_monitoring.grafana.GrafanaTimeSeriesTarget
-)
 ModelEndpoint = DeprecationHelper(mlrun.common.schemas.ModelEndpoint)
 ModelEndpointList = DeprecationHelper(mlrun.common.schemas.ModelEndpointList)
 ModelEndpointMetadata = DeprecationHelper(mlrun.common.schemas.ModelEndpointMetadata)

mlrun/common/runtimes/constants.py CHANGED Viewed

@@ -194,6 +194,10 @@ class RunStates:
             # TODO: add aborting state once we have it
         ]
+    @staticmethod
+    def notification_states():
+        return RunStates.terminal_states() + [RunStates.running]
     @staticmethod
     def run_state_to_pipeline_run_status(run_state: str):
         if not run_state:

mlrun/common/schemas/__init__.py CHANGED Viewed

@@ -140,11 +140,9 @@ from .model_monitoring import (
     FeatureSetFeatures,
     FeatureValues,
     GrafanaColumn,
-    GrafanaDataPoint,
     GrafanaNumberColumn,
     GrafanaStringColumn,
     GrafanaTable,
-    GrafanaTimeSeriesTarget,
     ModelEndpoint,
     ModelEndpointCreationStrategy,
     ModelEndpointList,

mlrun/common/schemas/model_monitoring/__init__.py CHANGED Viewed

@@ -51,11 +51,9 @@ from .constants import (
 from .grafana import (
     GrafanaColumn,
     GrafanaColumnType,
-    GrafanaDataPoint,
     GrafanaNumberColumn,
     GrafanaStringColumn,
     GrafanaTable,
-    GrafanaTimeSeriesTarget,
 )
 from .model_endpoints import (
     Features,

mlrun/common/schemas/model_monitoring/constants.py CHANGED Viewed

@@ -250,11 +250,6 @@ class TSDBTarget(MonitoringStrEnum):
     TDEngine = "tdengine"
-class DefaultProfileName(StrEnum):
-    STREAM = "mm-infra-stream"
-    TSDB = "mm-infra-tsdb"
 class ProjectSecretKeys:
     ACCESS_KEY = "MODEL_MONITORING_ACCESS_KEY"
     TSDB_PROFILE_NAME = "TSDB_PROFILE_NAME"
@@ -473,8 +468,8 @@ FQN_REGEX = re.compile(FQN_PATTERN)
 # refer to `mlrun.utils.regex.project_name`
 PROJECT_PATTERN = r"^[a-z0-9]([a-z0-9-]{0,61}[a-z0-9])?$"
 MODEL_ENDPOINT_ID_PATTERN = r"^[a-zA-Z0-9_-]+$"
+RESULT_NAME_PATTERN = r"[a-zA-Z_][a-zA-Z0-9_]*"
 INTERSECT_DICT_KEYS = {
     ModelEndpointMonitoringMetricType.METRIC: "intersect_metrics",

mlrun/common/schemas/model_monitoring/grafana.py CHANGED Viewed

@@ -46,14 +46,20 @@ class GrafanaTable(BaseModel):
         self.rows.append(list(args))
-class GrafanaDataPoint(BaseModel):
-    value: float
-    timestamp: int  # Unix timestamp in milliseconds
-class GrafanaTimeSeriesTarget(BaseModel):
-    target: str
-    datapoints: list[tuple[float, int]] = []
-    def add_data_point(self, data_point: GrafanaDataPoint):
-        self.datapoints.append((data_point.value, data_point.timestamp))
+class GrafanaModelEndpointsTable(GrafanaTable):
+    def __init__(self):
+        columns = self._init_columns()
+        super().__init__(columns=columns)
+    @staticmethod
+    def _init_columns():
+        return [
+            GrafanaColumn(text="endpoint_id", type=GrafanaColumnType.STRING),
+            GrafanaColumn(text="endpoint_name", type=GrafanaColumnType.STRING),
+            GrafanaColumn(text="endpoint_function", type=GrafanaColumnType.STRING),
+            GrafanaColumn(text="endpoint_model", type=GrafanaColumnType.STRING),
+            GrafanaColumn(text="endpoint_model_class", type=GrafanaColumnType.STRING),
+            GrafanaColumn(text="error_count", type=GrafanaColumnType.NUMBER),
+            GrafanaColumn(text="drift_status", type=GrafanaColumnType.NUMBER),
+            GrafanaColumn(text="sampling_percentage", type=GrafanaColumnType.NUMBER),
+        ]

mlrun/config.py CHANGED Viewed

@@ -1366,35 +1366,6 @@ class Config:
             ver in mlrun.mlconf.ce.mode for ver in ["lite", "full"]
         )
-    def get_s3_storage_options(self) -> dict[str, typing.Any]:
-        """
-        Generate storage options dictionary as required for handling S3 path in fsspec. The model monitoring stream
-        graph uses this method for generating the storage options for S3 parquet target path.
-        :return: A storage options dictionary in which each key-value pair  represents a particular configuration,
-        such as endpoint_url or aws access key.
-        """
-        key = mlrun.get_secret_or_env("AWS_ACCESS_KEY_ID")
-        secret = mlrun.get_secret_or_env("AWS_SECRET_ACCESS_KEY")
-        force_non_anonymous = mlrun.get_secret_or_env("S3_NON_ANONYMOUS")
-        profile = mlrun.get_secret_or_env("AWS_PROFILE")
-        storage_options = dict(
-            anon=not (force_non_anonymous or (key and secret)),
-            key=key,
-            secret=secret,
-        )
-        endpoint_url = mlrun.get_secret_or_env("S3_ENDPOINT_URL")
-        if endpoint_url:
-            client_kwargs = {"endpoint_url": endpoint_url}
-            storage_options["client_kwargs"] = client_kwargs
-        if profile:
-            storage_options["profile"] = profile
-        return storage_options
     def is_explicit_ack_enabled(self) -> bool:
         return self.httpdb.nuclio.explicit_ack == "enabled" and (
             not self.nuclio_version
@@ -1402,13 +1373,6 @@ class Config:
             >= semver.VersionInfo.parse("1.12.10")
         )
-    @staticmethod
-    def get_ordered_keys():
-        # Define the keys to process first
-        return [
-            "MLRUN_HTTPDB__HTTP__VERIFY"  # Ensure this key is processed first for proper connection setup
-        ]
 # Global configuration
 config = Config.from_dict(default_config)
@@ -1626,6 +1590,15 @@ def read_env(env=None, prefix=env_prefix):
     # The default function pod resource values are of type str; however, when reading from environment variable numbers,
     # it converts them to type int if contains only number, so we want to convert them to str.
     _convert_resources_to_str(config)
+    # If the environment variable MLRUN_HTTPDB__HTTP__VERIFY is set, we ensure SSL verification settings take precedence
+    # by moving the 'httpdb' configuration to the beginning of the config dictionary.
+    # This ensures that SSL verification is applied before other settings.
+    if "MLRUN_HTTPDB__HTTP__VERIFY" in env:
+        httpdb = config.pop("httpdb", None)
+        if httpdb:
+            config = {"httpdb": httpdb, **config}
     return config

mlrun/datastore/storeytargets.py CHANGED Viewed

@@ -42,9 +42,21 @@ def get_url_and_storage_options(path, external_storage_options=None):
 class TDEngineStoreyTarget(storey.TDEngineTarget):
-    def __init__(self, *args, **kwargs):
-        kwargs["url"] = mlrun.model_monitoring.helpers.get_tsdb_connection_string()
-        super().__init__(*args, **kwargs)
+    def __init__(self, *args, url: str, **kwargs):
+        if url.startswith("ds://"):
+            datastore_profile = (
+                mlrun.datastore.datastore_profile.datastore_profile_read(url)
+            )
+            if not isinstance(
+                datastore_profile,
+                mlrun.datastore.datastore_profile.TDEngineDatastoreProfile,
+            ):
+                raise ValueError(
+                    f"Unexpected datastore profile type:{datastore_profile.type}."
+                    "Only TDEngineDatastoreProfile is supported"
+                )
+            url = datastore_profile.dsn()
+        super().__init__(*args, url=url, **kwargs)
 class StoreyTargetUtils:
@@ -69,7 +81,12 @@ class StoreyTargetUtils:
 class ParquetStoreyTarget(storey.ParquetTarget):
     def __init__(self, *args, **kwargs):
+        alt_key_name = kwargs.pop("alternative_v3io_access_key", None)
         args, kwargs = StoreyTargetUtils.process_args_and_kwargs(args, kwargs)
+        storage_options = kwargs.get("storage_options", {})
+        if storage_options and storage_options.get("v3io_access_key") and alt_key_name:
+            if alt_key := mlrun.get_secret_or_env(alt_key_name):
+                storage_options["v3io_access_key"] = alt_key
         super().__init__(*args, **kwargs)

mlrun/model_monitoring/applications/base.py CHANGED Viewed

@@ -95,8 +95,8 @@ class ModelMonitoringApplicationBase(MonitoringApplicationToDict, ABC):
         sample_data: Optional[pd.DataFrame] = None,
         reference_data: Optional[pd.DataFrame] = None,
         endpoints: Optional[list[tuple[str, str]]] = None,
-        start: Optional[datetime] = None,
-        end: Optional[datetime] = None,
+        start: Optional[str] = None,
+        end: Optional[str] = None,
         base_period: Optional[int] = None,
     ):
         """
@@ -124,7 +124,6 @@ class ModelMonitoringApplicationBase(MonitoringApplicationToDict, ABC):
             return self.do_tracking(monitoring_context)
         if endpoints is not None:
-            start, end = self._validate_times(start, end, base_period)
             for window_start, window_end in self._window_generator(
                 start, end, base_period
             ):
@@ -137,43 +136,40 @@ class ModelMonitoringApplicationBase(MonitoringApplicationToDict, ABC):
                             mm_constants.ApplicationEvent.END_INFER_TIME: window_end,
                         }
                     )
-                    context.log_result(
-                        f"{endpoint_name}_{window_start.isoformat()}_{window_end.isoformat()}",
-                        result,
+                    result_key = (
+                        f"{endpoint_name}_{window_start.isoformat()}_{window_end.isoformat()}"
+                        if window_start and window_end
+                        else endpoint_name
                     )
+                    context.log_result(result_key, result)
         else:
             return call_do_tracking()
-    @staticmethod
-    def _validate_times(
-        start: Optional[datetime],
-        end: Optional[datetime],
-        base_period: Optional[int],
-    ) -> tuple[datetime, datetime]:
-        if (start is None) or (end is None):
-            raise mlrun.errors.MLRunValueError(
-                "When `endpoint_names` is provided, you must also pass the start and end times"
-            )
-        if (base_period is not None) and not (
-            isinstance(base_period, int) and base_period > 0
-        ):
-            raise mlrun.errors.MLRunValueError(
-                "`base_period` must be a nonnegative integer - the number of minutes in a monitoring window"
-            )
-        return start, end
     @staticmethod
     def _window_generator(
-        start: datetime, end: datetime, base_period: Optional[int]
-    ) -> Iterator[tuple[datetime, datetime]]:
+        start: Optional[str], end: Optional[str], base_period: Optional[int]
+    ) -> Iterator[tuple[Optional[datetime], Optional[datetime]]]:
+        if start is None or end is None:
+            # A single window based on the `sample_data` input - see `_handler`.
+            yield None, None
+            return
+        start_dt = datetime.fromisoformat(start)
+        end_dt = datetime.fromisoformat(end)
         if base_period is None:
-            yield start, end
+            yield start_dt, end_dt
             return
+        if not isinstance(base_period, int) or base_period <= 0:
+            raise mlrun.errors.MLRunValueError(
+                "`base_period` must be a nonnegative integer - the number of minutes in a monitoring window"
+            )
         window_length = timedelta(minutes=base_period)
-        current_start_time = start
-        while current_start_time < end:
-            current_end_time = min(current_start_time + window_length, end)
+        current_start_time = start_dt
+        while current_start_time < end_dt:
+            current_end_time = min(current_start_time + window_length, end_dt)
             yield current_start_time, current_end_time
             current_start_time = current_end_time
@@ -369,13 +365,25 @@ class ModelMonitoringApplicationBase(MonitoringApplicationToDict, ABC):
         :param requirements:      List of Python requirements to be installed in the image.
         :param requirements_file: Path to a Python requirements file to be installed in the image.
         :param endpoints:         A list of tuples of the model endpoint (name, uid) to get the data from.
-                                  If provided, you have to provide also the start and end times of the data to analyze.
-        :param start:             The start time of the sample data.
-        :param end:               The end time of the sample data.
+                                  If provided, and ``sample_data`` is not, you have to provide also the ``start`` and
+                                  ``end`` times of the data to analyze from the model endpoints.
+        :param start:             The start time of the endpoint's data, not included.
+                                  If you want the model endpoint's data at ``start`` included, you need to subtract a
+                                  small ``datetime.timedelta`` from it.
+        :param end:               The end time of the endpoint's data, included.
+                                  Please note: when ``start`` and ``end`` are set, they create a left-open time interval
+                                  ("window") :math:`(\\text{start}, \\text{end}]` that excludes the endpoint's data at
+                                  ``start`` and includes the data at ``end``:
+                                  :math:`\\text{start} < t \\leq \\text{end}`, :math:`t` is the time taken in the
+                                  window's data.
         :param base_period:       The window length in minutes. If ``None``, the whole window from ``start`` to ``end``
                                   is taken. If an integer is specified, the application is run from ``start`` to ``end``
                                   in ``base_period`` length windows, except for the last window that ends at ``end`` and
-                                  therefore may be shorter.
+                                  therefore may be shorter:
+                                  :math:`(\\text{start}, \\text{start} + \\text{base_period}],
+                                  (\\text{start} + \\text{base_period}, \\text{start} + 2\\cdot\\text{base_period}],
+                                  ..., (\\text{start} + m\\cdot\\text{base_period}, \\text{end}]`,
+                                  where :math:`m` is some positive integer.
         :returns: The output of the
                   :py:meth:`~mlrun.model_monitoring.applications.ModelMonitoringApplicationBase.do_tracking`
@@ -395,16 +403,23 @@ class ModelMonitoringApplicationBase(MonitoringApplicationToDict, ABC):
             project=project,
         )
-        params: dict[str, Union[list[tuple[str, str]], datetime, int, None]] = {}
+        params: dict[str, Union[list[tuple[str, str]], str, int, None]] = {}
         if endpoints:
-            start, end = cls._validate_times(start, end, base_period)
             params["endpoints"] = endpoints
-            params["start"] = start
-            params["end"] = end
-            params["base_period"] = base_period
+            if sample_data is None:
+                if start is None or end is None:
+                    raise mlrun.errors.MLRunValueError(
+                        "`start` and `end` times must be provided when `endpoints` "
+                        "is provided without `sample_data`"
+                    )
+                params["start"] = (
+                    start.isoformat() if isinstance(start, datetime) else start
+                )
+                params["end"] = end.isoformat() if isinstance(end, datetime) else end
+                params["base_period"] = base_period
         elif start or end or base_period:
             raise mlrun.errors.MLRunValueError(
-                "Custom start and end times or base_period are supported only with endpoints data"
+                "Custom `start` and `end` times or base_period are supported only with endpoints data"
             )
         inputs: dict[str, str] = {}

mlrun/model_monitoring/applications/results.py CHANGED Viewed

@@ -33,10 +33,10 @@ class _ModelMonitoringApplicationDataRes(ABC):
     name: str
     def __post_init__(self):
-        pat = re.compile(r"[a-zA-Z_][a-zA-Z0-9_]*")
+        pat = re.compile(mm_constants.RESULT_NAME_PATTERN)
         if not re.fullmatch(pat, self.name):
             raise mlrun.errors.MLRunValueError(
-                "Attribute name must comply with the regex `[a-zA-Z_][a-zA-Z0-9_]*`"
+                f"Attribute name must comply with the regex `{mm_constants.RESULT_NAME_PATTERN}`"
             )
     @abstractmethod

mlrun/model_monitoring/controller.py CHANGED Viewed

@@ -250,9 +250,10 @@ class MonitoringApplicationController:
         self.model_monitoring_access_key = self._get_model_monitoring_access_key()
         self.v3io_access_key = mlrun.mlconf.get_v3io_access_key()
-        self.storage_options = None
-        if mlrun.mlconf.artifact_path.startswith("s3://"):
-            self.storage_options = mlrun.mlconf.get_s3_storage_options()
+        store, _, _ = mlrun.store_manager.get_or_create_store(
+            mlrun.mlconf.artifact_path
+        )
+        self.storage_options = store.get_storage_options()
     @staticmethod
     def _get_model_monitoring_access_key() -> Optional[str]:

mlrun/model_monitoring/db/tsdb/__init__.py CHANGED Viewed

@@ -19,6 +19,7 @@ import mlrun.common.schemas.secret
 import mlrun.datastore.datastore_profile
 import mlrun.errors
 import mlrun.model_monitoring.helpers
+from mlrun.datastore.datastore_profile import DatastoreProfile
 from .base import TSDBConnector
@@ -29,10 +30,13 @@ class ObjectTSDBFactory(enum.Enum):
     v3io_tsdb = "v3io-tsdb"
     tdengine = "tdengine"
-    def to_tsdb_connector(self, project: str, **kwargs) -> TSDBConnector:
+    def to_tsdb_connector(
+        self, project: str, profile: DatastoreProfile, **kwargs
+    ) -> TSDBConnector:
         """
         Return a TSDBConnector object based on the provided enum value.
         :param project: The name of the project.
+        :param profile: Datastore profile containing DSN and credentials for TSDB connection
         :return: `TSDBConnector` object.
         """
@@ -51,7 +55,7 @@ class ObjectTSDBFactory(enum.Enum):
         from .tdengine.tdengine_connector import TDEngineConnector
-        return TDEngineConnector(project=project, **kwargs)
+        return TDEngineConnector(project=project, profile=profile, **kwargs)
     @classmethod
     def _missing_(cls, value: typing.Any):
@@ -87,12 +91,10 @@ def get_tsdb_connector(
     kwargs = {}
     if isinstance(profile, mlrun.datastore.datastore_profile.DatastoreProfileV3io):
         tsdb_connector_type = mlrun.common.schemas.model_monitoring.TSDBTarget.V3IO_TSDB
-        kwargs["v3io_access_key"] = profile.v3io_access_key
     elif isinstance(
         profile, mlrun.datastore.datastore_profile.TDEngineDatastoreProfile
     ):
         tsdb_connector_type = mlrun.common.schemas.model_monitoring.TSDBTarget.TDEngine
-        kwargs["connection_string"] = profile.dsn()
     else:
         extra_message = (
             ""
@@ -109,4 +111,6 @@ def get_tsdb_connector(
     tsdb_connector_factory = ObjectTSDBFactory(tsdb_connector_type)
     # Convert into TSDB connector object
-    return tsdb_connector_factory.to_tsdb_connector(project=project, **kwargs)
+    return tsdb_connector_factory.to_tsdb_connector(
+        project=project, profile=profile, **kwargs
+    )

mlrun 1.8.0rc30__py3-none-any.whl → 1.8.0rc31__py3-none-any.whl

Potentially problematic release.

mlrun 1.8.0rc30__py3-none-any.whl → 1.8.0rc31__py3-none-any.whl