mlrun 1.8.0rc30__py3-none-any.whl → 1.8.0rc32__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of mlrun has been flagged in its source registry.

Files changed (36):
  1. mlrun/__init__.py +2 -35
  2. mlrun/api/schemas/__init__.py +1 -6
  3. mlrun/common/runtimes/constants.py +4 -0
  4. mlrun/common/schemas/__init__.py +0 -2
  5. mlrun/common/schemas/model_monitoring/__init__.py +0 -2
  6. mlrun/common/schemas/model_monitoring/constants.py +1 -6
  7. mlrun/common/schemas/model_monitoring/grafana.py +17 -11
  8. mlrun/config.py +9 -36
  9. mlrun/datastore/storeytargets.py +20 -3
  10. mlrun/db/base.py +1 -1
  11. mlrun/db/httpdb.py +5 -4
  12. mlrun/db/nopdb.py +1 -1
  13. mlrun/model_monitoring/applications/base.py +111 -40
  14. mlrun/model_monitoring/applications/results.py +2 -2
  15. mlrun/model_monitoring/controller.py +4 -3
  16. mlrun/model_monitoring/db/tsdb/__init__.py +9 -5
  17. mlrun/model_monitoring/db/tsdb/base.py +60 -39
  18. mlrun/model_monitoring/db/tsdb/tdengine/tdengine_connector.py +117 -52
  19. mlrun/model_monitoring/db/tsdb/v3io/v3io_connector.py +140 -14
  20. mlrun/model_monitoring/helpers.py +16 -15
  21. mlrun/model_monitoring/stream_processing.py +6 -13
  22. mlrun/projects/pipelines.py +11 -3
  23. mlrun/projects/project.py +88 -111
  24. mlrun/serving/states.py +1 -1
  25. mlrun/serving/v2_serving.py +20 -10
  26. mlrun/utils/helpers.py +1 -1
  27. mlrun/utils/logger.py +13 -10
  28. mlrun/utils/notifications/notification_pusher.py +24 -0
  29. mlrun/utils/regex.py +1 -0
  30. mlrun/utils/version/version.json +2 -2
  31. {mlrun-1.8.0rc30.dist-info → mlrun-1.8.0rc32.dist-info}/METADATA +2 -2
  32. {mlrun-1.8.0rc30.dist-info → mlrun-1.8.0rc32.dist-info}/RECORD +36 -36
  33. {mlrun-1.8.0rc30.dist-info → mlrun-1.8.0rc32.dist-info}/LICENSE +0 -0
  34. {mlrun-1.8.0rc30.dist-info → mlrun-1.8.0rc32.dist-info}/WHEEL +0 -0
  35. {mlrun-1.8.0rc30.dist-info → mlrun-1.8.0rc32.dist-info}/entry_points.txt +0 -0
  36. {mlrun-1.8.0rc30.dist-info → mlrun-1.8.0rc32.dist-info}/top_level.txt +0 -0
mlrun/model_monitoring/db/tsdb/v3io/v3io_connector.py

```diff
@@ -11,10 +11,11 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-
+import asyncio
+import math
 from datetime import datetime, timedelta, timezone
 from io import StringIO
-from typing import Literal, Optional, Union
+from typing import Callable, Literal, Optional, Union
 
 import pandas as pd
 import v3io_frames
@@ -491,8 +492,9 @@ class V3IOTSDBConnector(TSDBConnector):
         interval: Optional[str] = None,
         agg_funcs: Optional[list[str]] = None,
         sliding_window_step: Optional[str] = None,
+        get_raw: bool = False,
         **kwargs,
-    ) -> pd.DataFrame:
+    ) -> Union[pd.DataFrame, list[v3io_frames.client.RawFrame]]:
         """
         Getting records from V3IO TSDB data collection.
         :param table: Path to the collection to query.
@@ -517,6 +519,10 @@ class V3IOTSDBConnector(TSDBConnector):
                                     `sliding_window_step` is provided, interval must be provided as well. Provided
                                     as a string in the format of '1m', '1h', etc.
         :param kwargs: Additional keyword arguments passed to the read method of frames client.
+        :param get_raw: Whether to return the request as raw frames rather than a pandas dataframe.
+                        Defaults to False. This can greatly improve performance when a dataframe isn't
+                        needed.
+
         :return: DataFrame with the provided attributes from the data collection.
         :raise: MLRunNotFoundError if the provided table wasn't found.
         """
@@ -530,7 +536,7 @@ class V3IOTSDBConnector(TSDBConnector):
         aggregators = ",".join(agg_funcs) if agg_funcs else None
         table_path = self.tables[table]
         try:
-            df = self.frames_client.read(
+            res = self.frames_client.read(
                 backend=_TSDB_BE,
                 table=table_path,
                 start=start,
@@ -540,15 +546,18 @@ class V3IOTSDBConnector(TSDBConnector):
                 aggregation_window=interval,
                 aggregators=aggregators,
                 step=sliding_window_step,
+                get_raw=get_raw,
                 **kwargs,
             )
+            if get_raw:
+                res = list(res)
         except v3io_frames.Error as err:
             if _is_no_schema_error(err):
-                return pd.DataFrame()
+                return [] if get_raw else pd.DataFrame()
             else:
                 raise err
 
-        return df
+        return res
 
     def _get_v3io_source_directory(self) -> str:
         """
```
mlrun/model_monitoring/db/tsdb/v3io/v3io_connector.py (continued)

```diff
@@ -778,16 +787,23 @@ class V3IOTSDBConnector(TSDBConnector):
         endpoint_ids: Union[str, list[str]],
         start: Optional[datetime] = None,
         end: Optional[datetime] = None,
-    ) -> pd.DataFrame:
+        get_raw: bool = False,
+    ) -> Union[pd.DataFrame, list[v3io_frames.client.RawFrame]]:
         filter_query = self._get_endpoint_filter(endpoint_id=endpoint_ids)
         start, end = self._get_start_end(start, end)
-        df = self._get_records(
+        res = self._get_records(
             table=mm_schemas.V3IOTSDBTables.PREDICTIONS,
             start=start,
             end=end,
             filter_query=filter_query,
             agg_funcs=["last"],
+            get_raw=get_raw,
         )
+
+        if get_raw:
+            return res
+
+        df = res
         if not df.empty:
             df.rename(
                 columns={
@@ -811,11 +827,12 @@ class V3IOTSDBConnector(TSDBConnector):
         endpoint_ids: Union[str, list[str]],
         start: Optional[datetime] = None,
         end: Optional[datetime] = None,
-    ) -> pd.DataFrame:
+        get_raw: bool = False,
+    ) -> Union[pd.DataFrame, list[v3io_frames.client.RawFrame]]:
         filter_query = self._get_endpoint_filter(endpoint_id=endpoint_ids)
         start = start or (mlrun.utils.datetime_now() - timedelta(hours=24))
         start, end = self._get_start_end(start, end)
-        df = self._get_records(
+        res = self._get_records(
             table=mm_schemas.V3IOTSDBTables.APP_RESULTS,
             start=start,
             end=end,
@@ -823,7 +840,12 @@ class V3IOTSDBConnector(TSDBConnector):
             filter_query=filter_query,
             agg_funcs=["max"],
             group_by="endpoint_id",
+            get_raw=get_raw,
         )
+        if get_raw:
+            return res
+
+        df = res
         if not df.empty:
             df.columns = [
                 col[len("max(") : -1] if "max(" in col else col for col in df.columns
```
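As an aside, the `max(` prefix stripping in that last comprehension works like this (standalone sketch with illustrative values):

```python
# V3IO frames names aggregated columns like "max(result_status)"; the
# comprehension strips the aggregator wrapper back to the bare column name.
columns = ["endpoint_id", "max(result_status)"]
cleaned = [col[len("max(") : -1] if "max(" in col else col for col in columns]
print(cleaned)  # ['endpoint_id', 'result_status']
```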
mlrun/model_monitoring/db/tsdb/v3io/v3io_connector.py (continued)

```diff
@@ -884,21 +906,28 @@ class V3IOTSDBConnector(TSDBConnector):
         endpoint_ids: Union[str, list[str]],
         start: Optional[datetime] = None,
         end: Optional[datetime] = None,
-    ) -> pd.DataFrame:
+        get_raw: bool = False,
+    ) -> Union[pd.DataFrame, list[v3io_frames.client.RawFrame]]:
         filter_query = self._get_endpoint_filter(endpoint_id=endpoint_ids)
         if filter_query:
             filter_query += f"AND {mm_schemas.EventFieldType.ERROR_TYPE} == '{mm_schemas.EventFieldType.INFER_ERROR}'"
         else:
             filter_query = f"{mm_schemas.EventFieldType.ERROR_TYPE} == '{mm_schemas.EventFieldType.INFER_ERROR}'"
         start, end = self._get_start_end(start, end)
-        df = self._get_records(
+        res = self._get_records(
             table=mm_schemas.FileTargetKind.ERRORS,
             start=start,
             end=end,
             columns=[mm_schemas.EventFieldType.ERROR_COUNT],
             filter_query=filter_query,
             agg_funcs=["count"],
+            get_raw=get_raw,
         )
+
+        if get_raw:
+            return res
+
+        df = res
         if not df.empty:
             df.rename(
                 columns={
@@ -914,18 +943,25 @@ class V3IOTSDBConnector(TSDBConnector):
         endpoint_ids: Union[str, list[str]],
         start: Optional[datetime] = None,
         end: Optional[datetime] = None,
-    ) -> pd.DataFrame:
+        get_raw: bool = False,
+    ) -> Union[pd.DataFrame, list[v3io_frames.client.RawFrame]]:
         filter_query = self._get_endpoint_filter(endpoint_id=endpoint_ids)
         start = start or (mlrun.utils.datetime_now() - timedelta(hours=24))
         start, end = self._get_start_end(start, end)
-        df = self._get_records(
+        res = self._get_records(
             table=mm_schemas.V3IOTSDBTables.PREDICTIONS,
             start=start,
             end=end,
             columns=[mm_schemas.EventFieldType.LATENCY],
             filter_query=filter_query,
             agg_funcs=["avg"],
+            get_raw=get_raw,
         )
+
+        if get_raw:
+            return res
+
+        df = res
         if not df.empty:
             df.dropna(inplace=True)
             df.rename(
```
mlrun/model_monitoring/db/tsdb/v3io/v3io_connector.py (continued)

```diff
@@ -935,3 +971,93 @@ class V3IOTSDBConnector(TSDBConnector):
                 inplace=True,
             )
         return df.reset_index(drop=True)
+
+    async def add_basic_metrics(
+        self,
+        model_endpoint_objects: list[mlrun.common.schemas.ModelEndpoint],
+        project: str,
+        run_in_threadpool: Callable,
+    ) -> list[mlrun.common.schemas.ModelEndpoint]:
+        """
+        Fetch basic metrics from V3IO TSDB and add them to the MEP objects.
+
+        :param model_endpoint_objects: A list of `ModelEndpoint` objects that will
+                                       be filled with the relevant basic metrics.
+        :param project:                The name of the project.
+        :param run_in_threadpool:      A function that runs another function in a thread pool.
+
+        :return: A list of `ModelEndpoint` objects, enriched with the basic metrics.
+        """
+
+        uids = []
+        model_endpoint_objects_by_uid = {}
+        for model_endpoint_object in model_endpoint_objects:
+            uid = model_endpoint_object.metadata.uid
+            uids.append(uid)
+            model_endpoint_objects_by_uid[uid] = model_endpoint_object
+
+        coroutines = [
+            run_in_threadpool(
+                self.get_error_count,
+                endpoint_ids=uids,
+                get_raw=True,
+            ),
+            run_in_threadpool(
+                self.get_last_request,
+                endpoint_ids=uids,
+                get_raw=True,
+            ),
+            run_in_threadpool(
+                self.get_avg_latency,
+                endpoint_ids=uids,
+                get_raw=True,
+            ),
+            run_in_threadpool(
+                self.get_drift_status,
+                endpoint_ids=uids,
+                get_raw=True,
+            ),
+        ]
+
+        (
+            error_count_res,
+            last_request_res,
+            avg_latency_res,
+            drift_status_res,
+        ) = await asyncio.gather(*coroutines)
+
+        def add_metric(
+            metric: str,
+            column_name: str,
+            frames: list,
+        ):
+            for frame in frames:
+                endpoint_ids = frame.column_data("endpoint_id")
+                metric_data = frame.column_data(column_name)
+                for index, endpoint_id in enumerate(endpoint_ids):
+                    mep = model_endpoint_objects_by_uid.get(endpoint_id)
+                    value = metric_data[index]
+                    if mep and value is not None and not math.isnan(value):
+                        setattr(mep.status, metric, value)
+
+        add_metric(
+            "error_count",
+            "count(error_count)",
+            error_count_res,
+        )
+        add_metric(
+            "last_request",
+            "last(last_request_timestamp)",
+            last_request_res,
+        )
+        add_metric(
+            "avg_latency",
+            "avg(latency)",
+            avg_latency_res,
+        )
+        add_metric(
+            "result_status",
+            "max(result_status)",
+            drift_status_res,
+        )
+        return list(model_endpoint_objects_by_uid.values())
```

Note: the extracted page showed the last two `add_metric` calls with their metric names, column names, and result lists scrambled across each other ("avg_latency" paired with `max(result_status)` and `drift_status_res`, and vice versa); they are shown above with each metric matched to its own aggregation column and result set. The docstring's return type is also aligned with the annotated `list[ModelEndpoint]`.
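A hedged usage sketch of the new `add_basic_metrics` coroutine; the `connector` and `endpoints` objects are assumptions, and `run_in_threadpool` here is starlette's helper, which matches the "call a sync function with kwargs in a thread pool" contract the method expects:

```python
import asyncio

# Assumption: starlette's helper satisfies the run_in_threadpool parameter.
from starlette.concurrency import run_in_threadpool


async def enrich_endpoints(connector, endpoints, project: str):
    # Each get_* query runs in a worker thread with get_raw=True; the raw
    # frames are then folded back into the ModelEndpoint.status fields.
    return await connector.add_basic_metrics(
        model_endpoint_objects=endpoints,
        project=project,
        run_in_threadpool=run_in_threadpool,
    )


# asyncio.run(enrich_endpoints(connector, endpoints, "my-project"))
```

Running the four TSDB queries concurrently via `asyncio.gather`, rather than sequentially per endpoint, is what makes the raw-frame path worthwhile here.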
mlrun/model_monitoring/helpers.py

```diff
@@ -246,21 +246,6 @@ def get_monitoring_drift_measures_data(project: str, endpoint_id: str) -> "DataI
     )
 
 
-def get_tsdb_connection_string(
-    secret_provider: Optional[Callable[[str], str]] = None,
-) -> str:
-    """Get TSDB connection string from the project secret. If wasn't set, take it from the system
-    configurations.
-    :param secret_provider: An optional secret provider to get the connection string secret.
-    :return: Valid TSDB connection string.
-    """
-
-    return mlrun.get_secret_or_env(
-        key=mm_constants.ProjectSecretKeys.TSDB_CONNECTION,
-        secret_provider=secret_provider,
-    )
-
-
 def _get_profile(
     project: str,
     secret_provider: Optional[Callable[[str], str]],
@@ -554,6 +539,22 @@ def get_result_instance_fqn(
     return f"{model_endpoint_id}.{app_name}.result.{result_name}"
 
 
+def get_alert_name_from_result_fqn(result_fqn: str):
+    """
+    :param result_fqn: current get_result_instance_fqn format: `{model_endpoint_id}.{app_name}.result.{result_name}`
+
+    :return: shorter fqn without forbidden alert characters.
+    """
+    if result_fqn.count(".") != 3 or result_fqn.split(".")[2] != "result":
+        raise mlrun.errors.MLRunValueError(
+            f"result_fqn: {result_fqn} is not in the correct format: {{model_endpoint_id}}.{{app_name}}."
+            f"result.{{result_name}}"
+        )
+    # Name format cannot contain "."
+    # The third component is always `result`, so it is not necessary for checking uniqueness.
+    return "_".join(result_fqn.split(".")[i] for i in [0, 1, 3])
+
+
 def get_default_result_instance_fqn(model_endpoint_id: str) -> str:
     return get_result_instance_fqn(
         model_endpoint_id,
```
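A worked example of the FQN-to-alert-name conversion (values are illustrative):

```python
# get_result_instance_fqn produces "{model_endpoint_id}.{app_name}.result.{result_name}";
# alert names may not contain ".", so components 0, 1 and 3 are joined with "_"
# and the constant "result" component is dropped.
fqn = "ep-123.my-app.result.drift"
parts = fqn.split(".")                               # ['ep-123', 'my-app', 'result', 'drift']
assert fqn.count(".") == 3 and parts[2] == "result"  # the validation performed above
print("_".join(parts[i] for i in [0, 1, 3]))         # ep-123_my-app_drift
```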
mlrun/model_monitoring/stream_processing.py

```diff
@@ -13,7 +13,6 @@
 # limitations under the License.
 
 import datetime
-import os
 import typing
 
 import storey
@@ -65,14 +64,11 @@ class EventStreamProcessor:
             parquet_batching_max_events=self.parquet_batching_max_events,
         )
 
-        self.storage_options = None
         self.tsdb_configurations = {}
         if not mlrun.mlconf.is_ce_mode():
             self._initialize_v3io_configurations(
                 model_monitoring_access_key=model_monitoring_access_key
             )
-        elif self.parquet_path.startswith("s3://"):
-            self.storage_options = mlrun.mlconf.get_s3_storage_options()
 
     def _initialize_v3io_configurations(
         self,
@@ -87,17 +83,12 @@ class EventStreamProcessor:
         self.v3io_framesd = v3io_framesd or mlrun.mlconf.v3io_framesd
         self.v3io_api = v3io_api or mlrun.mlconf.v3io_api
 
-        self.v3io_access_key = v3io_access_key or mlrun.get_secret_or_env(
-            "V3IO_ACCESS_KEY"
-        )
+        self.v3io_access_key = v3io_access_key or mlrun.mlconf.get_v3io_access_key()
         self.model_monitoring_access_key = (
             model_monitoring_access_key
-            or os.environ.get(ProjectSecretKeys.ACCESS_KEY)
+            or mlrun.get_secret_or_env(ProjectSecretKeys.ACCESS_KEY)
            or self.v3io_access_key
         )
-        self.storage_options = dict(
-            v3io_access_key=self.model_monitoring_access_key, v3io_api=self.v3io_api
-        )
 
         # TSDB path and configurations
         tsdb_path = mlrun.mlconf.get_model_monitoring_file_target_path(
```
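The access-key change swaps a plain environment lookup (`os.environ.get`) for `mlrun.get_secret_or_env`, which consults the secrets store before falling back to the environment. A small sketch of the fallback behavior (the key name is illustrative, not the real constant):

```python
import os

import mlrun

# With no secret set for this key, the lookup falls through to the
# environment, so existing env-based deployments keep working unchanged.
os.environ["SOME_ACCESS_KEY"] = "env-value"  # illustrative key
print(mlrun.get_secret_or_env("SOME_ACCESS_KEY"))  # "env-value"
```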
mlrun/model_monitoring/stream_processing.py (continued)

```diff
@@ -248,12 +239,12 @@ class EventStreamProcessor:
         # Write the Parquet target file, partitioned by key (endpoint_id) and time.
         def apply_parquet_target():
             graph.add_step(
-                "storey.ParquetTarget",
+                "mlrun.datastore.storeytargets.ParquetStoreyTarget",
+                alternative_v3io_access_key=mlrun.common.schemas.model_monitoring.ProjectSecretKeys.ACCESS_KEY,
                 name="ParquetTarget",
                 after="ProcessBeforeParquet",
                 graph_shape="cylinder",
                 path=self.parquet_path,
-                storage_options=self.storage_options,
                 max_events=self.parquet_batching_max_events,
                 flush_after_seconds=self.parquet_batching_timeout_secs,
                 attributes={"infer_columns_from_data": True},
@@ -794,6 +785,8 @@ class MapFeatureNames(mlrun.feature_store.steps.MapClass):
 
         """
         event[mapping_dictionary] = {}
+        diff = len(named_iters) - len(values_iters)
+        values_iters += [None] * diff
         for name, value in zip(named_iters, values_iters):
             event[name] = value
             event[mapping_dictionary][name] = value
```
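The two added lines guard against `zip` truncation: `zip` stops at the shortest iterable, so without padding, any feature names beyond the available values would be dropped silently. Standalone illustration:

```python
named_iters = ["f1", "f2", "f3"]
values_iters = [0.1, 0.2]

# Pad the shorter values list with None so zip covers every name
# (a negative diff yields an empty padding list, i.e. a no-op).
diff = len(named_iters) - len(values_iters)
values_iters += [None] * diff

print(list(zip(named_iters, values_iters)))  # [('f1', 0.1), ('f2', 0.2), ('f3', None)]
```

For the case where values can only ever be the shorter side, this is equivalent to `itertools.zip_longest(named_iters, values_iters)`.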
mlrun/projects/pipelines.py

```diff
@@ -615,13 +615,21 @@ class _KFPRunner(_PipelineRunner):
                 "Notifications will only be sent if you wait for pipeline completion. "
                 "Some of the features (like setting message or severity level) are not supported."
             )
-            # for start message, fallback to old notification behavior
             for notification in notifications or []:
                 params = notification.params
                 params.update(notification.secret_params)
-                project.notifiers.add_notification(notification.kind, params)
+                project.notifiers.add_notification(
+                    notification_type=notification.kind,
+                    params=params,
+                    name=notification.name,
+                    message=notification.message,
+                    severity=notification.severity,
+                    when=notification.when,
+                    condition=notification.condition,
+                    secret_params=notification.secret_params,
+                )
 
-        project.spec.notifications = notifications
+        project.spec.notifications = project.notifiers.server_notifications
 
         run_id = _run_pipeline(
             workflow_handler,
```
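For context, the expanded call now forwards every notification field instead of collapsing each notification down to its kind plus merged params, which previously dropped name, message, severity, when, and condition on the way into `project.notifiers`. A hedged sketch of the object whose fields now survive that translation (the constructor shape is assumed from the field names in the hunk above; all values are illustrative):

```python
import mlrun.model

# Assumption: mlrun.model.Notification accepts these keyword fields.
notification = mlrun.model.Notification(
    kind="slack",
    name="pipeline-done",
    message="workflow finished",
    severity="info",
    when=["completed", "error"],
    condition="",
    params={"webhook": "https://hooks.example.com/..."},
    secret_params={},
)
```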