mlrun 1.7.0rc43__py3-none-any.whl → 1.7.0rc55__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of mlrun might be problematic.

Files changed (68)
  1. mlrun/__main__.py +4 -2
  2. mlrun/artifacts/manager.py +3 -1
  3. mlrun/common/formatters/__init__.py +1 -0
  4. mlrun/{model_monitoring/application.py → common/formatters/feature_set.py} +20 -6
  5. mlrun/common/formatters/run.py +3 -0
  6. mlrun/common/schemas/__init__.py +1 -0
  7. mlrun/common/schemas/alert.py +11 -11
  8. mlrun/common/schemas/auth.py +5 -0
  9. mlrun/common/schemas/client_spec.py +0 -1
  10. mlrun/common/schemas/model_monitoring/__init__.py +2 -1
  11. mlrun/common/schemas/model_monitoring/constants.py +23 -9
  12. mlrun/common/schemas/model_monitoring/model_endpoints.py +24 -47
  13. mlrun/common/schemas/notification.py +12 -2
  14. mlrun/common/schemas/workflow.py +10 -2
  15. mlrun/config.py +28 -21
  16. mlrun/data_types/data_types.py +6 -1
  17. mlrun/datastore/base.py +4 -4
  18. mlrun/datastore/s3.py +12 -9
  19. mlrun/datastore/storeytargets.py +9 -6
  20. mlrun/db/base.py +3 -0
  21. mlrun/db/httpdb.py +28 -16
  22. mlrun/db/nopdb.py +24 -4
  23. mlrun/errors.py +7 -1
  24. mlrun/execution.py +40 -7
  25. mlrun/feature_store/api.py +1 -0
  26. mlrun/feature_store/retrieval/spark_merger.py +7 -7
  27. mlrun/frameworks/_common/plan.py +3 -3
  28. mlrun/frameworks/_ml_common/plan.py +1 -1
  29. mlrun/frameworks/parallel_coordinates.py +2 -3
  30. mlrun/launcher/client.py +6 -6
  31. mlrun/model.py +29 -0
  32. mlrun/model_monitoring/api.py +1 -12
  33. mlrun/model_monitoring/applications/__init__.py +1 -2
  34. mlrun/model_monitoring/applications/_application_steps.py +5 -1
  35. mlrun/model_monitoring/applications/base.py +2 -182
  36. mlrun/model_monitoring/applications/context.py +2 -9
  37. mlrun/model_monitoring/applications/evidently_base.py +0 -74
  38. mlrun/model_monitoring/applications/histogram_data_drift.py +2 -2
  39. mlrun/model_monitoring/applications/results.py +4 -4
  40. mlrun/model_monitoring/controller.py +46 -209
  41. mlrun/model_monitoring/db/stores/base/store.py +1 -0
  42. mlrun/model_monitoring/db/stores/sqldb/sql_store.py +15 -1
  43. mlrun/model_monitoring/db/stores/v3io_kv/kv_store.py +12 -0
  44. mlrun/model_monitoring/db/tsdb/tdengine/schemas.py +17 -16
  45. mlrun/model_monitoring/db/tsdb/tdengine/tdengine_connector.py +49 -39
  46. mlrun/model_monitoring/helpers.py +13 -15
  47. mlrun/model_monitoring/writer.py +3 -1
  48. mlrun/projects/operations.py +11 -8
  49. mlrun/projects/pipelines.py +35 -16
  50. mlrun/projects/project.py +52 -24
  51. mlrun/render.py +3 -3
  52. mlrun/runtimes/daskjob.py +1 -1
  53. mlrun/runtimes/kubejob.py +6 -6
  54. mlrun/runtimes/nuclio/api_gateway.py +12 -0
  55. mlrun/runtimes/nuclio/application/application.py +3 -3
  56. mlrun/runtimes/nuclio/function.py +41 -0
  57. mlrun/runtimes/nuclio/serving.py +2 -2
  58. mlrun/runtimes/pod.py +19 -13
  59. mlrun/serving/server.py +2 -0
  60. mlrun/utils/helpers.py +62 -16
  61. mlrun/utils/version/version.json +2 -2
  62. {mlrun-1.7.0rc43.dist-info → mlrun-1.7.0rc55.dist-info}/METADATA +126 -44
  63. {mlrun-1.7.0rc43.dist-info → mlrun-1.7.0rc55.dist-info}/RECORD +67 -68
  64. {mlrun-1.7.0rc43.dist-info → mlrun-1.7.0rc55.dist-info}/WHEEL +1 -1
  65. mlrun/model_monitoring/evidently_application.py +0 -20
  66. {mlrun-1.7.0rc43.dist-info → mlrun-1.7.0rc55.dist-info}/LICENSE +0 -0
  67. {mlrun-1.7.0rc43.dist-info → mlrun-1.7.0rc55.dist-info}/entry_points.txt +0 -0
  68. {mlrun-1.7.0rc43.dist-info → mlrun-1.7.0rc55.dist-info}/top_level.txt +0 -0
mlrun/model_monitoring/controller.py
@@ -15,28 +15,22 @@
 import concurrent.futures
 import datetime
 import json
-import multiprocessing
 import os
 import re
 from collections.abc import Iterator
-from typing import Any, NamedTuple, Optional, Union, cast
+from typing import NamedTuple, Optional, Union, cast
 
 import nuclio
 
 import mlrun
 import mlrun.common.schemas.model_monitoring.constants as mm_constants
 import mlrun.data_types.infer
-import mlrun.feature_store as fstore
 import mlrun.model_monitoring.db.stores
-from mlrun.common.model_monitoring.helpers import FeatureStats, pad_features_hist
 from mlrun.datastore import get_stream_pusher
-from mlrun.datastore.targets import ParquetTarget
 from mlrun.errors import err_to_str
 from mlrun.model_monitoring.helpers import (
     _BatchDict,
     batch_dict2timedelta,
-    calculate_inputs_statistics,
-    get_monitoring_parquet_path,
     get_stream_path,
 )
 from mlrun.utils import datetime_now, logger
@@ -219,7 +213,7 @@ class _BatchWindowGenerator:
             # If the endpoint does not have a stream, `last_updated` should be
             # the minimum between the current time and the last updated time.
             # This compensates for the bumping mechanism - see
-            # `bump_model_endpoint_last_request`.
+            # `update_model_endpoint_last_request`.
             last_updated = min(int(datetime_now().timestamp()), last_updated)
             logger.debug(
                 "The endpoint does not have a stream", last_updated=last_updated
@@ -292,15 +286,9 @@ class MonitoringApplicationController:
         )
 
         self.model_monitoring_access_key = self._get_model_monitoring_access_key()
-        self.parquet_directory = get_monitoring_parquet_path(
-            self.project_obj,
-            kind=mm_constants.FileTargetKind.APPS_PARQUET,
+        self.tsdb_connector = mlrun.model_monitoring.get_tsdb_connector(
+            project=self.project
         )
-        self.storage_options = None
-        if not mlrun.mlconf.is_ce_mode():
-            self._initialize_v3io_configurations()
-        elif self.parquet_directory.startswith("s3://"):
-            self.storage_options = mlrun.mlconf.get_s3_storage_options()
 
     @staticmethod
     def _get_model_monitoring_access_key() -> Optional[str]:
@@ -310,12 +298,6 @@ class MonitoringApplicationController:
             access_key = mlrun.mlconf.get_v3io_access_key()
         return access_key
 
-    def _initialize_v3io_configurations(self) -> None:
-        self.storage_options = dict(
-            v3io_access_key=self.model_monitoring_access_key,
-            v3io_api=mlrun.mlconf.v3io_api,
-        )
-
     def run(self) -> None:
         """
         Main method for run all the relevant monitoring applications on each endpoint.
@@ -367,11 +349,8 @@ class MonitoringApplicationController:
             )
             return
         # Initialize a process pool that will be used to run each endpoint applications on a dedicated process
-        with concurrent.futures.ProcessPoolExecutor(
+        with concurrent.futures.ThreadPoolExecutor(
             max_workers=min(len(endpoints), 10),
-            # On Linux, the default is "fork" (this is set to change in Python 3.14), which inherits the current heap
-            # and resources (such as sockets), which is not what we want (ML-7160)
-            mp_context=multiprocessing.get_context("spawn"),
         ) as pool:
             for endpoint in endpoints:
                 if (
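The hunk above replaces the per-endpoint spawned processes with a bounded thread pool. A minimal standalone sketch of the same fan-out pattern, using placeholder endpoint IDs rather than mlrun objects:

import concurrent.futures


def process_endpoint(endpoint_id: str) -> str:
    # Stand-in for the per-endpoint monitoring work submitted to the pool.
    return f"processed {endpoint_id}"


endpoint_ids = ["ep-1", "ep-2", "ep-3"]
# Cap the pool the same way the controller does: at most 10 workers,
# and never more workers than endpoints.
with concurrent.futures.ThreadPoolExecutor(
    max_workers=min(len(endpoint_ids), 10)
) as pool:
    futures = [pool.submit(process_endpoint, ep) for ep in endpoint_ids]
    for future in concurrent.futures.as_completed(futures):
        print(future.result())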
@@ -395,13 +374,10 @@ class MonitoringApplicationController:
                         applications_names=applications_names,
                         batch_window_generator=self._batch_window_generator,
                         project=self.project,
-                        parquet_directory=self.parquet_directory,
-                        storage_options=self.storage_options,
                         model_monitoring_access_key=self.model_monitoring_access_key,
+                        tsdb_connector=self.tsdb_connector,
                     )
 
-        self._delete_old_parquet(endpoints=endpoints)
-
     @classmethod
     def model_endpoint_process(
         cls,
@@ -409,9 +385,8 @@ class MonitoringApplicationController:
         applications_names: list[str],
         batch_window_generator: _BatchWindowGenerator,
         project: str,
-        parquet_directory: str,
-        storage_options: dict,
         model_monitoring_access_key: str,
+        tsdb_connector: mlrun.model_monitoring.db.tsdb.TSDBConnector,
     ) -> None:
         """
         Process a model endpoint and trigger the monitoring applications. This function running on different process
@@ -422,16 +397,13 @@
         :param applications_names: (list[str]) List of application names to push results to.
         :param batch_window_generator: (_BatchWindowGenerator) An object that generates _BatchWindow objects.
         :param project: (str) Project name.
-        :param parquet_directory: (str) Directory to store application parquet files
-        :param storage_options: (dict) Storage options for writing ParquetTarget.
         :param model_monitoring_access_key: (str) Access key to apply the model monitoring process.
+        :param tsdb_connector: (mlrun.model_monitoring.db.tsdb.TSDBConnector) TSDB connector
         """
         endpoint_id = endpoint[mm_constants.EventFieldType.UID]
+        # if false the endpoint represent batch infer step.
+        has_stream = endpoint[mm_constants.EventFieldType.STREAM_PATH] != ""
         try:
-            m_fs = fstore.get_feature_set(
-                endpoint[mm_constants.EventFieldType.FEATURE_SET_URI]
-            )
-
             for application in applications_names:
                 batch_window = batch_window_generator.get_batch_window(
                     project=project,
@@ -439,158 +411,70 @@ class MonitoringApplicationController:
                     application=application,
                     first_request=endpoint[mm_constants.EventFieldType.FIRST_REQUEST],
                     last_request=endpoint[mm_constants.EventFieldType.LAST_REQUEST],
-                    has_stream=endpoint[mm_constants.EventFieldType.STREAM_PATH] != "",
+                    has_stream=has_stream,
                 )
 
                 for start_infer_time, end_infer_time in batch_window.get_intervals():
-                    # start - TODO : delete in 1.9.0 (V1 app deprecation)
-                    try:
-                        # Get application sample data
-                        offline_response = cls._get_sample_df(
-                            feature_set=m_fs,
+                    prediction_metric = tsdb_connector.read_predictions(
+                        endpoint_id=endpoint_id,
+                        start=start_infer_time,
+                        end=end_infer_time,
+                    )
+                    if not prediction_metric.data and has_stream:
+                        logger.info(
+                            "No data found for the given interval",
+                            start=start_infer_time,
+                            end=end_infer_time,
+                            endpoint_id=endpoint_id,
+                        )
+                    else:
+                        logger.info(
+                            "Data found for the given interval",
+                            start=start_infer_time,
+                            end=end_infer_time,
                             endpoint_id=endpoint_id,
+                        )
+                        cls._push_to_applications(
                             start_infer_time=start_infer_time,
                             end_infer_time=end_infer_time,
-                            parquet_directory=parquet_directory,
-                            storage_options=storage_options,
-                            application_name=application,
-                        )
-
-                        df = offline_response.to_dataframe()
-                        parquet_target_path = offline_response.vector.get_target_path()
-
-                        if len(df) == 0:
-                            logger.info(
-                                "During this time window, the endpoint has not received any data",
-                                endpoint=endpoint[mm_constants.EventFieldType.UID],
-                                start_time=start_infer_time,
-                                end_time=end_infer_time,
-                            )
-                            continue
-
-                    except FileNotFoundError:
-                        logger.warn(
-                            "No parquets were written yet",
-                            endpoint=endpoint[mm_constants.EventFieldType.UID],
+                            endpoint_id=endpoint_id,
+                            project=project,
+                            applications_names=[application],
+                            model_monitoring_access_key=model_monitoring_access_key,
                         )
-                        continue
-
-                    # Get the timestamp of the latest request:
-                    latest_request = df[mm_constants.EventFieldType.TIMESTAMP].iloc[-1]
-
-                    # Get the feature stats from the model endpoint for reference data
-                    feature_stats = json.loads(
-                        endpoint[mm_constants.EventFieldType.FEATURE_STATS]
-                    )
-
-                    # Pad the original feature stats to accommodate current
-                    # data out of the original range (unless already padded)
-                    pad_features_hist(FeatureStats(feature_stats))
-
-                    # Get the current stats:
-                    current_stats = calculate_inputs_statistics(
-                        sample_set_statistics=feature_stats, inputs=df
-                    )
-                    # end - TODO : delete in 1.9.0 (V1 app deprecation)
-                    cls._push_to_applications(
-                        current_stats=current_stats,
-                        feature_stats=feature_stats,
-                        start_infer_time=start_infer_time,
-                        end_infer_time=end_infer_time,
-                        endpoint_id=endpoint_id,
-                        latest_request=latest_request,
-                        project=project,
-                        applications_names=[application],
-                        model_monitoring_access_key=model_monitoring_access_key,
-                        parquet_target_path=parquet_target_path,
-                    )
         except Exception:
             logger.exception(
                 "Encountered an exception",
                 endpoint_id=endpoint[mm_constants.EventFieldType.UID],
             )
 
-    def _delete_old_parquet(self, endpoints: list[dict[str, Any]], days: int = 1):
-        """
-        Delete application parquets older than the argument days.
-
-        :param endpoints: A list of dictionaries of model endpoints records.
-        """
-        if self.parquet_directory.startswith("v3io:///"):
-            # create fs with access to the user side (under projects)
-            store, _, _ = mlrun.store_manager.get_or_create_store(
-                self.parquet_directory,
-                {"V3IO_ACCESS_KEY": self.model_monitoring_access_key},
-            )
-            fs = store.filesystem
-
-            # calculate time threshold (keep only files from the last 24 hours)
-            time_to_keep = (
-                datetime.datetime.now(tz=datetime.timezone.utc)
-                - datetime.timedelta(days=days)
-            ).timestamp()
-
-            for endpoint in endpoints:
-                try:
-                    apps_parquet_directories = fs.listdir(
-                        path=f"{self.parquet_directory}"
-                        f"/key={endpoint[mm_constants.EventFieldType.UID]}"
-                    )
-                    for directory in apps_parquet_directories:
-                        if directory["mtime"] < time_to_keep:
-                            # Delete files
-                            fs.rm(path=directory["name"], recursive=True)
-                            # Delete directory
-                            fs.rmdir(path=directory["name"])
-                except FileNotFoundError:
-                    logger.info(
-                        "Application parquet directory is empty, "
-                        "probably parquets have not yet been created for this app",
-                        endpoint=endpoint[mm_constants.EventFieldType.UID],
-                        path=f"{self.parquet_directory}"
-                        f"/key={endpoint[mm_constants.EventFieldType.UID]}",
-                    )
-
     @staticmethod
     def _push_to_applications(
-        current_stats,
-        feature_stats,
-        start_infer_time,
-        end_infer_time,
-        endpoint_id,
-        latest_request,
-        project,
-        applications_names,
-        model_monitoring_access_key,
-        parquet_target_path,
+        start_infer_time: datetime.datetime,
+        end_infer_time: datetime.datetime,
+        endpoint_id: str,
+        project: str,
+        applications_names: list[str],
+        model_monitoring_access_key: str,
     ):
         """
         Pushes data to multiple stream applications.
 
-        :param current_stats: Current statistics of input data.
-        :param feature_stats: Statistics of train features.
-        :param start_infer_time: The beginning of the infer interval window.
-        :param end_infer_time: The end of the infer interval window.
-        :param endpoint_id: Identifier for the model endpoint.
-        :param latest_request: Timestamp of the latest model request.
-        :param project: mlrun Project name.
-        :param applications_names: List of application names to which data will be pushed.
+        :param start_infer_time: The beginning of the infer interval window.
+        :param end_infer_time: The end of the infer interval window.
+        :param endpoint_id: Identifier for the model endpoint.
+        :param project: mlrun Project name.
+        :param applications_names: List of application names to which data will be pushed.
+        :param model_monitoring_access_key: Access key to apply the model monitoring process.
 
         """
-
         data = {
-            mm_constants.ApplicationEvent.CURRENT_STATS: json.dumps(current_stats),
-            mm_constants.ApplicationEvent.FEATURE_STATS: json.dumps(feature_stats),
-            mm_constants.ApplicationEvent.SAMPLE_PARQUET_PATH: parquet_target_path,
             mm_constants.ApplicationEvent.START_INFER_TIME: start_infer_time.isoformat(
                 sep=" ", timespec="microseconds"
             ),
             mm_constants.ApplicationEvent.END_INFER_TIME: end_infer_time.isoformat(
                 sep=" ", timespec="microseconds"
             ),
-            mm_constants.ApplicationEvent.LAST_REQUEST: latest_request.isoformat(
-                sep=" ", timespec="microseconds"
-            ),
             mm_constants.ApplicationEvent.ENDPOINT_ID: endpoint_id,
             mm_constants.ApplicationEvent.OUTPUT_STREAM_URI: get_stream_path(
                 project=project,
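The interval loop above now asks the TSDB connector whether any predictions landed in the window and skips empty windows for streaming endpoints, while stream-less (batch infer) endpoints are pushed regardless. A rough sketch of that decision, using a stand-in result object rather than the real mlrun.model_monitoring.db.tsdb.TSDBConnector API:

import dataclasses
import datetime


@dataclasses.dataclass
class _PredictionsResult:
    # Stand-in for the object returned by tsdb_connector.read_predictions();
    # only the truthiness of `data` matters for the controller's check.
    data: list


def should_push(
    read_predictions,  # callable mimicking tsdb_connector.read_predictions
    endpoint_id: str,
    has_stream: bool,
    start: datetime.datetime,
    end: datetime.datetime,
) -> bool:
    result = read_predictions(endpoint_id=endpoint_id, start=start, end=end)
    if not result.data and has_stream:
        # Streaming endpoint with no traffic in this window: nothing to push.
        return False
    # Either data was found, or the endpoint is a batch-infer endpoint.
    return True


# Example: a batch-infer endpoint (no stream) is pushed even with no TSDB data.
assert should_push(
    lambda **_: _PredictionsResult(data=[]),
    endpoint_id="ep-1",
    has_stream=False,
    start=datetime.datetime(2024, 6, 1),
    end=datetime.datetime(2024, 6, 1, 1),
)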
@@ -608,53 +492,6 @@ class MonitoringApplicationController:
                 [data]
             )
 
-    @staticmethod
-    def _get_sample_df(
-        feature_set: mlrun.common.schemas.FeatureSet,
-        endpoint_id: str,
-        start_infer_time: datetime.datetime,
-        end_infer_time: datetime.datetime,
-        parquet_directory: str,
-        storage_options: dict,
-        application_name: str,
-    ) -> mlrun.feature_store.OfflineVectorResponse:
-        """
-        Retrieves a sample DataFrame of the current input according to the provided infer interval window.
-
-        :param feature_set: The main feature set.
-        :param endpoint_id: Identifier for the model endpoint.
-        :param start_infer_time: The beginning of the infer interval window.
-        :param end_infer_time: The end of the infer interval window.
-        :param parquet_directory: Directory where Parquet files are stored.
-        :param storage_options: Storage options for accessing the data.
-        :param application_name: Current application name.
-
-        :return: OfflineVectorResponse that can be used for generating a sample DataFrame for the specified endpoint.
-
-        """
-        features = [f"{feature_set.metadata.name}.*"]
-        vector = fstore.FeatureVector(
-            name=f"{endpoint_id}_vector",
-            features=features,
-            with_indexes=True,
-        )
-        vector.metadata.tag = application_name
-        vector.feature_set_objects = {feature_set.metadata.name: feature_set}
-
-        # get offline features based on application start and end time.
-        # store the result parquet by partitioning by controller end processing time
-        offline_response = vector.get_offline_features(
-            start_time=start_infer_time,
-            end_time=end_infer_time,
-            timestamp_for_filtering=mm_constants.EventFieldType.TIMESTAMP,
-            target=ParquetTarget(
-                path=parquet_directory
-                + f"/key={endpoint_id}/{int(start_infer_time.timestamp())}/{application_name}.parquet",
-                storage_options=storage_options,
-            ),
-        )
-        return offline_response
-
 
 def handler(context: nuclio.Context, event: nuclio.Event) -> None:
     """
mlrun/model_monitoring/db/stores/base/store.py
@@ -11,6 +11,7 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
+
 import json
 import typing
 from abc import ABC, abstractmethod
mlrun/model_monitoring/db/stores/sqldb/sql_store.py
@@ -588,7 +588,11 @@ class SQLStoreBase(StoreBase):
 
         for endpoint_dict in endpoints:
             endpoint_id = endpoint_dict[mm_schemas.EventFieldType.UID]
-
+            logger.debug(
+                "Deleting model endpoint resources from the SQL tables",
+                endpoint_id=endpoint_id,
+                project=self.project,
+            )
             # Delete last analyzed records
             self._delete_last_analyzed(endpoint_id=endpoint_id)
 
@@ -598,6 +602,16 @@ class SQLStoreBase(StoreBase):
 
             # Delete model endpoint record
             self.delete_model_endpoint(endpoint_id=endpoint_id)
+            logger.debug(
+                "Successfully deleted model endpoint resources",
+                endpoint_id=endpoint_id,
+                project=self.project,
+            )
+
+        logger.debug(
+            "Successfully deleted model monitoring endpoints resources from the SQL tables",
+            project=self.project,
+        )
 
     def get_model_endpoint_metrics(
         self, endpoint_id: str, type: mm_schemas.ModelEndpointMonitoringMetricType
mlrun/model_monitoring/db/stores/v3io_kv/kv_store.py
@@ -305,10 +305,22 @@ class KVStoreBase(StoreBase):
                 endpoint_id = endpoint_dict[mm_schemas.EventFieldType.ENDPOINT_ID]
             else:
                 endpoint_id = endpoint_dict[mm_schemas.EventFieldType.UID]
+
+            logger.debug(
+                "Deleting model endpoint resources from the V3IO KV table",
+                endpoint_id=endpoint_id,
+                project=self.project,
+            )
+
             self.delete_model_endpoint(
                 endpoint_id,
             )
 
+        logger.debug(
+            "Successfully deleted model monitoring endpoints from the V3IO KV table",
+            project=self.project,
+        )
+
         # Delete remain records in the KV
         all_records = self.client.kv.new_cursor(
             container=self.container,
mlrun/model_monitoring/db/tsdb/tdengine/schemas.py
@@ -94,38 +94,39 @@ class TDEngineSchema:
         tags = ", ".join(f"{col} {val}" for col, val in self.tags.items())
         return f"CREATE STABLE if NOT EXISTS {self.database}.{self.super_table} ({columns}) TAGS ({tags});"
 
-    def _create_subtable_query(
+    def _create_subtable_sql(
         self,
         subtable: str,
         values: dict[str, Union[str, int, float, datetime.datetime]],
     ) -> str:
         try:
-            values = ", ".join(f"'{values[val]}'" for val in self.tags)
+            tags = ", ".join(f"'{values[val]}'" for val in self.tags)
         except KeyError:
             raise mlrun.errors.MLRunInvalidArgumentError(
                 f"values must contain all tags: {self.tags.keys()}"
             )
-        return f"CREATE TABLE if NOT EXISTS {self.database}.{subtable} USING {self.super_table} TAGS ({values});"
+        return f"CREATE TABLE if NOT EXISTS {self.database}.{subtable} USING {self.super_table} TAGS ({tags});"
 
-    def _insert_subtable_query(
-        self,
-        connection: taosws.Connection,
+    @staticmethod
+    def _insert_subtable_stmt(
+        statement: taosws.TaosStmt,
+        columns: dict[str, _TDEngineColumn],
         subtable: str,
         values: dict[str, Union[str, int, float, datetime.datetime]],
     ) -> taosws.TaosStmt:
-        stmt = connection.statement()
-        question_marks = ", ".join("?" * len(self.columns))
-        stmt.prepare(f"INSERT INTO ? VALUES ({question_marks});")
-        stmt.set_tbname_tags(subtable, [])
+        question_marks = ", ".join("?" * len(columns))
+        statement.prepare(f"INSERT INTO ? VALUES ({question_marks});")
+        statement.set_tbname(subtable)
 
         bind_params = []
 
-        for col_name, col_type in self.columns.items():
+        for col_name, col_type in columns.items():
             val = values[col_name]
             bind_params.append(values_to_column([val], col_type))
 
-        stmt.bind_param(bind_params)
-        return stmt
+        statement.bind_param(bind_params)
+        statement.add_batch()
+        return statement
 
     def _delete_subtable_query(
         self,
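The insert helper above now works on a caller-supplied prepared statement (taosws.TaosStmt) and batches rows with add_batch() instead of opening a statement from a connection itself. A rough sketch of that prepared-statement flow with the TDengine websocket connector; the DSN, database, table, and column values are placeholders, and the exact wiring inside mlrun's TDEngine connector may differ:

import taosws

# Placeholder DSN; a real deployment points at its own TDengine service.
conn = taosws.connect("taosws://root:taosdata@localhost:6041")
stmt = conn.statement()

# Prepare once, bind values for a specific (pre-created) subtable, batch them,
# then execute - mirroring the prepare/set_tbname/bind_param/add_batch calls
# in the new _insert_subtable_stmt().
stmt.prepare("INSERT INTO ? VALUES (?, ?)")
stmt.set_tbname("test_db.subtable_1")
stmt.bind_param(
    [
        taosws.millis_timestamps_to_column([1717000000000]),
        taosws.floats_to_column([0.5]),
    ]
)
stmt.add_batch()
stmt.execute()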
@@ -163,8 +164,8 @@ class TDEngineSchema:
     @staticmethod
     def _get_records_query(
         table: str,
-        start: datetime,
-        end: datetime,
+        start: datetime.datetime,
+        end: datetime.datetime,
         columns_to_filter: list[str] = None,
         filter_query: Optional[str] = None,
         interval: Optional[str] = None,
@@ -211,7 +212,7 @@ class TDEngineSchema:
         if filter_query:
             query.write(f"{filter_query} AND ")
         if start:
-            query.write(f"{timestamp_column} >= '{start}'" + " AND ")
+            query.write(f"{timestamp_column} >= '{start}' AND ")
         if end:
             query.write(f"{timestamp_column} <= '{end}'")
         if interval:
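The change above is behavior preserving: the old concatenation and the new single f-string produce an identical WHERE fragment. A quick illustration with a placeholder column name and timestamp:

timestamp_column = "time"  # placeholder; the real column name comes from the schema
start = "2024-06-01 00:00:00"

old_fragment = f"{timestamp_column} >= '{start}'" + " AND "
new_fragment = f"{timestamp_column} >= '{start}' AND "
assert old_fragment == new_fragment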