mlrun-1.7.0rc28-py3-none-any.whl → mlrun-1.7.0rc55-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of mlrun has been flagged as potentially problematic; see the release details for more information.
- mlrun/__main__.py +4 -2
- mlrun/alerts/alert.py +75 -8
- mlrun/artifacts/base.py +1 -0
- mlrun/artifacts/manager.py +9 -2
- mlrun/common/constants.py +4 -1
- mlrun/common/db/sql_session.py +3 -2
- mlrun/common/formatters/__init__.py +1 -0
- mlrun/common/formatters/artifact.py +1 -0
- mlrun/{model_monitoring/application.py → common/formatters/feature_set.py} +20 -6
- mlrun/common/formatters/run.py +3 -0
- mlrun/common/helpers.py +0 -1
- mlrun/common/schemas/__init__.py +3 -1
- mlrun/common/schemas/alert.py +15 -12
- mlrun/common/schemas/api_gateway.py +6 -6
- mlrun/common/schemas/auth.py +5 -0
- mlrun/common/schemas/client_spec.py +0 -1
- mlrun/common/schemas/common.py +7 -4
- mlrun/common/schemas/frontend_spec.py +7 -0
- mlrun/common/schemas/function.py +7 -0
- mlrun/common/schemas/model_monitoring/__init__.py +4 -3
- mlrun/common/schemas/model_monitoring/constants.py +41 -26
- mlrun/common/schemas/model_monitoring/model_endpoints.py +23 -47
- mlrun/common/schemas/notification.py +69 -12
- mlrun/common/schemas/project.py +45 -12
- mlrun/common/schemas/workflow.py +10 -2
- mlrun/common/types.py +1 -0
- mlrun/config.py +91 -35
- mlrun/data_types/data_types.py +6 -1
- mlrun/data_types/spark.py +2 -2
- mlrun/data_types/to_pandas.py +57 -25
- mlrun/datastore/__init__.py +1 -0
- mlrun/datastore/alibaba_oss.py +3 -2
- mlrun/datastore/azure_blob.py +125 -37
- mlrun/datastore/base.py +42 -21
- mlrun/datastore/datastore.py +4 -2
- mlrun/datastore/datastore_profile.py +1 -1
- mlrun/datastore/dbfs_store.py +3 -7
- mlrun/datastore/filestore.py +1 -3
- mlrun/datastore/google_cloud_storage.py +85 -29
- mlrun/datastore/inmem.py +4 -1
- mlrun/datastore/redis.py +1 -0
- mlrun/datastore/s3.py +25 -12
- mlrun/datastore/sources.py +76 -4
- mlrun/datastore/spark_utils.py +30 -0
- mlrun/datastore/storeytargets.py +151 -0
- mlrun/datastore/targets.py +102 -131
- mlrun/datastore/v3io.py +1 -0
- mlrun/db/base.py +15 -6
- mlrun/db/httpdb.py +57 -28
- mlrun/db/nopdb.py +29 -5
- mlrun/errors.py +20 -3
- mlrun/execution.py +46 -5
- mlrun/feature_store/api.py +25 -1
- mlrun/feature_store/common.py +6 -11
- mlrun/feature_store/feature_vector.py +3 -1
- mlrun/feature_store/retrieval/job.py +4 -1
- mlrun/feature_store/retrieval/spark_merger.py +10 -39
- mlrun/feature_store/steps.py +8 -0
- mlrun/frameworks/_common/plan.py +3 -3
- mlrun/frameworks/_ml_common/plan.py +1 -1
- mlrun/frameworks/parallel_coordinates.py +2 -3
- mlrun/frameworks/sklearn/mlrun_interface.py +13 -3
- mlrun/k8s_utils.py +48 -2
- mlrun/launcher/client.py +6 -6
- mlrun/launcher/local.py +2 -2
- mlrun/model.py +215 -34
- mlrun/model_monitoring/api.py +38 -24
- mlrun/model_monitoring/applications/__init__.py +1 -2
- mlrun/model_monitoring/applications/_application_steps.py +60 -29
- mlrun/model_monitoring/applications/base.py +2 -174
- mlrun/model_monitoring/applications/context.py +197 -70
- mlrun/model_monitoring/applications/evidently_base.py +11 -85
- mlrun/model_monitoring/applications/histogram_data_drift.py +21 -16
- mlrun/model_monitoring/applications/results.py +4 -4
- mlrun/model_monitoring/controller.py +110 -282
- mlrun/model_monitoring/db/stores/__init__.py +8 -3
- mlrun/model_monitoring/db/stores/base/store.py +3 -0
- mlrun/model_monitoring/db/stores/sqldb/models/base.py +9 -7
- mlrun/model_monitoring/db/stores/sqldb/models/mysql.py +18 -3
- mlrun/model_monitoring/db/stores/sqldb/sql_store.py +43 -23
- mlrun/model_monitoring/db/stores/v3io_kv/kv_store.py +48 -35
- mlrun/model_monitoring/db/tsdb/__init__.py +7 -2
- mlrun/model_monitoring/db/tsdb/base.py +147 -15
- mlrun/model_monitoring/db/tsdb/tdengine/schemas.py +94 -55
- mlrun/model_monitoring/db/tsdb/tdengine/stream_graph_steps.py +0 -3
- mlrun/model_monitoring/db/tsdb/tdengine/tdengine_connector.py +144 -38
- mlrun/model_monitoring/db/tsdb/v3io/stream_graph_steps.py +44 -3
- mlrun/model_monitoring/db/tsdb/v3io/v3io_connector.py +246 -57
- mlrun/model_monitoring/helpers.py +70 -50
- mlrun/model_monitoring/stream_processing.py +96 -195
- mlrun/model_monitoring/writer.py +13 -5
- mlrun/package/packagers/default_packager.py +2 -2
- mlrun/projects/operations.py +16 -8
- mlrun/projects/pipelines.py +126 -115
- mlrun/projects/project.py +286 -129
- mlrun/render.py +3 -3
- mlrun/run.py +38 -19
- mlrun/runtimes/__init__.py +19 -8
- mlrun/runtimes/base.py +4 -1
- mlrun/runtimes/daskjob.py +1 -1
- mlrun/runtimes/funcdoc.py +1 -1
- mlrun/runtimes/kubejob.py +6 -6
- mlrun/runtimes/local.py +12 -5
- mlrun/runtimes/nuclio/api_gateway.py +68 -8
- mlrun/runtimes/nuclio/application/application.py +307 -70
- mlrun/runtimes/nuclio/function.py +63 -14
- mlrun/runtimes/nuclio/serving.py +10 -10
- mlrun/runtimes/pod.py +25 -19
- mlrun/runtimes/remotesparkjob.py +2 -5
- mlrun/runtimes/sparkjob/spark3job.py +16 -17
- mlrun/runtimes/utils.py +34 -0
- mlrun/serving/routers.py +2 -5
- mlrun/serving/server.py +37 -19
- mlrun/serving/states.py +30 -3
- mlrun/serving/v2_serving.py +44 -35
- mlrun/track/trackers/mlflow_tracker.py +5 -0
- mlrun/utils/async_http.py +1 -1
- mlrun/utils/db.py +18 -0
- mlrun/utils/helpers.py +150 -36
- mlrun/utils/http.py +1 -1
- mlrun/utils/notifications/notification/__init__.py +0 -1
- mlrun/utils/notifications/notification/webhook.py +8 -1
- mlrun/utils/notifications/notification_pusher.py +1 -1
- mlrun/utils/v3io_clients.py +2 -2
- mlrun/utils/version/version.json +2 -2
- {mlrun-1.7.0rc28.dist-info → mlrun-1.7.0rc55.dist-info}/METADATA +153 -66
- {mlrun-1.7.0rc28.dist-info → mlrun-1.7.0rc55.dist-info}/RECORD +131 -134
- {mlrun-1.7.0rc28.dist-info → mlrun-1.7.0rc55.dist-info}/WHEEL +1 -1
- mlrun/feature_store/retrieval/conversion.py +0 -271
- mlrun/model_monitoring/controller_handler.py +0 -37
- mlrun/model_monitoring/evidently_application.py +0 -20
- mlrun/model_monitoring/prometheus.py +0 -216
- {mlrun-1.7.0rc28.dist-info → mlrun-1.7.0rc55.dist-info}/LICENSE +0 -0
- {mlrun-1.7.0rc28.dist-info → mlrun-1.7.0rc55.dist-info}/entry_points.txt +0 -0
- {mlrun-1.7.0rc28.dist-info → mlrun-1.7.0rc55.dist-info}/top_level.txt +0 -0
mlrun/model_monitoring/db/tsdb/v3io/v3io_connector.py

@@ -12,7 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-from datetime import datetime
+from datetime import datetime, timezone
 from io import StringIO
 from typing import Literal, Optional, Union
 
@@ -24,6 +24,7 @@ import mlrun.common.model_monitoring
 import mlrun.common.schemas.model_monitoring as mm_schemas
 import mlrun.feature_store.steps
 import mlrun.utils.v3io_clients
+from mlrun.common.schemas import EventFieldType
 from mlrun.model_monitoring.db import TSDBConnector
 from mlrun.model_monitoring.helpers import get_invocations_fqn
 from mlrun.utils import logger
@@ -33,7 +34,7 @@ _TSDB_RATE = "1/s"
 _CONTAINER = "users"
 
 
-def _is_no_schema_error(exc: v3io_frames.
+def _is_no_schema_error(exc: v3io_frames.Error) -> bool:
     """
     In case of a nonexistent TSDB table - a `v3io_frames.ReadError` error is raised.
     Check if the error message contains the relevant string to verify the cause.
@@ -64,14 +65,17 @@ class V3IOTSDBConnector(TSDBConnector):
         self.container = container
 
         self.v3io_framesd = v3io_framesd or mlrun.mlconf.v3io_framesd
-        self._frames_client: v3io_frames.client.ClientBase =
-            self._get_v3io_frames_client(self.container)
-        )
-
+        self._frames_client: Optional[v3io_frames.client.ClientBase] = None
         self._init_tables_path()
+        self._create_table = create_table
 
-
-
+    @property
+    def frames_client(self) -> v3io_frames.client.ClientBase:
+        if not self._frames_client:
+            self._frames_client = self._get_v3io_frames_client(self.container)
+            if self._create_table:
+                self.create_tables()
+        return self._frames_client
 
     def _init_tables_path(self):
         self.tables = {}
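
The constructor change above makes the frames client lazy: nothing connects to v3io-frames until `frames_client` is first read, and table creation is deferred to that same moment. A minimal, generic sketch of this lazy-initialization pattern (the class and helper names below are illustrative, not mlrun APIs):

from typing import Optional


class LazyClientHolder:
    """Illustrative only: defer building an expensive client until first use."""

    def __init__(self, container: str, create_table: bool = False) -> None:
        self._container = container
        self._create_table = create_table
        self._client: Optional[dict] = None  # nothing is connected yet

    @property
    def client(self) -> dict:
        # Build the client (and optionally its tables) only on first access.
        if self._client is None:
            self._client = self._connect(self._container)
            if self._create_table:
                self._create_tables()
        return self._client

    def _connect(self, container: str) -> dict:
        return {"container": container}  # stand-in for a real frames client

    def _create_tables(self) -> None:
        pass  # stand-in for V3IOTSDBConnector.create_tables()


holder = LazyClientHolder("users", create_table=True)
print(holder.client)  # the "connection" happens here, not in __init__

This keeps cheap operations, such as resolving table paths, from paying the connection cost.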
@@ -89,6 +93,19 @@ class V3IOTSDBConnector(TSDBConnector):
         )
         self.tables[mm_schemas.V3IOTSDBTables.EVENTS] = events_path
 
+        errors_table_full_path = mlrun.mlconf.get_model_monitoring_file_target_path(
+            project=self.project,
+            kind=mm_schemas.FileTargetKind.ERRORS,
+        )
+        (
+            _,
+            _,
+            errors_path,
+        ) = mlrun.common.model_monitoring.helpers.parse_model_endpoint_store_prefix(
+            errors_table_full_path
+        )
+        self.tables[mm_schemas.V3IOTSDBTables.ERRORS] = errors_path
+
         monitoring_application_full_path = (
             mlrun.mlconf.get_model_monitoring_file_target_path(
                 project=self.project,
@@ -138,7 +155,7 @@ class V3IOTSDBConnector(TSDBConnector):
         for table_name in application_tables:
             logger.info("Creating table in V3IO TSDB", table_name=table_name)
             table = self.tables[table_name]
-            self.
+            self.frames_client.create(
                 backend=_TSDB_BE,
                 table=table,
                 if_exists=v3io_frames.IGNORE,
@@ -148,8 +165,9 @@ class V3IOTSDBConnector(TSDBConnector):
     def apply_monitoring_stream_steps(
         self,
         graph,
-        tsdb_batching_max_events: int =
-        tsdb_batching_timeout_secs: int =
+        tsdb_batching_max_events: int = 1000,
+        tsdb_batching_timeout_secs: int = 30,
+        sample_window: int = 10,
     ):
         """
         Apply TSDB steps on the provided monitoring graph. Throughout these steps, the graph stores live data of
@@ -171,7 +189,10 @@ class V3IOTSDBConnector(TSDBConnector):
             time_col=mm_schemas.EventFieldType.TIMESTAMP,
             container=self.container,
             v3io_frames=self.v3io_framesd,
-            columns=[
+            columns=[
+                mm_schemas.EventFieldType.LATENCY,
+                mm_schemas.EventFieldType.LAST_REQUEST_TIMESTAMP,
+            ],
             index_cols=[
                 mm_schemas.EventFieldType.ENDPOINT_ID,
             ],
@@ -182,17 +203,23 @@ class V3IOTSDBConnector(TSDBConnector):
             key=mm_schemas.EventFieldType.ENDPOINT_ID,
         )
 
+        # Emits the event in window size of events based on sample_window size (10 by default)
+        graph.add_step(
+            "storey.steps.SampleWindow",
+            name="sample",
+            after="Rename",
+            window_size=sample_window,
+            key=EventFieldType.ENDPOINT_ID,
+        )
+
         # Before writing data to TSDB, create dictionary of 2-3 dictionaries that contains
         # stats and details about the events
 
-
-
-
-
-
-        )
-
-        apply_process_before_tsdb()
+        graph.add_step(
+            "mlrun.model_monitoring.db.tsdb.v3io.stream_graph_steps.ProcessBeforeTSDB",
+            name="ProcessBeforeTSDB",
+            after="sample",
+        )
 
         # Unpacked keys from each dictionary and write to TSDB target
         def apply_filter_and_unpacked_keys(name, keys):
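
The new `SampleWindow` step above forwards only a fraction of the events per endpoint before they reach the TSDB targets, bounding the write rate. A rough, self-contained sketch of per-key down-sampling in plain Python (this is not the storey implementation, and which event of each window storey emits is not shown in this diff):

from collections import defaultdict
from typing import Iterable, Iterator


def sample_per_key(events: Iterable[dict], window_size: int = 10, key: str = "endpoint_id") -> Iterator[dict]:
    """Yield one event out of every `window_size` events, counted separately per key."""
    counters = defaultdict(int)
    for event in events:
        counters[event[key]] += 1
        # Keep the first event of each window; drop the rest.
        if counters[event[key]] % window_size == 1:
            yield event


events = [{"endpoint_id": "ep-1", "latency": i} for i in range(25)]
print(len(list(sample_per_key(events))))  # 3 of the 25 events are forwarded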
@@ -255,6 +282,40 @@ class V3IOTSDBConnector(TSDBConnector):
         apply_storey_filter()
         apply_tsdb_target(name="tsdb3", after="FilterNotNone")
 
+    def handle_model_error(
+        self,
+        graph,
+        tsdb_batching_max_events: int = 1000,
+        tsdb_batching_timeout_secs: int = 30,
+        **kwargs,
+    ) -> None:
+        graph.add_step(
+            "mlrun.model_monitoring.db.tsdb.v3io.stream_graph_steps.ErrorExtractor",
+            name="error_extractor",
+            after="ForwardError",
+        )
+
+        graph.add_step(
+            "storey.TSDBTarget",
+            name="tsdb_error",
+            after="error_extractor",
+            path=f"{self.container}/{self.tables[mm_schemas.FileTargetKind.ERRORS]}",
+            rate="1/s",
+            time_col=mm_schemas.EventFieldType.TIMESTAMP,
+            container=self.container,
+            v3io_frames=self.v3io_framesd,
+            columns=[
+                mm_schemas.EventFieldType.MODEL_ERROR,
+                mm_schemas.EventFieldType.ERROR_COUNT,
+            ],
+            index_cols=[
+                mm_schemas.EventFieldType.ENDPOINT_ID,
+            ],
+            max_events=tsdb_batching_max_events,
+            flush_after_seconds=tsdb_batching_timeout_secs,
+            key=mm_schemas.EventFieldType.ENDPOINT_ID,
+        )
+
     def write_application_event(
         self,
         event: dict,
@@ -277,12 +338,14 @@ class V3IOTSDBConnector(TSDBConnector):
         elif kind == mm_schemas.WriterEventKind.RESULT:
             table = self.tables[mm_schemas.V3IOTSDBTables.APP_RESULTS]
             index_cols = index_cols_base + [mm_schemas.ResultData.RESULT_NAME]
-
+            event.pop(mm_schemas.ResultData.CURRENT_STATS, None)
+            # TODO: remove this when extra data is supported (ML-7460)
+            event.pop(mm_schemas.ResultData.RESULT_EXTRA_DATA, None)
         else:
             raise ValueError(f"Invalid {kind = }")
 
         try:
-            self.
+            self.frames_client.write(
                 backend=_TSDB_BE,
                 table=table,
                 dfs=pd.DataFrame.from_records([event]),
@@ -309,7 +372,7 @@ class V3IOTSDBConnector(TSDBConnector):
         tables = mm_schemas.V3IOTSDBTables.list()
         for table_to_delete in tables:
             try:
-                self.
+                self.frames_client.delete(backend=_TSDB_BE, table=table_to_delete)
             except v3io_frames.DeleteError as e:
                 logger.warning(
                     f"Failed to delete TSDB table '{table}'",
@@ -425,7 +488,7 @@ class V3IOTSDBConnector(TSDBConnector):
         aggregators = ",".join(agg_funcs) if agg_funcs else None
         table_path = self.tables[table]
         try:
-            df = self.
+            df = self.frames_client.read(
                 backend=_TSDB_BE,
                 table=table_path,
                 start=start,
@@ -437,7 +500,7 @@ class V3IOTSDBConnector(TSDBConnector):
                 step=sliding_window_step,
                 **kwargs,
             )
-        except v3io_frames.
+        except v3io_frames.Error as err:
             if _is_no_schema_error(err):
                 return pd.DataFrame()
             else:
@@ -504,10 +567,16 @@ class V3IOTSDBConnector(TSDBConnector):
         if type == "metrics":
            table_path = self.tables[mm_schemas.V3IOTSDBTables.METRICS]
            name = mm_schemas.MetricData.METRIC_NAME
+            columns = [mm_schemas.MetricData.METRIC_VALUE]
            df_handler = self.df_to_metrics_values
         elif type == "results":
            table_path = self.tables[mm_schemas.V3IOTSDBTables.APP_RESULTS]
            name = mm_schemas.ResultData.RESULT_NAME
+            columns = [
+                mm_schemas.ResultData.RESULT_VALUE,
+                mm_schemas.ResultData.RESULT_STATUS,
+                mm_schemas.ResultData.RESULT_KIND,
+            ]
            df_handler = self.df_to_results_values
         else:
            raise ValueError(f"Invalid {type = }")
@@ -517,11 +586,12 @@ class V3IOTSDBConnector(TSDBConnector):
             metric_and_app_names=[(metric.app, metric.name) for metric in metrics],
             table_path=table_path,
             name=name,
+            columns=columns,
         )
 
         logger.debug("Querying V3IO TSDB", query=query)
 
-        df: pd.DataFrame = self.
+        df: pd.DataFrame = self.frames_client.read(
             backend=_TSDB_BE,
             start=start,
             end=end,
@@ -599,7 +669,6 @@ class V3IOTSDBConnector(TSDBConnector):
             end=end,
             columns=[mm_schemas.EventFieldType.LATENCY],
             filter_query=f"endpoint_id=='{endpoint_id}'",
-            interval=aggregation_window,
             agg_funcs=agg_funcs,
             sliding_window_step=aggregation_window,
         )
@@ -628,33 +697,153 @@ class V3IOTSDBConnector(TSDBConnector):
             ), # pyright: ignore[reportArgumentType]
         )
 
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+    def get_last_request(
+        self,
+        endpoint_ids: Union[str, list[str]],
+        start: Union[datetime, str] = "0",
+        end: Union[datetime, str] = "now",
+    ) -> pd.DataFrame:
+        endpoint_ids = (
+            endpoint_ids if isinstance(endpoint_ids, list) else [endpoint_ids]
+        )
+        df = self._get_records(
+            table=mm_schemas.FileTargetKind.PREDICTIONS,
+            start=start,
+            end=end,
+            filter_query=f"endpoint_id IN({str(endpoint_ids)[1:-1]})",
+            agg_funcs=["last"],
+        )
+        if not df.empty:
+            df.rename(
+                columns={
+                    f"last({mm_schemas.EventFieldType.LAST_REQUEST_TIMESTAMP})": mm_schemas.EventFieldType.LAST_REQUEST,
+                    f"last({mm_schemas.EventFieldType.LATENCY})": f"last_{mm_schemas.EventFieldType.LATENCY}",
+                },
+                inplace=True,
+            )
+            df[mm_schemas.EventFieldType.LAST_REQUEST] = df[
+                mm_schemas.EventFieldType.LAST_REQUEST
+            ].map(
+                lambda last_request: datetime.fromtimestamp(
+                    last_request, tz=timezone.utc
+                )
+            )
+
+        return df.reset_index(drop=True)
+
+    def get_drift_status(
+        self,
+        endpoint_ids: Union[str, list[str]],
+        start: Union[datetime, str] = "now-24h",
+        end: Union[datetime, str] = "now",
+    ) -> pd.DataFrame:
+        endpoint_ids = (
+            endpoint_ids if isinstance(endpoint_ids, list) else [endpoint_ids]
+        )
+        df = self._get_records(
+            table=mm_schemas.V3IOTSDBTables.APP_RESULTS,
+            start=start,
+            end=end,
+            columns=[mm_schemas.ResultData.RESULT_STATUS],
+            filter_query=f"endpoint_id IN({str(endpoint_ids)[1:-1]})",
+            agg_funcs=["max"],
+            group_by="endpoint_id",
+        )
+        if not df.empty:
+            df.columns = [
+                col[len("max(") : -1] if "max(" in col else col for col in df.columns
+            ]
+        return df.reset_index(drop=True)
+
+    def get_metrics_metadata(
+        self,
+        endpoint_id: str,
+        start: Union[datetime, str] = "0",
+        end: Union[datetime, str] = "now",
+    ) -> pd.DataFrame:
+        df = self._get_records(
+            table=mm_schemas.V3IOTSDBTables.METRICS,
+            start=start,
+            end=end,
+            columns=[mm_schemas.MetricData.METRIC_VALUE],
+            filter_query=f"endpoint_id=='{endpoint_id}'",
+            agg_funcs=["last"],
+        )
+        if not df.empty:
+            df.drop(
+                columns=[f"last({mm_schemas.MetricData.METRIC_VALUE})"], inplace=True
+            )
+        return df.reset_index(drop=True)
+
+    def get_results_metadata(
+        self,
+        endpoint_id: str,
+        start: Union[datetime, str] = "0",
+        end: Union[datetime, str] = "now",
+    ) -> pd.DataFrame:
+        df = self._get_records(
+            table=mm_schemas.V3IOTSDBTables.APP_RESULTS,
+            start=start,
+            end=end,
+            columns=[
+                mm_schemas.ResultData.RESULT_KIND,
+            ],
+            filter_query=f"endpoint_id=='{endpoint_id}'",
+            agg_funcs=["last"],
+        )
+        if not df.empty:
+            df.rename(
+                columns={
+                    f"last({mm_schemas.ResultData.RESULT_KIND})": mm_schemas.ResultData.RESULT_KIND
+                },
+                inplace=True,
+            )
+        return df.reset_index(drop=True)
+
+    def get_error_count(
+        self,
+        endpoint_ids: Union[str, list[str]],
+        start: Union[datetime, str] = "0",
+        end: Union[datetime, str] = "now",
+    ) -> pd.DataFrame:
+        endpoint_ids = (
+            endpoint_ids if isinstance(endpoint_ids, list) else [endpoint_ids]
+        )
+        df = self._get_records(
+            table=mm_schemas.FileTargetKind.ERRORS,
+            start=start,
+            end=end,
+            columns=[mm_schemas.EventFieldType.ERROR_COUNT],
+            filter_query=f"endpoint_id IN({str(endpoint_ids)[1:-1]})",
+            agg_funcs=["count"],
+        )
+        if not df.empty:
+            df.rename(
+                columns={
+                    f"count({mm_schemas.EventFieldType.ERROR_COUNT})": mm_schemas.EventFieldType.ERROR_COUNT
+                },
+                inplace=True,
+            )
+            df.dropna(inplace=True)
+        return df.reset_index(drop=True)
+
+    def get_avg_latency(
+        self,
+        endpoint_ids: Union[str, list[str]],
+        start: Union[datetime, str] = "0",
+        end: Union[datetime, str] = "now",
+    ) -> pd.DataFrame:
+        endpoint_ids = (
+            endpoint_ids if isinstance(endpoint_ids, list) else [endpoint_ids]
+        )
+        df = self._get_records(
+            table=mm_schemas.FileTargetKind.PREDICTIONS,
+            start=start,
+            end=end,
+            columns=[mm_schemas.EventFieldType.LATENCY],
+            filter_query=f"endpoint_id IN({str(endpoint_ids)[1:-1]})",
+            agg_funcs=["avg"],
+        )
+        if not df.empty:
+            df.dropna(inplace=True)
+        return df.reset_index(drop=True)
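
The added methods above expose endpoint health queries as plain pandas DataFrames. A hedged usage sketch; the project name and endpoint IDs are placeholders, and the constructor arguments are an assumption based on this diff rather than a documented API:

# Illustrative only; assumes a configured V3IO environment and existing endpoints.
from mlrun.model_monitoring.db.tsdb.v3io.v3io_connector import V3IOTSDBConnector

connector = V3IOTSDBConnector(project="my-project")  # constructor args assumed from this diff

endpoint_ids = ["ep-111", "ep-222"]  # placeholder endpoint UIDs
last_requests = connector.get_last_request(endpoint_ids)  # last seen request per endpoint (UTC)
drift = connector.get_drift_status(endpoint_ids)          # worst result status, last 24h by default
errors = connector.get_error_count(endpoint_ids)          # count of recorded model errors
latency = connector.get_avg_latency(endpoint_ids)         # average latency over the queried range

for df in (last_requests, drift, errors, latency):
    print(df.head())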

mlrun/model_monitoring/helpers.py

@@ -18,25 +18,23 @@ import typing
 import numpy as np
 import pandas as pd
 
+if typing.TYPE_CHECKING:
+    from mlrun.db.base import RunDBInterface
+    from mlrun.projects import MlrunProject
+
 import mlrun
+import mlrun.artifacts
 import mlrun.common.model_monitoring.helpers
-import mlrun.common.schemas
-
-
-)
+import mlrun.common.schemas.model_monitoring.constants as mm_constants
+import mlrun.data_types.infer
+import mlrun.model_monitoring
 from mlrun.common.schemas.model_monitoring.model_endpoints import (
     ModelEndpointMonitoringMetric,
-    ModelEndpointMonitoringMetricType,
     _compose_full_name,
 )
 from mlrun.model_monitoring.model_endpoint import ModelEndpoint
 from mlrun.utils import logger
 
-if typing.TYPE_CHECKING:
-    from mlrun.db.base import RunDBInterface
-    from mlrun.projects import MlrunProject
-import mlrun.common.schemas.model_monitoring.constants as mm_constants
-
 
 class _BatchDict(typing.TypedDict):
     minutes: int
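
The reordered imports above keep `RunDBInterface` and `MlrunProject` under a `typing.TYPE_CHECKING` guard, so they are visible to type checkers without being imported at runtime. A generic illustration of that guard (the module name is a placeholder):

from __future__ import annotations

import typing

if typing.TYPE_CHECKING:
    # Seen only by static type checkers; never imported at runtime,
    # which keeps heavy or circular imports out of the import graph.
    from some_heavy_module import HeavyClient  # placeholder module


def use_client(client: HeavyClient) -> None:
    # With deferred annotations the name does not need to exist at runtime.
    print(type(client).__name__)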
@@ -45,33 +43,32 @@ class _BatchDict(typing.TypedDict):
 
 
 def get_stream_path(
-    project: str
+    project: str,
     function_name: str = mm_constants.MonitoringFunctionNames.STREAM,
+    stream_uri: typing.Optional[str] = None,
 ) -> str:
     """
     Get stream path from the project secret. If wasn't set, take it from the system configurations
 
     :param project: Project name.
-    :param function_name:
+    :param function_name: Application name. Default is model_monitoring_stream.
+    :param stream_uri: Stream URI. If provided, it will be used instead of the one from the project secret.
 
     :return: Monitoring stream path to the relevant application.
     """
 
-    stream_uri = mlrun.get_secret_or_env(
-
+    stream_uri = stream_uri or mlrun.get_secret_or_env(
+        mm_constants.ProjectSecretKeys.STREAM_PATH
     )
 
     if not stream_uri or stream_uri == "v3io":
-        # TODO : remove the first part of this condition in 1.9.0
         stream_uri = mlrun.mlconf.get_model_monitoring_file_target_path(
             project=project,
-            kind=
+            kind=mm_constants.FileTargetKind.STREAM,
             target="online",
             function_name=function_name,
         )
 
-    if isinstance(stream_uri, list):  # ML-6043 - user side gets only the new stream uri
-        stream_uri = stream_uri[1]  # get new stream path, under projects
     return mlrun.common.model_monitoring.helpers.parse_monitoring_stream_path(
         stream_uri=stream_uri, project=project, function_name=function_name
     )
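
With the new optional `stream_uri` argument, callers can bypass the secret and environment lookup entirely. A hedged usage sketch; the project name and the Kafka URI are placeholders, not values taken from this diff:

from mlrun.model_monitoring.helpers import get_stream_path

# Resolve from the project secret / system configuration (previous behaviour).
default_path = get_stream_path(project="my-project")

# Pass an explicit stream URI and skip the secret lookup (new in this version).
kafka_path = get_stream_path(
    project="my-project",
    stream_uri="kafka://broker:9092?topic=monitoring-stream",  # placeholder URI
)
print(default_path, kafka_path)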
@@ -79,7 +76,7 @@ def get_stream_path(
 
 def get_monitoring_parquet_path(
     project: "MlrunProject",
-    kind: str =
+    kind: str = mm_constants.FileTargetKind.PARQUET,
 ) -> str:
     """Get model monitoring parquet target for the current project and kind. The parquet target path is based on the
     project artifact path. If project artifact path is not defined, the parquet target path will be based on MLRun
@@ -111,12 +108,9 @@ def get_connection_string(secret_provider: typing.Callable[[str], str] = None) -
 
     """
 
-    return (
-
-
-            secret_provider=secret_provider,
-        )
-        or mlrun.mlconf.model_endpoint_monitoring.endpoint_store_connection
+    return mlrun.get_secret_or_env(
+        key=mm_constants.ProjectSecretKeys.ENDPOINT_STORE_CONNECTION,
+        secret_provider=secret_provider,
     )
 
 
@@ -129,12 +123,9 @@ def get_tsdb_connection_string(
     :return: Valid TSDB connection string.
     """
 
-    return (
-
-
-            secret_provider=secret_provider,
-        )
-        or mlrun.mlconf.model_endpoint_monitoring.tsdb_connection
+    return mlrun.get_secret_or_env(
+        key=mm_constants.ProjectSecretKeys.TSDB_CONNECTION,
+        secret_provider=secret_provider,
     )
 
 
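
Both connection-string helpers now resolve their value solely through `mlrun.get_secret_or_env`, dropping the fallback to the `mlrun.mlconf` settings. A hedged sketch of that lookup with a toy secret provider (the dictionary and its value are placeholders):

import mlrun
import mlrun.common.schemas.model_monitoring.constants as mm_constants

# A toy secret provider standing in for real project secrets.
fake_secrets = {mm_constants.ProjectSecretKeys.TSDB_CONNECTION: "v3io"}  # placeholder value

value = mlrun.get_secret_or_env(
    mm_constants.ProjectSecretKeys.TSDB_CONNECTION,
    secret_provider=fake_secrets.get,  # falls back to the environment when this returns nothing
)
print(value)  # -> "v3io"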
@@ -184,7 +175,7 @@ def _get_monitoring_time_window_from_controller_run(
 def update_model_endpoint_last_request(
     project: str,
     model_endpoint: ModelEndpoint,
-    current_request: datetime,
+    current_request: datetime.datetime,
     db: "RunDBInterface",
 ) -> None:
     """
@@ -195,7 +186,8 @@ def update_model_endpoint_last_request(
     :param current_request: current request time
     :param db: DB interface.
     """
-
+    is_model_server_endpoint = model_endpoint.spec.stream_path != ""
+    if is_model_server_endpoint:
         current_request = current_request.isoformat()
         logger.info(
             "Update model endpoint last request time (EP with serving)",
@@ -207,14 +199,15 @@ def update_model_endpoint_last_request(
         db.patch_model_endpoint(
             project=project,
             endpoint_id=model_endpoint.metadata.uid,
-            attributes={EventFieldType.LAST_REQUEST: current_request},
+            attributes={mm_constants.EventFieldType.LAST_REQUEST: current_request},
         )
-    else:
+    else:  # model endpoint without any serving function - close the window "manually"
         try:
             time_window = _get_monitoring_time_window_from_controller_run(project, db)
         except mlrun.errors.MLRunNotFoundError:
-            logger.
-                "Not bumping model endpoint last request time - the monitoring controller isn't deployed yet"
+            logger.warn(
+                "Not bumping model endpoint last request time - the monitoring controller isn't deployed yet.\n"
+                "Call `project.enable_model_monitoring()` first."
             )
             return
 
@@ -236,7 +229,7 @@ def update_model_endpoint_last_request(
         db.patch_model_endpoint(
             project=project,
             endpoint_id=model_endpoint.metadata.uid,
-            attributes={EventFieldType.LAST_REQUEST: bumped_last_request},
+            attributes={mm_constants.EventFieldType.LAST_REQUEST: bumped_last_request},
         )
 
 
@@ -256,12 +249,11 @@ def calculate_inputs_statistics(
 
     # Use `DFDataInfer` to calculate the statistics over the inputs:
     inputs_statistics = mlrun.data_types.infer.DFDataInfer.get_stats(
-        df=inputs,
-        options=mlrun.data_types.infer.InferOptions.Histogram,
+        df=inputs, options=mlrun.data_types.infer.InferOptions.Histogram
     )
 
     # Recalculate the histograms over the bins that are set in the sample-set of the end point:
-    for feature in inputs_statistics
+    for feature in list(inputs_statistics):
         if feature in sample_set_statistics:
             counts, bins = np.histogram(
                 inputs[feature].to_numpy(),
@@ -271,13 +263,9 @@ def calculate_inputs_statistics(
                 counts.tolist(),
                 bins.tolist(),
             ]
-
-            #
-
-                mlrun.common.model_monitoring.helpers.Histogram(
-                    inputs_statistics[feature]["hist"]
-                )
-            )
+        else:
+            # If the feature is not in the sample set and doesn't have a histogram, remove it from the statistics:
+            inputs_statistics.pop(feature)
 
     return inputs_statistics
 
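
The rewritten loop above recomputes each feature's histogram over the bin edges recorded for the endpoint's sample set, so live inputs and the reference data share the same bins, and features missing from the sample set are now dropped from the statistics. A small NumPy illustration of recomputing counts over fixed reference bins (the values are made up):

import numpy as np

# Bin edges captured from the endpoint's sample set (reference data) - illustrative values.
reference_bins = np.array([0.0, 1.0, 2.0, 3.0, 4.0])

# Fresh inputs observed at serving time.
live_values = np.array([0.2, 0.9, 1.5, 2.7, 3.3, 3.9])

# Recompute the live histogram over the *reference* bins so both distributions are comparable.
counts, bins = np.histogram(live_values, bins=reference_bins)
print(counts.tolist(), bins.tolist())  # [2, 1, 1, 2] over the shared edges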
@@ -312,7 +300,7 @@ def get_invocations_fqn(project: str) -> str:
         project=project,
         app=mm_constants.SpecialApps.MLRUN_INFRA,
         name=mm_constants.PredictionsQueryConstants.INVOCATIONS,
-        type=ModelEndpointMonitoringMetricType.METRIC,
+        type=mm_constants.ModelEndpointMonitoringMetricType.METRIC,
     )
 
 
@@ -326,7 +314,39 @@ def get_invocations_metric(project: str) -> ModelEndpointMonitoringMetric:
     return ModelEndpointMonitoringMetric(
         project=project,
         app=mm_constants.SpecialApps.MLRUN_INFRA,
-        type=ModelEndpointMonitoringMetricType.METRIC,
+        type=mm_constants.ModelEndpointMonitoringMetricType.METRIC,
         name=mm_constants.PredictionsQueryConstants.INVOCATIONS,
         full_name=get_invocations_fqn(project),
     )
+
+
+def enrich_model_endpoint_with_model_uri(
+    model_endpoint: ModelEndpoint,
+    model_obj: mlrun.artifacts.ModelArtifact,
+):
+    """
+    Enrich the model endpoint object with the model uri from the model object. We will use a unique reference
+    to the model object that includes the project, db_key, iter, and tree.
+    In addition, we verify that the model object is of type `ModelArtifact`.
+
+    :param model_endpoint: An object representing the model endpoint that will be enriched with the model uri.
+    :param model_obj: An object representing the model artifact.
+
+    :raise: `MLRunInvalidArgumentError` if the model object is not of type `ModelArtifact`.
+    """
+    mlrun.utils.helpers.verify_field_of_type(
+        field_name="model_endpoint.spec.model_uri",
+        field_value=model_obj,
+        expected_type=mlrun.artifacts.ModelArtifact,
+    )
+
+    # Update model_uri with a unique reference to handle future changes
+    model_artifact_uri = mlrun.utils.helpers.generate_artifact_uri(
+        project=model_endpoint.metadata.project,
+        key=model_obj.db_key,
+        iter=model_obj.iter,
+        tree=model_obj.tree,
+    )
+    model_endpoint.spec.model_uri = mlrun.datastore.get_store_uri(
+        kind=mlrun.utils.helpers.StorePrefix.Model, uri=model_artifact_uri
+    )
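
A hedged sketch of how the new `enrich_model_endpoint_with_model_uri` helper might be used after logging a model; the project name, model file, and endpoint construction are placeholders, and the surrounding workflow is an assumption rather than part of this diff:

import mlrun
from mlrun.model_monitoring.helpers import enrich_model_endpoint_with_model_uri
from mlrun.model_monitoring.model_endpoint import ModelEndpoint

project = mlrun.get_or_create_project("my-project")  # placeholder project
model_obj = project.log_model("my-model", model_file="model.pkl")  # placeholder model file

endpoint = ModelEndpoint()  # schematic: real usage fills in more metadata/spec fields
endpoint.metadata.project = project.name

# Point the endpoint at a unique, versioned store URI for the logged model.
enrich_model_endpoint_with_model_uri(model_endpoint=endpoint, model_obj=model_obj)
print(endpoint.spec.model_uri)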