mlrun 1.7.2rc3__py3-none-any.whl → 1.8.0rc1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of mlrun might be problematic.

Files changed (222)
  1. mlrun/__init__.py +14 -12
  2. mlrun/__main__.py +3 -3
  3. mlrun/alerts/alert.py +19 -12
  4. mlrun/artifacts/__init__.py +0 -2
  5. mlrun/artifacts/base.py +34 -11
  6. mlrun/artifacts/dataset.py +16 -16
  7. mlrun/artifacts/manager.py +13 -13
  8. mlrun/artifacts/model.py +66 -53
  9. mlrun/common/constants.py +6 -0
  10. mlrun/common/formatters/__init__.py +1 -0
  11. mlrun/common/formatters/feature_set.py +1 -0
  12. mlrun/common/formatters/function.py +1 -0
  13. mlrun/common/formatters/model_endpoint.py +30 -0
  14. mlrun/common/formatters/pipeline.py +1 -2
  15. mlrun/common/model_monitoring/__init__.py +0 -3
  16. mlrun/common/model_monitoring/helpers.py +1 -1
  17. mlrun/common/runtimes/constants.py +1 -2
  18. mlrun/common/schemas/__init__.py +4 -2
  19. mlrun/common/schemas/artifact.py +0 -6
  20. mlrun/common/schemas/common.py +50 -0
  21. mlrun/common/schemas/model_monitoring/__init__.py +8 -1
  22. mlrun/common/schemas/model_monitoring/constants.py +62 -12
  23. mlrun/common/schemas/model_monitoring/model_endpoint_v2.py +149 -0
  24. mlrun/common/schemas/model_monitoring/model_endpoints.py +21 -5
  25. mlrun/common/schemas/partition.py +122 -0
  26. mlrun/config.py +43 -15
  27. mlrun/data_types/__init__.py +0 -2
  28. mlrun/data_types/data_types.py +0 -1
  29. mlrun/data_types/infer.py +3 -1
  30. mlrun/data_types/spark.py +4 -4
  31. mlrun/data_types/to_pandas.py +2 -11
  32. mlrun/datastore/__init__.py +0 -2
  33. mlrun/datastore/alibaba_oss.py +4 -1
  34. mlrun/datastore/azure_blob.py +4 -1
  35. mlrun/datastore/base.py +12 -4
  36. mlrun/datastore/datastore.py +9 -3
  37. mlrun/datastore/datastore_profile.py +1 -1
  38. mlrun/datastore/dbfs_store.py +4 -1
  39. mlrun/datastore/filestore.py +4 -1
  40. mlrun/datastore/google_cloud_storage.py +4 -1
  41. mlrun/datastore/hdfs.py +4 -1
  42. mlrun/datastore/inmem.py +4 -1
  43. mlrun/datastore/redis.py +4 -1
  44. mlrun/datastore/s3.py +4 -1
  45. mlrun/datastore/sources.py +51 -49
  46. mlrun/datastore/store_resources.py +0 -2
  47. mlrun/datastore/targets.py +22 -23
  48. mlrun/datastore/utils.py +2 -2
  49. mlrun/datastore/v3io.py +4 -1
  50. mlrun/datastore/wasbfs/fs.py +13 -12
  51. mlrun/db/base.py +126 -62
  52. mlrun/db/factory.py +3 -0
  53. mlrun/db/httpdb.py +767 -231
  54. mlrun/db/nopdb.py +126 -57
  55. mlrun/errors.py +2 -2
  56. mlrun/execution.py +55 -29
  57. mlrun/feature_store/__init__.py +0 -2
  58. mlrun/feature_store/api.py +40 -40
  59. mlrun/feature_store/common.py +9 -9
  60. mlrun/feature_store/feature_set.py +20 -18
  61. mlrun/feature_store/feature_vector.py +27 -24
  62. mlrun/feature_store/retrieval/base.py +14 -9
  63. mlrun/feature_store/retrieval/job.py +2 -1
  64. mlrun/feature_store/steps.py +2 -2
  65. mlrun/features.py +30 -13
  66. mlrun/frameworks/__init__.py +1 -2
  67. mlrun/frameworks/_common/__init__.py +1 -2
  68. mlrun/frameworks/_common/artifacts_library.py +2 -2
  69. mlrun/frameworks/_common/mlrun_interface.py +10 -6
  70. mlrun/frameworks/_common/model_handler.py +29 -27
  71. mlrun/frameworks/_common/producer.py +3 -1
  72. mlrun/frameworks/_dl_common/__init__.py +1 -2
  73. mlrun/frameworks/_dl_common/loggers/__init__.py +1 -2
  74. mlrun/frameworks/_dl_common/loggers/mlrun_logger.py +4 -4
  75. mlrun/frameworks/_dl_common/loggers/tensorboard_logger.py +3 -3
  76. mlrun/frameworks/_ml_common/__init__.py +1 -2
  77. mlrun/frameworks/_ml_common/loggers/__init__.py +1 -2
  78. mlrun/frameworks/_ml_common/model_handler.py +21 -21
  79. mlrun/frameworks/_ml_common/plans/__init__.py +1 -2
  80. mlrun/frameworks/_ml_common/plans/confusion_matrix_plan.py +3 -1
  81. mlrun/frameworks/_ml_common/plans/dataset_plan.py +3 -3
  82. mlrun/frameworks/_ml_common/plans/roc_curve_plan.py +4 -4
  83. mlrun/frameworks/auto_mlrun/__init__.py +1 -2
  84. mlrun/frameworks/auto_mlrun/auto_mlrun.py +22 -15
  85. mlrun/frameworks/huggingface/__init__.py +1 -2
  86. mlrun/frameworks/huggingface/model_server.py +9 -9
  87. mlrun/frameworks/lgbm/__init__.py +47 -44
  88. mlrun/frameworks/lgbm/callbacks/__init__.py +1 -2
  89. mlrun/frameworks/lgbm/callbacks/logging_callback.py +4 -2
  90. mlrun/frameworks/lgbm/callbacks/mlrun_logging_callback.py +4 -2
  91. mlrun/frameworks/lgbm/mlrun_interfaces/__init__.py +1 -2
  92. mlrun/frameworks/lgbm/mlrun_interfaces/mlrun_interface.py +5 -5
  93. mlrun/frameworks/lgbm/model_handler.py +15 -11
  94. mlrun/frameworks/lgbm/model_server.py +11 -7
  95. mlrun/frameworks/lgbm/utils.py +2 -2
  96. mlrun/frameworks/onnx/__init__.py +1 -2
  97. mlrun/frameworks/onnx/dataset.py +3 -3
  98. mlrun/frameworks/onnx/mlrun_interface.py +2 -2
  99. mlrun/frameworks/onnx/model_handler.py +7 -5
  100. mlrun/frameworks/onnx/model_server.py +8 -6
  101. mlrun/frameworks/parallel_coordinates.py +11 -11
  102. mlrun/frameworks/pytorch/__init__.py +22 -23
  103. mlrun/frameworks/pytorch/callbacks/__init__.py +1 -2
  104. mlrun/frameworks/pytorch/callbacks/callback.py +2 -1
  105. mlrun/frameworks/pytorch/callbacks/logging_callback.py +15 -8
  106. mlrun/frameworks/pytorch/callbacks/mlrun_logging_callback.py +19 -12
  107. mlrun/frameworks/pytorch/callbacks/tensorboard_logging_callback.py +22 -15
  108. mlrun/frameworks/pytorch/callbacks_handler.py +36 -30
  109. mlrun/frameworks/pytorch/mlrun_interface.py +17 -17
  110. mlrun/frameworks/pytorch/model_handler.py +21 -17
  111. mlrun/frameworks/pytorch/model_server.py +13 -9
  112. mlrun/frameworks/sklearn/__init__.py +19 -18
  113. mlrun/frameworks/sklearn/estimator.py +2 -2
  114. mlrun/frameworks/sklearn/metric.py +3 -3
  115. mlrun/frameworks/sklearn/metrics_library.py +8 -6
  116. mlrun/frameworks/sklearn/mlrun_interface.py +3 -2
  117. mlrun/frameworks/sklearn/model_handler.py +4 -3
  118. mlrun/frameworks/tf_keras/__init__.py +11 -12
  119. mlrun/frameworks/tf_keras/callbacks/__init__.py +1 -2
  120. mlrun/frameworks/tf_keras/callbacks/logging_callback.py +17 -14
  121. mlrun/frameworks/tf_keras/callbacks/mlrun_logging_callback.py +15 -12
  122. mlrun/frameworks/tf_keras/callbacks/tensorboard_logging_callback.py +21 -18
  123. mlrun/frameworks/tf_keras/model_handler.py +17 -13
  124. mlrun/frameworks/tf_keras/model_server.py +12 -8
  125. mlrun/frameworks/xgboost/__init__.py +19 -18
  126. mlrun/frameworks/xgboost/model_handler.py +13 -9
  127. mlrun/launcher/base.py +3 -4
  128. mlrun/launcher/local.py +1 -1
  129. mlrun/launcher/remote.py +1 -1
  130. mlrun/lists.py +4 -3
  131. mlrun/model.py +108 -44
  132. mlrun/model_monitoring/__init__.py +1 -2
  133. mlrun/model_monitoring/api.py +6 -6
  134. mlrun/model_monitoring/applications/_application_steps.py +13 -15
  135. mlrun/model_monitoring/applications/histogram_data_drift.py +41 -15
  136. mlrun/model_monitoring/applications/results.py +55 -3
  137. mlrun/model_monitoring/controller.py +185 -223
  138. mlrun/model_monitoring/db/_schedules.py +156 -0
  139. mlrun/model_monitoring/db/_stats.py +189 -0
  140. mlrun/model_monitoring/db/stores/__init__.py +1 -1
  141. mlrun/model_monitoring/db/stores/base/store.py +6 -65
  142. mlrun/model_monitoring/db/stores/sqldb/models/__init__.py +0 -25
  143. mlrun/model_monitoring/db/stores/sqldb/models/base.py +0 -97
  144. mlrun/model_monitoring/db/stores/sqldb/models/mysql.py +2 -58
  145. mlrun/model_monitoring/db/stores/sqldb/models/sqlite.py +0 -15
  146. mlrun/model_monitoring/db/stores/sqldb/sql_store.py +6 -257
  147. mlrun/model_monitoring/db/stores/v3io_kv/kv_store.py +9 -271
  148. mlrun/model_monitoring/db/tsdb/base.py +74 -22
  149. mlrun/model_monitoring/db/tsdb/tdengine/schemas.py +66 -35
  150. mlrun/model_monitoring/db/tsdb/tdengine/stream_graph_steps.py +33 -0
  151. mlrun/model_monitoring/db/tsdb/tdengine/tdengine_connector.py +284 -51
  152. mlrun/model_monitoring/db/tsdb/v3io/stream_graph_steps.py +1 -0
  153. mlrun/model_monitoring/db/tsdb/v3io/v3io_connector.py +35 -17
  154. mlrun/model_monitoring/helpers.py +97 -1
  155. mlrun/model_monitoring/model_endpoint.py +4 -2
  156. mlrun/model_monitoring/stream_processing.py +2 -2
  157. mlrun/model_monitoring/tracking_policy.py +10 -3
  158. mlrun/model_monitoring/writer.py +47 -26
  159. mlrun/package/__init__.py +3 -6
  160. mlrun/package/context_handler.py +1 -1
  161. mlrun/package/packager.py +12 -9
  162. mlrun/package/packagers/__init__.py +0 -2
  163. mlrun/package/packagers/default_packager.py +14 -11
  164. mlrun/package/packagers/numpy_packagers.py +16 -7
  165. mlrun/package/packagers/pandas_packagers.py +18 -18
  166. mlrun/package/packagers/python_standard_library_packagers.py +25 -11
  167. mlrun/package/packagers_manager.py +31 -14
  168. mlrun/package/utils/__init__.py +0 -3
  169. mlrun/package/utils/_pickler.py +6 -6
  170. mlrun/platforms/__init__.py +3 -3
  171. mlrun/platforms/iguazio.py +4 -1
  172. mlrun/projects/__init__.py +1 -6
  173. mlrun/projects/operations.py +27 -27
  174. mlrun/projects/pipelines.py +85 -215
  175. mlrun/projects/project.py +444 -158
  176. mlrun/run.py +9 -9
  177. mlrun/runtimes/__init__.py +1 -3
  178. mlrun/runtimes/base.py +13 -10
  179. mlrun/runtimes/daskjob.py +9 -9
  180. mlrun/runtimes/generators.py +2 -1
  181. mlrun/runtimes/kubejob.py +4 -5
  182. mlrun/runtimes/mpijob/__init__.py +0 -2
  183. mlrun/runtimes/mpijob/abstract.py +7 -6
  184. mlrun/runtimes/nuclio/api_gateway.py +7 -7
  185. mlrun/runtimes/nuclio/application/application.py +11 -11
  186. mlrun/runtimes/nuclio/function.py +14 -13
  187. mlrun/runtimes/nuclio/serving.py +9 -9
  188. mlrun/runtimes/pod.py +74 -29
  189. mlrun/runtimes/remotesparkjob.py +3 -2
  190. mlrun/runtimes/sparkjob/__init__.py +0 -2
  191. mlrun/runtimes/sparkjob/spark3job.py +21 -11
  192. mlrun/runtimes/utils.py +6 -5
  193. mlrun/serving/merger.py +6 -4
  194. mlrun/serving/remote.py +18 -17
  195. mlrun/serving/routers.py +27 -27
  196. mlrun/serving/server.py +1 -1
  197. mlrun/serving/states.py +76 -71
  198. mlrun/serving/utils.py +13 -2
  199. mlrun/serving/v1_serving.py +3 -2
  200. mlrun/serving/v2_serving.py +4 -4
  201. mlrun/track/__init__.py +1 -1
  202. mlrun/track/tracker.py +2 -2
  203. mlrun/track/trackers/mlflow_tracker.py +6 -5
  204. mlrun/utils/async_http.py +1 -1
  205. mlrun/utils/helpers.py +72 -28
  206. mlrun/utils/logger.py +104 -2
  207. mlrun/utils/notifications/notification/base.py +23 -4
  208. mlrun/utils/notifications/notification/console.py +1 -1
  209. mlrun/utils/notifications/notification/git.py +6 -6
  210. mlrun/utils/notifications/notification/ipython.py +5 -4
  211. mlrun/utils/notifications/notification/slack.py +1 -1
  212. mlrun/utils/notifications/notification/webhook.py +13 -17
  213. mlrun/utils/notifications/notification_pusher.py +23 -19
  214. mlrun/utils/regex.py +1 -1
  215. mlrun/utils/version/version.json +2 -2
  216. {mlrun-1.7.2rc3.dist-info → mlrun-1.8.0rc1.dist-info}/METADATA +186 -186
  217. mlrun-1.8.0rc1.dist-info/RECORD +356 -0
  218. {mlrun-1.7.2rc3.dist-info → mlrun-1.8.0rc1.dist-info}/WHEEL +1 -1
  219. mlrun-1.7.2rc3.dist-info/RECORD +0 -351
  220. {mlrun-1.7.2rc3.dist-info → mlrun-1.8.0rc1.dist-info}/LICENSE +0 -0
  221. {mlrun-1.7.2rc3.dist-info → mlrun-1.8.0rc1.dist-info}/entry_points.txt +0 -0
  222. {mlrun-1.7.2rc3.dist-info → mlrun-1.8.0rc1.dist-info}/top_level.txt +0 -0
mlrun/model_monitoring/db/tsdb/tdengine/tdengine_connector.py

@@ -13,8 +13,7 @@
 # limitations under the License.
 
 import typing
-from datetime import datetime
-from typing import Union
+from datetime import datetime, timedelta, timezone
 
 import pandas as pd
 import taosws
@@ -82,13 +81,16 @@ class TDEngineConnector(TSDBConnector):
         """Initialize the super tables for the TSDB."""
         self.tables = {
             mm_schemas.TDEngineSuperTables.APP_RESULTS: tdengine_schemas.AppResultTable(
-                project=self.project, database=self.database
+                self.database
             ),
             mm_schemas.TDEngineSuperTables.METRICS: tdengine_schemas.Metrics(
-                project=self.project, database=self.database
+                self.database
             ),
             mm_schemas.TDEngineSuperTables.PREDICTIONS: tdengine_schemas.Predictions(
-                project=self.project, database=self.database
+                self.database
+            ),
+            mm_schemas.TDEngineSuperTables.ERRORS: tdengine_schemas.Errors(
+                self.database
             ),
         }
 
@@ -112,9 +114,11 @@ class TDEngineConnector(TSDBConnector):
         """
 
         table_name = (
+            f"{self.project}_"
             f"{event[mm_schemas.WriterEvent.ENDPOINT_ID]}_"
-            f"{event[mm_schemas.WriterEvent.APPLICATION_NAME]}"
+            f"{event[mm_schemas.WriterEvent.APPLICATION_NAME]}_"
         )
+        event[mm_schemas.EventFieldType.PROJECT] = self.project
 
         if kind == mm_schemas.WriterEventKind.RESULT:
             # Write a new result
@@ -122,7 +126,6 @@ class TDEngineConnector(TSDBConnector):
             table_name = (
                 f"{table_name}_{event[mm_schemas.ResultData.RESULT_NAME]}"
             ).replace("-", "_")
-            event.pop(mm_schemas.ResultData.CURRENT_STATS, None)
 
         else:
             # Write a new metric
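
Since projects now share supertables, the subtable name is prefixed with the project and the event carries the project as an explicit field. A minimal sketch of the naming logic in the two hunks above, using hypothetical values (note the double underscore produced by the trailing `_` plus the result-name join, and that the `replace` runs over the whole string):

    # Hypothetical values, for illustration only
    project, endpoint_id, app_name = "fraud-demo", "ep123", "drift_app"
    table_name = f"{project}_{endpoint_id}_{app_name}_"
    # For a RESULT event, the result name is appended and '-' becomes '_':
    table_name = f"{table_name}_{'data-drift'}".replace("-", "_")
    print(table_name)  # fraud_demo_ep123_drift_app__data_drift
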
@@ -185,9 +188,7 @@ class TDEngineConnector(TSDBConnector):
             name=name,
             after=after,
             url=self._tdengine_connection_string,
-            supertable=self.tables[
-                mm_schemas.TDEngineSuperTables.PREDICTIONS
-            ].super_table,
+            supertable=mm_schemas.TDEngineSuperTables.PREDICTIONS,
             table_col=mm_schemas.EventFieldType.TABLE_COLUMN,
             time_col=mm_schemas.EventFieldType.TIME,
             database=self.database,
@@ -209,8 +210,38 @@ class TDEngineConnector(TSDBConnector):
             after="ProcessBeforeTDEngine",
         )
 
-    def handle_model_error(self, graph, **kwargs) -> None:
-        pass
+    def handle_model_error(
+        self,
+        graph,
+        tsdb_batching_max_events: int = 1000,
+        tsdb_batching_timeout_secs: int = 30,
+        **kwargs,
+    ) -> None:
+        graph.add_step(
+            "mlrun.model_monitoring.db.tsdb.tdengine.stream_graph_steps.ErrorExtractor",
+            name="error_extractor",
+            after="ForwardError",
+        )
+        graph.add_step(
+            "storey.TDEngineTarget",
+            name="tsdb_error",
+            after="error_extractor",
+            url=self._tdengine_connection_string,
+            supertable=mm_schemas.TDEngineSuperTables.ERRORS,
+            table_col=mm_schemas.EventFieldType.TABLE_COLUMN,
+            time_col=mm_schemas.EventFieldType.TIME,
+            database=self.database,
+            columns=[
+                mm_schemas.EventFieldType.MODEL_ERROR,
+            ],
+            tag_cols=[
+                mm_schemas.EventFieldType.PROJECT,
+                mm_schemas.EventFieldType.ENDPOINT_ID,
+                mm_schemas.EventFieldType.ERROR_TYPE,
+            ],
+            max_events=tsdb_batching_max_events,
+            flush_after_seconds=tsdb_batching_timeout_secs,
+        )
 
     def delete_tsdb_resources(self):
         """
@@ -220,23 +251,22 @@ class TDEngineConnector(TSDBConnector):
             "Deleting all project resources using the TDEngine connector",
             project=self.project,
         )
-        drop_statements = []
         for table in self.tables:
-            drop_statements.append(self.tables[table].drop_supertable_query())
-
-        try:
-            self.connection.run(
-                statements=drop_statements,
+            get_subtable_names_query = self.tables[table]._get_subtables_query(
+                values={mm_schemas.EventFieldType.PROJECT: self.project}
+            )
+            subtables = self.connection.run(
+                query=get_subtable_names_query,
                 timeout=self._timeout,
                 retries=self._retries,
-            )
-        except Exception as e:
-            logger.warning(
-                "Failed to drop TDEngine tables. You may need to drop them manually. "
-                "These can be found under the following supertables: app_results, "
-                "metrics, and predictions.",
-                project=self.project,
-                error=mlrun.errors.err_to_str(e),
+            ).data
+            drop_statements = []
+            for subtable in subtables:
+                drop_statements.append(
+                    self.tables[table]._drop_subtable_query(subtable=subtable[0])
+                )
+            self.connection.run(
+                statements=drop_statements, timeout=self._timeout, retries=self._retries
             )
         logger.debug(
             "Deleted all project resources using the TDEngine connector",
@@ -265,6 +295,10 @@ class TDEngineConnector(TSDBConnector):
         limit: typing.Optional[int] = None,
         sliding_window_step: typing.Optional[str] = None,
         timestamp_column: str = mm_schemas.EventFieldType.TIME,
+        group_by: typing.Optional[typing.Union[list[str], str]] = None,
+        preform_agg_columns: typing.Optional[list] = None,
+        order_by: typing.Optional[str] = None,
+        desc: typing.Optional[bool] = None,
     ) -> pd.DataFrame:
         """
         Getting records from TSDB data collection.
@@ -284,11 +318,26 @@ class TDEngineConnector(TSDBConnector):
                                     `sliding_window_step` is provided, interval must be provided as well. Provided
                                     as a string in the format of '1m', '1h', etc.
         :param timestamp_column:    The column name that holds the timestamp index.
+        :param group_by:            The column name to group by. Note that if `group_by` is provided, aggregation
+                                    functions must bg provided
+        :param preform_agg_columns: The columns to preform aggregation on.
+                                    notice that all aggregation functions provided will preform on those columns.
+                                    If not provided The default behavior is to preform on all columns in columns,
+                                    if an empty list was provided The aggregation won't be performed.
+        :param order_by:            The column or alias to preform ordering on the query.
+        :param desc:                Whether or not to sort the results in descending order.
 
         :return: DataFrame with the provided attributes from the data collection.
         :raise:  MLRunInvalidArgumentError if query the provided table failed.
         """
 
+        project_condition = f"project = '{self.project}'"
+        filter_query = (
+            f"({filter_query}) AND ({project_condition})"
+            if filter_query
+            else project_condition
+        )
+
         full_query = tdengine_schemas.TDEngineSchema._get_records_query(
             table=table,
             start=start,
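
Because every project now writes into the same supertables, each read is scoped by the `project` tag. A small sketch of the composed WHERE clause, mirroring the filter composition above (values hypothetical):

    project = "my_proj"
    filter_query = "endpoint_id='ep123'"  # caller-supplied; may be empty
    project_condition = f"project = '{project}'"
    filter_query = (
        f"({filter_query}) AND ({project_condition})"
        if filter_query
        else project_condition
    )
    print(filter_query)  # (endpoint_id='ep123') AND (project = 'my_proj')
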
@@ -301,6 +350,10 @@ class TDEngineConnector(TSDBConnector):
             sliding_window_step=sliding_window_step,
             timestamp_column=timestamp_column,
             database=self.database,
+            group_by=group_by,
+            preform_agg_funcs_columns=preform_agg_columns,
+            order_by=order_by,
+            desc=desc,
         )
         logger.debug("Querying TDEngine", query=full_query)
         try:
@@ -323,6 +376,7 @@ class TDEngineConnector(TSDBConnector):
         end: datetime,
         metrics: list[mm_schemas.ModelEndpointMonitoringMetric],
         type: typing.Literal["metrics", "results"],
+        with_result_extra_data: bool = False,
     ) -> typing.Union[
         list[
             typing.Union[
@@ -340,12 +394,18 @@ class TDEngineConnector(TSDBConnector):
         timestamp_column = mm_schemas.WriterEvent.END_INFER_TIME
         columns = [timestamp_column, mm_schemas.WriterEvent.APPLICATION_NAME]
         if type == "metrics":
-            table = self.tables[mm_schemas.TDEngineSuperTables.METRICS].super_table
+            if with_result_extra_data:
+                logger.warning(
+                    "The 'with_result_extra_data' parameter is not supported for metrics, just for results",
+                    project=self.project,
+                    endpoint_id=endpoint_id,
+                )
+            table = mm_schemas.TDEngineSuperTables.METRICS
             name = mm_schemas.MetricData.METRIC_NAME
             columns += [name, mm_schemas.MetricData.METRIC_VALUE]
             df_handler = self.df_to_metrics_values
         elif type == "results":
-            table = self.tables[mm_schemas.TDEngineSuperTables.APP_RESULTS].super_table
+            table = mm_schemas.TDEngineSuperTables.APP_RESULTS
             name = mm_schemas.ResultData.RESULT_NAME
             columns += [
                 name,
@@ -353,6 +413,8 @@ class TDEngineConnector(TSDBConnector):
                 mm_schemas.ResultData.RESULT_STATUS,
                 mm_schemas.ResultData.RESULT_KIND,
             ]
+            if with_result_extra_data:
+                columns.append(mm_schemas.ResultData.RESULT_EXTRA_DATA)
             df_handler = self.df_to_results_values
         else:
             raise mlrun.errors.MLRunInvalidArgumentError(
@@ -389,6 +451,10 @@ class TDEngineConnector(TSDBConnector):
             is_empty=df.empty,
         )
 
+        if not with_result_extra_data and type == "results":
+            # Set the extra data to an empty string if it's not requested
+            df[mm_schemas.ResultData.RESULT_EXTRA_DATA] = ""
+
         return df_handler(df=df, metrics=metrics, project=self.project)
 
     def read_predictions(
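
The read path gained a `with_result_extra_data` flag; when it is left off, the extra-data column is still present in the result but blanked. A hedged sketch of a call against the method above (shown here as `read_metrics_data`, an assumption, since the method name falls outside the hunk):

    # Sketch: assumes the method above is the connector's read_metrics_data
    values = connector.read_metrics_data(
        endpoint_id="ep123",
        start=start,
        end=end,
        metrics=metrics,
        type="results",
        with_result_extra_data=True,  # include ResultData.RESULT_EXTRA_DATA
    )
    # With the default False, RESULT_EXTRA_DATA is returned as "" for results
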
@@ -411,7 +477,7 @@ class TDEngineConnector(TSDBConnector):
                 "both or neither of `aggregation_window` and `agg_funcs` must be provided"
             )
         df = self._get_records(
-            table=self.tables[mm_schemas.TDEngineSuperTables.PREDICTIONS].super_table,
+            table=mm_schemas.TDEngineSuperTables.PREDICTIONS,
             start=start,
             end=end,
             columns=[mm_schemas.EventFieldType.LATENCY],
@@ -452,51 +518,218 @@
 
     def get_last_request(
         self,
-        endpoint_ids: Union[str, list[str]],
-        start: Union[datetime, str] = "0",
-        end: Union[datetime, str] = "now",
+        endpoint_ids: typing.Union[str, list[str]],
+        start: typing.Optional[datetime] = None,
+        end: typing.Optional[datetime] = None,
     ) -> pd.DataFrame:
-        pass
+        endpoint_ids = (
+            endpoint_ids if isinstance(endpoint_ids, list) else [endpoint_ids]
+        )
+        start, end = self._get_start_end(start, end)
+        df = self._get_records(
+            table=mm_schemas.TDEngineSuperTables.PREDICTIONS,
+            start=start,
+            end=end,
+            columns=[
+                mm_schemas.EventFieldType.ENDPOINT_ID,
+                mm_schemas.EventFieldType.TIME,
+                mm_schemas.EventFieldType.LATENCY,
+            ],
+            filter_query=f"endpoint_id IN({str(endpoint_ids)[1:-1]})",
+            timestamp_column=mm_schemas.EventFieldType.TIME,
+            agg_funcs=["last"],
+            group_by=mm_schemas.EventFieldType.ENDPOINT_ID,
+            preform_agg_columns=[mm_schemas.EventFieldType.TIME],
+        )
+        if not df.empty:
+            df.dropna(inplace=True)
+            df.rename(
+                columns={
+                    f"last({mm_schemas.EventFieldType.TIME})": mm_schemas.EventFieldType.LAST_REQUEST,
+                    f"{mm_schemas.EventFieldType.LATENCY}": "last_latency",
+                },
+                inplace=True,
+            )
+            df[mm_schemas.EventFieldType.LAST_REQUEST] = df[
+                mm_schemas.EventFieldType.LAST_REQUEST
+            ].map(
+                lambda last_request: datetime.strptime(
+                    last_request, "%Y-%m-%d %H:%M:%S.%f %z"
+                ).astimezone(tz=timezone.utc)
+            )
+        return df
 
     def get_drift_status(
         self,
-        endpoint_ids: Union[str, list[str]],
-        start: Union[datetime, str] = "now-24h",
-        end: Union[datetime, str] = "now",
+        endpoint_ids: typing.Union[str, list[str]],
+        start: typing.Optional[datetime] = None,
+        end: typing.Optional[datetime] = None,
     ) -> pd.DataFrame:
-        pass
+        endpoint_ids = (
+            endpoint_ids if isinstance(endpoint_ids, list) else [endpoint_ids]
+        )
+        start = start or (mlrun.utils.datetime_now() - timedelta(hours=24))
+        start, end = self._get_start_end(start, end)
+        df = self._get_records(
+            table=mm_schemas.TDEngineSuperTables.APP_RESULTS,
+            start=start,
+            end=end,
+            columns=[
+                mm_schemas.ResultData.RESULT_STATUS,
+                mm_schemas.EventFieldType.ENDPOINT_ID,
+            ],
+            filter_query=f"endpoint_id IN({str(endpoint_ids)[1:-1]})",
+            timestamp_column=mm_schemas.WriterEvent.END_INFER_TIME,
+            agg_funcs=["max"],
+            group_by=mm_schemas.EventFieldType.ENDPOINT_ID,
+            preform_agg_columns=[mm_schemas.ResultData.RESULT_STATUS],
+        )
+        df.rename(
+            columns={
+                f"max({mm_schemas.ResultData.RESULT_STATUS})": mm_schemas.ResultData.RESULT_STATUS
+            },
+            inplace=True,
+        )
+        if not df.empty:
+            df.dropna(inplace=True)
+        return df
 
     def get_metrics_metadata(
         self,
         endpoint_id: str,
-        start: Union[datetime, str] = "0",
-        end: Union[datetime, str] = "now",
+        start: typing.Optional[datetime] = None,
+        end: typing.Optional[datetime] = None,
     ) -> pd.DataFrame:
-        pass
+        start, end = self._get_start_end(start, end)
+        df = self._get_records(
+            table=mm_schemas.TDEngineSuperTables.METRICS,
+            start=start,
+            end=end,
+            columns=[
+                mm_schemas.ApplicationEvent.APPLICATION_NAME,
+                mm_schemas.MetricData.METRIC_NAME,
+                mm_schemas.EventFieldType.ENDPOINT_ID,
+            ],
+            filter_query=f"endpoint_id='{endpoint_id}'",
+            timestamp_column=mm_schemas.WriterEvent.END_INFER_TIME,
+            group_by=[
+                mm_schemas.WriterEvent.APPLICATION_NAME,
+                mm_schemas.MetricData.METRIC_NAME,
+            ],
+            agg_funcs=["last"],
+        )
+        df.rename(
+            columns={
+                f"last({mm_schemas.ApplicationEvent.APPLICATION_NAME})": mm_schemas.ApplicationEvent.APPLICATION_NAME,
+                f"last({mm_schemas.MetricData.METRIC_NAME})": mm_schemas.MetricData.METRIC_NAME,
+                f"last({mm_schemas.EventFieldType.ENDPOINT_ID})": mm_schemas.EventFieldType.ENDPOINT_ID,
+            },
+            inplace=True,
+        )
+        if not df.empty:
+            df.dropna(inplace=True)
+        return df
 
     def get_results_metadata(
         self,
         endpoint_id: str,
-        start: Union[datetime, str] = "0",
-        end: Union[datetime, str] = "now",
+        start: typing.Optional[datetime] = None,
+        end: typing.Optional[datetime] = None,
     ) -> pd.DataFrame:
-        pass
+        start, end = self._get_start_end(start, end)
+        df = self._get_records(
+            table=mm_schemas.TDEngineSuperTables.APP_RESULTS,
+            start=start,
+            end=end,
+            columns=[
+                mm_schemas.ApplicationEvent.APPLICATION_NAME,
+                mm_schemas.ResultData.RESULT_NAME,
+                mm_schemas.ResultData.RESULT_KIND,
+                mm_schemas.EventFieldType.ENDPOINT_ID,
+            ],
+            filter_query=f"endpoint_id='{endpoint_id}'",
+            timestamp_column=mm_schemas.WriterEvent.END_INFER_TIME,
+            group_by=[
+                mm_schemas.WriterEvent.APPLICATION_NAME,
+                mm_schemas.ResultData.RESULT_NAME,
+            ],
+            agg_funcs=["last"],
+        )
+        df.rename(
+            columns={
+                f"last({mm_schemas.ApplicationEvent.APPLICATION_NAME})": mm_schemas.ApplicationEvent.APPLICATION_NAME,
+                f"last({mm_schemas.ResultData.RESULT_NAME})": mm_schemas.ResultData.RESULT_NAME,
+                f"last({mm_schemas.ResultData.RESULT_KIND})": mm_schemas.ResultData.RESULT_KIND,
+                f"last({mm_schemas.EventFieldType.ENDPOINT_ID})": mm_schemas.EventFieldType.ENDPOINT_ID,
+            },
+            inplace=True,
+        )
+        if not df.empty:
+            df.dropna(inplace=True)
+        return df
 
     def get_error_count(
         self,
-        endpoint_ids: Union[str, list[str]],
-        start: Union[datetime, str] = "0",
-        end: Union[datetime, str] = "now",
+        endpoint_ids: typing.Union[str, list[str]],
+        start: typing.Optional[datetime] = None,
+        end: typing.Optional[datetime] = None,
     ) -> pd.DataFrame:
-        pass
+        endpoint_ids = (
+            endpoint_ids if isinstance(endpoint_ids, list) else [endpoint_ids]
+        )
+        start, end = self._get_start_end(start, end)
+        df = self._get_records(
+            table=mm_schemas.TDEngineSuperTables.ERRORS,
+            start=start,
+            end=end,
+            columns=[
+                mm_schemas.EventFieldType.MODEL_ERROR,
+                mm_schemas.EventFieldType.ENDPOINT_ID,
+            ],
+            agg_funcs=["count"],
+            filter_query=f"endpoint_id IN({str(endpoint_ids)[1:-1]}) "
+            f"AND {mm_schemas.EventFieldType.ERROR_TYPE} = '{mm_schemas.EventFieldType.INFER_ERROR}'",
+            group_by=mm_schemas.EventFieldType.ENDPOINT_ID,
+            preform_agg_columns=[mm_schemas.EventFieldType.MODEL_ERROR],
+        )
+        df.rename(
+            columns={f"count({mm_schemas.EventFieldType.MODEL_ERROR})": "error_count"},
+            inplace=True,
+        )
+        if not df.empty:
+            df.dropna(inplace=True)
+        return df
 
     def get_avg_latency(
         self,
-        endpoint_ids: Union[str, list[str]],
-        start: Union[datetime, str] = "0",
-        end: Union[datetime, str] = "now",
+        endpoint_ids: typing.Union[str, list[str]],
+        start: typing.Optional[datetime] = None,
+        end: typing.Optional[datetime] = None,
     ) -> pd.DataFrame:
-        pass
+        endpoint_ids = (
+            endpoint_ids if isinstance(endpoint_ids, list) else [endpoint_ids]
+        )
+        start, end = self._get_start_end(start, end)
+        df = self._get_records(
+            table=mm_schemas.TDEngineSuperTables.PREDICTIONS,
+            start=start,
+            end=end,
+            columns=[
+                mm_schemas.EventFieldType.LATENCY,
+                mm_schemas.EventFieldType.ENDPOINT_ID,
+            ],
+            agg_funcs=["avg"],
+            filter_query=f"endpoint_id IN({str(endpoint_ids)[1:-1]})",
+            group_by=mm_schemas.EventFieldType.ENDPOINT_ID,
+            preform_agg_columns=[mm_schemas.EventFieldType.LATENCY],
+        )
+        df.rename(
+            columns={f"avg({mm_schemas.EventFieldType.LATENCY})": "avg_latency"},
+            inplace=True,
+        )
+        if not df.empty:
+            df.dropna(inplace=True)
+        return df
 
     # Note: this function serves as a reference for checking the TSDB for the existence of a metric.
     #
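
All six endpoint-statistics getters were `pass` stubs in 1.7.2rc3 and are now implemented on top of `_get_records` with the new grouping and aggregation support. A minimal usage sketch, assuming an initialized connector:

    # Assumes `connector` is an initialized TDEngineConnector (sketch only)
    from datetime import datetime, timedelta, timezone

    end = datetime.now(tz=timezone.utc)
    start = end - timedelta(days=7)

    last = connector.get_last_request(["ep1", "ep2"], start=start, end=end)
    # -> DataFrame with endpoint_id, last_request (UTC), last_latency
    status = connector.get_drift_status("ep1")  # defaults to the last 24 hours
    errors = connector.get_error_count(["ep1"], start=start, end=end)   # error_count
    latency = connector.get_avg_latency(["ep1"], start=start, end=end)  # avg_latency
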
mlrun/model_monitoring/db/tsdb/v3io/stream_graph_steps.py

@@ -150,6 +150,7 @@ class ErrorExtractor(mlrun.feature_store.steps.MapClass):
         endpoint_id = event[EventFieldType.ENDPOINT_ID]
         event = {
             EventFieldType.MODEL_ERROR: str(error),
+            EventFieldType.ERROR_TYPE: EventFieldType.INFER_ERROR,
             EventFieldType.ENDPOINT_ID: endpoint_id,
             EventFieldType.TIMESTAMP: timestamp,
             EventFieldType.ERROR_COUNT: 1.0,
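
With the added field, the event that `ErrorExtractor` emits for a failed inference looks roughly like this (field names per the hunk; values hypothetical):

    # Shape of the extracted error event after this change (illustrative values)
    event = {
        EventFieldType.MODEL_ERROR: "ValueError: bad input",
        EventFieldType.ERROR_TYPE: EventFieldType.INFER_ERROR,  # the new field
        EventFieldType.ENDPOINT_ID: "ep123",
        EventFieldType.TIMESTAMP: timestamp,
        EventFieldType.ERROR_COUNT: 1.0,
    }
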
mlrun/model_monitoring/db/tsdb/v3io/v3io_connector.py

@@ -12,7 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-from datetime import datetime, timezone
+from datetime import datetime, timedelta, timezone
 from io import StringIO
 from typing import Literal, Optional, Union
 
@@ -310,6 +310,7 @@ class V3IOTSDBConnector(TSDBConnector):
             ],
             index_cols=[
                 mm_schemas.EventFieldType.ENDPOINT_ID,
+                mm_schemas.EventFieldType.ERROR_TYPE,
             ],
             max_events=tsdb_batching_max_events,
             flush_after_seconds=tsdb_batching_timeout_secs,
@@ -338,9 +339,6 @@ class V3IOTSDBConnector(TSDBConnector):
         elif kind == mm_schemas.WriterEventKind.RESULT:
             table = self.tables[mm_schemas.V3IOTSDBTables.APP_RESULTS]
             index_cols = index_cols_base + [mm_schemas.ResultData.RESULT_NAME]
-            event.pop(mm_schemas.ResultData.CURRENT_STATS, None)
-            # TODO: remove this when extra data is supported (ML-7460)
-            event.pop(mm_schemas.ResultData.RESULT_EXTRA_DATA, None)
         else:
             raise ValueError(f"Invalid {kind = }")
 
@@ -544,6 +542,7 @@ class V3IOTSDBConnector(TSDBConnector):
         end: datetime,
         metrics: list[mm_schemas.ModelEndpointMonitoringMetric],
         type: Literal["metrics", "results"] = "results",
+        with_result_extra_data: bool = False,
     ) -> Union[
         list[
             Union[
@@ -565,6 +564,12 @@ class V3IOTSDBConnector(TSDBConnector):
         """
 
         if type == "metrics":
+            if with_result_extra_data:
+                logger.warning(
+                    "The 'with_result_extra_data' parameter is not supported for metrics, just for results",
+                    project=self.project,
+                    endpoint_id=endpoint_id,
+                )
             table_path = self.tables[mm_schemas.V3IOTSDBTables.METRICS]
             name = mm_schemas.MetricData.METRIC_NAME
             columns = [mm_schemas.MetricData.METRIC_VALUE]
@@ -577,6 +582,8 @@ class V3IOTSDBConnector(TSDBConnector):
                 mm_schemas.ResultData.RESULT_STATUS,
                 mm_schemas.ResultData.RESULT_KIND,
             ]
+            if with_result_extra_data:
+                columns.append(mm_schemas.ResultData.RESULT_EXTRA_DATA)
             df_handler = self.df_to_results_values
         else:
             raise ValueError(f"Invalid {type = }")
@@ -605,6 +612,9 @@ class V3IOTSDBConnector(TSDBConnector):
             endpoint_id=endpoint_id,
             is_empty=df.empty,
         )
+        if not with_result_extra_data and type == "results":
+            # Set the extra data to an empty string if it's not requested
+            df[mm_schemas.ResultData.RESULT_EXTRA_DATA] = ""
 
         return df_handler(df=df, metrics=metrics, project=self.project)
 
@@ -700,12 +710,13 @@ class V3IOTSDBConnector(TSDBConnector):
     def get_last_request(
         self,
         endpoint_ids: Union[str, list[str]],
-        start: Union[datetime, str] = "0",
-        end: Union[datetime, str] = "now",
+        start: Optional[datetime] = None,
+        end: Optional[datetime] = None,
     ) -> pd.DataFrame:
         endpoint_ids = (
             endpoint_ids if isinstance(endpoint_ids, list) else [endpoint_ids]
         )
+        start, end = self._get_start_end(start, end)
         df = self._get_records(
             table=mm_schemas.FileTargetKind.PREDICTIONS,
             start=start,
@@ -734,12 +745,14 @@ class V3IOTSDBConnector(TSDBConnector):
     def get_drift_status(
         self,
         endpoint_ids: Union[str, list[str]],
-        start: Union[datetime, str] = "now-24h",
-        end: Union[datetime, str] = "now",
+        start: Optional[datetime] = None,
+        end: Optional[datetime] = None,
     ) -> pd.DataFrame:
         endpoint_ids = (
             endpoint_ids if isinstance(endpoint_ids, list) else [endpoint_ids]
         )
+        start = start or (mlrun.utils.datetime_now() - timedelta(hours=24))
+        start, end = self._get_start_end(start, end)
         df = self._get_records(
             table=mm_schemas.V3IOTSDBTables.APP_RESULTS,
             start=start,
@@ -758,9 +771,10 @@ class V3IOTSDBConnector(TSDBConnector):
     def get_metrics_metadata(
         self,
         endpoint_id: str,
-        start: Union[datetime, str] = "0",
-        end: Union[datetime, str] = "now",
+        start: Optional[datetime] = None,
+        end: Optional[datetime] = None,
     ) -> pd.DataFrame:
+        start, end = self._get_start_end(start, end)
         df = self._get_records(
             table=mm_schemas.V3IOTSDBTables.METRICS,
             start=start,
@@ -778,9 +792,10 @@ class V3IOTSDBConnector(TSDBConnector):
     def get_results_metadata(
         self,
         endpoint_id: str,
-        start: Union[datetime, str] = "0",
-        end: Union[datetime, str] = "now",
+        start: Optional[datetime] = None,
+        end: Optional[datetime] = None,
     ) -> pd.DataFrame:
+        start, end = self._get_start_end(start, end)
         df = self._get_records(
             table=mm_schemas.V3IOTSDBTables.APP_RESULTS,
             start=start,
@@ -803,18 +818,20 @@ class V3IOTSDBConnector(TSDBConnector):
     def get_error_count(
         self,
         endpoint_ids: Union[str, list[str]],
-        start: Union[datetime, str] = "0",
-        end: Union[datetime, str] = "now",
+        start: Optional[datetime] = None,
+        end: Optional[datetime] = None,
     ) -> pd.DataFrame:
         endpoint_ids = (
             endpoint_ids if isinstance(endpoint_ids, list) else [endpoint_ids]
         )
+        start, end = self._get_start_end(start, end)
        df = self._get_records(
             table=mm_schemas.FileTargetKind.ERRORS,
             start=start,
             end=end,
             columns=[mm_schemas.EventFieldType.ERROR_COUNT],
-            filter_query=f"endpoint_id IN({str(endpoint_ids)[1:-1]})",
+            filter_query=f"endpoint_id IN({str(endpoint_ids)[1:-1]}) "
+            f"AND {mm_schemas.EventFieldType.ERROR_TYPE} == '{mm_schemas.EventFieldType.INFER_ERROR}'",
             agg_funcs=["count"],
         )
         if not df.empty:
@@ -830,12 +847,13 @@ class V3IOTSDBConnector(TSDBConnector):
     def get_avg_latency(
         self,
         endpoint_ids: Union[str, list[str]],
-        start: Union[datetime, str] = "0",
-        end: Union[datetime, str] = "now",
+        start: Optional[datetime] = None,
+        end: Optional[datetime] = None,
     ) -> pd.DataFrame:
         endpoint_ids = (
             endpoint_ids if isinstance(endpoint_ids, list) else [endpoint_ids]
         )
+        start, end = self._get_start_end(start, end)
         df = self._get_records(
             table=mm_schemas.FileTargetKind.PREDICTIONS,
             start=start,