PyPI - mlrun - Versions diffs - 1.7.0rc38__py3-none-any.whl → 1.7.0rc40__py3-none-any.whl - Mend

mlrun 1.7.0rc38__py3-none-any.whl → 1.7.0rc40__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of mlrun might be problematic. Click here for more details.

Files changed (42)

mlrun/alerts/alert.py +30 -27
mlrun/common/schemas/alert.py +3 -0
mlrun/common/schemas/notification.py +1 -0
mlrun/datastore/alibaba_oss.py +2 -2
mlrun/datastore/azure_blob.py +6 -3
mlrun/datastore/base.py +1 -1
mlrun/datastore/dbfs_store.py +2 -2
mlrun/datastore/google_cloud_storage.py +83 -20
mlrun/datastore/s3.py +2 -2
mlrun/datastore/sources.py +54 -0
mlrun/datastore/targets.py +9 -53
mlrun/db/httpdb.py +6 -1
mlrun/errors.py +8 -0
mlrun/execution.py +7 -0
mlrun/feature_store/api.py +5 -0
mlrun/feature_store/retrieval/job.py +1 -0
mlrun/model.py +24 -3
mlrun/model_monitoring/api.py +9 -0
mlrun/model_monitoring/applications/_application_steps.py +36 -0
mlrun/model_monitoring/applications/histogram_data_drift.py +15 -13
mlrun/model_monitoring/controller.py +15 -11
mlrun/model_monitoring/db/stores/v3io_kv/kv_store.py +5 -5
mlrun/model_monitoring/db/tsdb/tdengine/schemas.py +85 -47
mlrun/model_monitoring/db/tsdb/tdengine/tdengine_connector.py +35 -7
mlrun/model_monitoring/db/tsdb/v3io/v3io_connector.py +3 -1
mlrun/model_monitoring/helpers.py +16 -17
mlrun/model_monitoring/stream_processing.py +2 -3
mlrun/projects/pipelines.py +19 -30
mlrun/projects/project.py +69 -51
mlrun/run.py +8 -6
mlrun/runtimes/__init__.py +4 -0
mlrun/runtimes/nuclio/api_gateway.py +9 -0
mlrun/runtimes/nuclio/application/application.py +112 -54
mlrun/runtimes/nuclio/function.py +1 -1
mlrun/utils/helpers.py +33 -2
mlrun/utils/version/version.json +2 -2
{mlrun-1.7.0rc38.dist-info → mlrun-1.7.0rc40.dist-info}/METADATA +8 -11
{mlrun-1.7.0rc38.dist-info → mlrun-1.7.0rc40.dist-info}/RECORD +42 -42
{mlrun-1.7.0rc38.dist-info → mlrun-1.7.0rc40.dist-info}/WHEEL +1 -1
{mlrun-1.7.0rc38.dist-info → mlrun-1.7.0rc40.dist-info}/LICENSE +0 -0
{mlrun-1.7.0rc38.dist-info → mlrun-1.7.0rc40.dist-info}/entry_points.txt +0 -0
{mlrun-1.7.0rc38.dist-info → mlrun-1.7.0rc40.dist-info}/top_level.txt +0 -0

mlrun/model.py CHANGED Viewed

@@ -679,7 +679,24 @@ class ImageBuilder(ModelObj):
 class Notification(ModelObj):
-    """Notification specification"""
+    """Notification object
+    :param kind: notification implementation kind - slack, webhook, etc.
+    :param name: for logging and identification
+    :param message: message content in the notification
+    :param severity: severity to display in the notification
+    :param when: list of statuses to trigger the notification: 'running', 'completed', 'error'
+    :param condition: optional condition to trigger the notification, a jinja2 expression that can use run data
+                      to evaluate if the notification should be sent in addition to the 'when' statuses.
+                      e.g.: '{{ run["status"]["results"]["accuracy"] < 0.9}}'
+    :param params: Implementation specific parameters for the notification implementation (e.g. slack webhook url,
+                   git repository details, etc.)
+    :param secret_params: secret parameters for the notification implementation, same as params but will be stored
+                          in a k8s secret and passed as a secret reference to the implementation.
+    :param status: notification status - pending, sent, error
+    :param sent_time: time the notification was sent
+    :param reason: failure reason if the notification failed to send
+    """
     def __init__(
         self,
@@ -1468,7 +1485,11 @@ class RunObject(RunTemplate):
     @property
     def error(self) -> str:
         """error string if failed"""
-        if self.status:
+        if (
+            self.status
+            and self.status.state
+            in mlrun.common.runtimes.constants.RunStates.error_and_abortion_states()
+        ):
             unknown_error = ""
             if (
                 self.status.state
@@ -1484,8 +1505,8 @@ class RunObject(RunTemplate):
             return (
                 self.status.error
-                or self.status.reason
                 or self.status.status_text
+                or self.status.reason
                 or unknown_error
             )
         return ""

mlrun/model_monitoring/api.py CHANGED Viewed

@@ -616,7 +616,16 @@ def _create_model_monitoring_function_base(
         app_step = prepare_step.to(class_name=application_class, **application_kwargs)
     else:
         app_step = prepare_step.to(class_name=application_class)
     app_step.__class__ = mlrun.serving.MonitoringApplicationStep
+    app_step.error_handler(
+        name="ApplicationErrorHandler",
+        class_name="mlrun.model_monitoring.applications._application_steps._ApplicationErrorHandler",
+        full_event=True,
+        project=project,
+    )
     app_step.to(
         class_name="mlrun.model_monitoring.applications._application_steps._PushToMonitoringWriter",
         name="PushToMonitoringWriter",

mlrun/model_monitoring/applications/_application_steps.py CHANGED Viewed

@@ -17,6 +17,7 @@ from typing import Optional
 import mlrun.common.helpers
 import mlrun.common.model_monitoring.helpers
+import mlrun.common.schemas.alert as alert_objects
 import mlrun.common.schemas.model_monitoring.constants as mm_constant
 import mlrun.datastore
 import mlrun.serving
@@ -164,3 +165,38 @@ class _PrepareMonitoringEvent(StepToDict):
         )
         context.__class__ = MonitoringApplicationContext
         return context
+class _ApplicationErrorHandler(StepToDict):
+    def __init__(self, project: str, name: Optional[str] = None):
+        self.project = project
+        self.name = name or "ApplicationErrorHandler"
+    def do(self, event):
+        """
+        Handle model monitoring application error. This step will generate an event, describing the error.
+        :param event: Application event.
+        """
+        logger.error(f"Error in application step: {event}")
+        event_data = mlrun.common.schemas.Event(
+            kind=alert_objects.EventKind.MM_APP_FAILED,
+            entity={
+                "kind": alert_objects.EventEntityKind.MODEL_MONITORING_APPLICATION,
+                "project": self.project,
+                "ids": [f"{self.project}_{event.body.application_name}"],
+            },
+            value_dict={
+                "Error": event.error,
+                "Timestamp": event.timestamp,
+                "Application Class": event.body.application_name,
+                "Endpoint ID": event.body.endpoint_id,
+            },
+        )
+        mlrun.get_run_db().generate_event(
+            name=alert_objects.EventKind.MM_APP_FAILED, event_data=event_data
+        )
+        logger.info("Event generated successfully")

mlrun/model_monitoring/applications/histogram_data_drift.py CHANGED Viewed

@@ -91,7 +91,9 @@ class HistogramDataDriftApplication(ModelMonitoringApplicationBaseV2):
     """
     MLRun's default data drift application for model monitoring.
-    The application expects tabular numerical data, and calculates three metrics over the features' histograms.
+    The application expects tabular numerical data, and calculates three metrics over the shared features' histograms.
+    The metrics are calculated on features that have reference data from the training dataset. When there is no
+    reference data (`feature_stats`), this application send a warning log and does nothing.
     The three metrics are:
     * Hellinger distance.
@@ -112,6 +114,7 @@ class HistogramDataDriftApplication(ModelMonitoringApplicationBaseV2):
         project.enable_model_monitoring()
+    To avoid it, pass `deploy_histogram_data_drift_app=False`.
     """
     NAME: Final[str] = HistogramDataDriftApplicationConstants.NAME
@@ -223,19 +226,18 @@ class HistogramDataDriftApplication(ModelMonitoringApplicationBaseV2):
         return metrics
     @staticmethod
-    def _remove_timestamp_feature(
-        sample_set_statistics: mlrun.common.model_monitoring.helpers.FeatureStats,
+    def _get_shared_features_sample_stats(
+        monitoring_context: mm_context.MonitoringApplicationContext,
     ) -> mlrun.common.model_monitoring.helpers.FeatureStats:
         """
-        Drop the 'timestamp' feature if it exists, as it is irrelevant
-        in the plotly artifact
+        Filter out features without reference data in `feature_stats`, e.g. `timestamp`.
         """
-        sample_set_statistics = mlrun.common.model_monitoring.helpers.FeatureStats(
-            sample_set_statistics.copy()
+        return mlrun.common.model_monitoring.helpers.FeatureStats(
+            {
+                key: monitoring_context.sample_df_stats[key]
+                for key in monitoring_context.feature_stats
+            }
         )
-        if EventFieldType.TIMESTAMP in sample_set_statistics:
-            del sample_set_statistics[EventFieldType.TIMESTAMP]
-        return sample_set_statistics
     @staticmethod
     def _log_json_artifact(
@@ -299,8 +301,8 @@ class HistogramDataDriftApplication(ModelMonitoringApplicationBaseV2):
             self._log_json_artifact(drift_per_feature_values, monitoring_context)
         self._log_plotly_table_artifact(
-            sample_set_statistics=self._remove_timestamp_feature(
-                monitoring_context.sample_df_stats
+            sample_set_statistics=self._get_shared_features_sample_stats(
+                monitoring_context
             ),
             inputs_statistics=monitoring_context.feature_stats,
             metrics_per_feature=metrics_per_feature,
@@ -325,7 +327,7 @@ class HistogramDataDriftApplication(ModelMonitoringApplicationBaseV2):
         """
         monitoring_context.logger.debug("Starting to run the application")
         if not monitoring_context.feature_stats:
-            monitoring_context.logger.info(
+            monitoring_context.logger.warning(
                 "No feature statistics found, skipping the application. \n"
                 "In order to run the application, training set must be provided when logging the model."
             )

mlrun/model_monitoring/controller.py CHANGED Viewed

@@ -335,19 +335,23 @@ class MonitoringApplicationController:
                 return
             monitoring_functions = self.project_obj.list_model_monitoring_functions()
             if monitoring_functions:
-                # Gets only application in ready state
                 applications_names = list(
-                    {
-                        app.metadata.name
-                        for app in monitoring_functions
-                        if (
-                            app.status.state == "ready"
-                            # workaround for the default app, as its `status.state` is `None`
-                            or app.metadata.name
-                            == mm_constants.HistogramDataDriftApplicationConstants.NAME
-                        )
-                    }
+                    {app.metadata.name for app in monitoring_functions}
                 )
+            # if monitoring_functions: - TODO : ML-7700
+            #   Gets only application in ready state
+            #   applications_names = list(
+            #       {
+            #           app.metadata.name
+            #           for app in monitoring_functions
+            #           if (
+            #               app.status.state == "ready"
+            #               # workaround for the default app, as its `status.state` is `None`
+            #               or app.metadata.name
+            #               == mm_constants.HistogramDataDriftApplicationConstants.NAME
+            #           )
+            #       }
+            #   )
             if not applications_names:
                 logger.info("No monitoring functions found", project=self.project)
                 return

mlrun/model_monitoring/db/stores/v3io_kv/kv_store.py CHANGED Viewed

@@ -34,11 +34,11 @@ fields_to_encode_decode = [
 ]
 _METRIC_FIELDS: list[str] = [
-    mm_schemas.WriterEvent.APPLICATION_NAME,
-    mm_schemas.MetricData.METRIC_NAME,
-    mm_schemas.MetricData.METRIC_VALUE,
-    mm_schemas.WriterEvent.START_INFER_TIME,
-    mm_schemas.WriterEvent.END_INFER_TIME,
+    mm_schemas.WriterEvent.APPLICATION_NAME.value,
+    mm_schemas.MetricData.METRIC_NAME.value,
+    mm_schemas.MetricData.METRIC_VALUE.value,
+    mm_schemas.WriterEvent.START_INFER_TIME.value,
+    mm_schemas.WriterEvent.END_INFER_TIME.value,
 ]

mlrun/model_monitoring/db/tsdb/tdengine/schemas.py CHANGED Viewed

@@ -17,6 +17,8 @@ from dataclasses import dataclass
 from io import StringIO
 from typing import Optional, Union
+import taosws
 import mlrun.common.schemas.model_monitoring as mm_schemas
 import mlrun.common.types
@@ -28,6 +30,9 @@ class _TDEngineColumnType:
         self.data_type = data_type
         self.length = length
+    def values_to_column(self, values):
+        raise NotImplementedError()
     def __str__(self):
         if self.length is not None:
             return f"{self.data_type}({self.length})"
@@ -44,6 +49,26 @@ class _TDEngineColumn(mlrun.common.types.StrEnum):
     BINARY_10000 = _TDEngineColumnType("BINARY", 10000)
+def values_to_column(values, column_type):
+    if column_type == _TDEngineColumn.TIMESTAMP:
+        timestamps = [round(timestamp.timestamp() * 1000) for timestamp in values]
+        return taosws.millis_timestamps_to_column(timestamps)
+    if column_type == _TDEngineColumn.FLOAT:
+        return taosws.floats_to_column(values)
+    if column_type == _TDEngineColumn.INT:
+        return taosws.ints_to_column(values)
+    if column_type == _TDEngineColumn.BINARY_40:
+        return taosws.binary_to_column(values)
+    if column_type == _TDEngineColumn.BINARY_64:
+        return taosws.binary_to_column(values)
+    if column_type == _TDEngineColumn.BINARY_10000:
+        return taosws.binary_to_column(values)
+    raise mlrun.errors.MLRunInvalidArgumentError(
+        f"unsupported column type '{column_type}'"
+    )
 @dataclass
 class TDEngineSchema:
     """
@@ -55,13 +80,14 @@ class TDEngineSchema:
     def __init__(
         self,
         super_table: str,
-        columns: dict[str, str],
+        columns: dict[str, _TDEngineColumn],
         tags: dict[str, str],
+        database: Optional[str] = None,
     ):
         self.super_table = super_table
         self.columns = columns
         self.tags = tags
-        self.database = _MODEL_MONITORING_DATABASE
+        self.database = database or _MODEL_MONITORING_DATABASE
     def _create_super_table_query(self) -> str:
         columns = ", ".join(f"{col} {val}" for col, val in self.columns.items())
@@ -83,11 +109,23 @@ class TDEngineSchema:
     def _insert_subtable_query(
         self,
+        connection: taosws.Connection,
         subtable: str,
         values: dict[str, Union[str, int, float, datetime.datetime]],
-    ) -> str:
-        values = ", ".join(f"'{values[val]}'" for val in self.columns)
-        return f"INSERT INTO {self.database}.{subtable} VALUES ({values});"
+    ) -> taosws.TaosStmt:
+        stmt = connection.statement()
+        question_marks = ", ".join("?" * len(self.columns))
+        stmt.prepare(f"INSERT INTO ? VALUES ({question_marks});")
+        stmt.set_tbname_tags(subtable, [])
+        bind_params = []
+        for col_name, col_type in self.columns.items():
+            val = values[col_name]
+            bind_params.append(values_to_column([val], col_type))
+        stmt.bind_param(bind_params)
+        return stmt
     def _delete_subtable_query(
         self,
@@ -188,53 +226,53 @@ class TDEngineSchema:
 @dataclass
 class AppResultTable(TDEngineSchema):
-    super_table = mm_schemas.TDEngineSuperTables.APP_RESULTS
-    columns = {
-        mm_schemas.WriterEvent.END_INFER_TIME: _TDEngineColumn.TIMESTAMP,
-        mm_schemas.WriterEvent.START_INFER_TIME: _TDEngineColumn.TIMESTAMP,
-        mm_schemas.ResultData.RESULT_VALUE: _TDEngineColumn.FLOAT,
-        mm_schemas.ResultData.RESULT_STATUS: _TDEngineColumn.INT,
-        mm_schemas.ResultData.CURRENT_STATS: _TDEngineColumn.BINARY_10000,
-    }
-    tags = {
-        mm_schemas.EventFieldType.PROJECT: _TDEngineColumn.BINARY_64,
-        mm_schemas.WriterEvent.ENDPOINT_ID: _TDEngineColumn.BINARY_64,
-        mm_schemas.WriterEvent.APPLICATION_NAME: _TDEngineColumn.BINARY_64,
-        mm_schemas.ResultData.RESULT_NAME: _TDEngineColumn.BINARY_64,
-        mm_schemas.ResultData.RESULT_KIND: _TDEngineColumn.INT,
-    }
-    database = _MODEL_MONITORING_DATABASE
+    def __init__(self, database: Optional[str] = None):
+        super_table = mm_schemas.TDEngineSuperTables.APP_RESULTS
+        columns = {
+            mm_schemas.WriterEvent.END_INFER_TIME: _TDEngineColumn.TIMESTAMP,
+            mm_schemas.WriterEvent.START_INFER_TIME: _TDEngineColumn.TIMESTAMP,
+            mm_schemas.ResultData.RESULT_VALUE: _TDEngineColumn.FLOAT,
+            mm_schemas.ResultData.RESULT_STATUS: _TDEngineColumn.INT,
+        }
+        tags = {
+            mm_schemas.EventFieldType.PROJECT: _TDEngineColumn.BINARY_64,
+            mm_schemas.WriterEvent.ENDPOINT_ID: _TDEngineColumn.BINARY_64,
+            mm_schemas.WriterEvent.APPLICATION_NAME: _TDEngineColumn.BINARY_64,
+            mm_schemas.ResultData.RESULT_NAME: _TDEngineColumn.BINARY_64,
+            mm_schemas.ResultData.RESULT_KIND: _TDEngineColumn.INT,
+        }
+        super().__init__(super_table, columns, tags, database)
 @dataclass
 class Metrics(TDEngineSchema):
-    super_table = mm_schemas.TDEngineSuperTables.METRICS
-    columns = {
-        mm_schemas.WriterEvent.END_INFER_TIME: _TDEngineColumn.TIMESTAMP,
-        mm_schemas.WriterEvent.START_INFER_TIME: _TDEngineColumn.TIMESTAMP,
-        mm_schemas.MetricData.METRIC_VALUE: _TDEngineColumn.FLOAT,
-    }
-    tags = {
-        mm_schemas.EventFieldType.PROJECT: _TDEngineColumn.BINARY_64,
-        mm_schemas.WriterEvent.ENDPOINT_ID: _TDEngineColumn.BINARY_64,
-        mm_schemas.WriterEvent.APPLICATION_NAME: _TDEngineColumn.BINARY_64,
-        mm_schemas.MetricData.METRIC_NAME: _TDEngineColumn.BINARY_64,
-    }
-    database = _MODEL_MONITORING_DATABASE
+    def __init__(self, database: Optional[str] = None):
+        super_table = mm_schemas.TDEngineSuperTables.METRICS
+        columns = {
+            mm_schemas.WriterEvent.END_INFER_TIME: _TDEngineColumn.TIMESTAMP,
+            mm_schemas.WriterEvent.START_INFER_TIME: _TDEngineColumn.TIMESTAMP,
+            mm_schemas.MetricData.METRIC_VALUE: _TDEngineColumn.FLOAT,
+        }
+        tags = {
+            mm_schemas.EventFieldType.PROJECT: _TDEngineColumn.BINARY_64,
+            mm_schemas.WriterEvent.ENDPOINT_ID: _TDEngineColumn.BINARY_64,
+            mm_schemas.WriterEvent.APPLICATION_NAME: _TDEngineColumn.BINARY_64,
+            mm_schemas.MetricData.METRIC_NAME: _TDEngineColumn.BINARY_64,
+        }
+        super().__init__(super_table, columns, tags, database)
 @dataclass
 class Predictions(TDEngineSchema):
-    super_table = mm_schemas.TDEngineSuperTables.PREDICTIONS
-    columns = {
-        mm_schemas.EventFieldType.TIME: _TDEngineColumn.TIMESTAMP,
-        mm_schemas.EventFieldType.LATENCY: _TDEngineColumn.FLOAT,
-        mm_schemas.EventKeyMetrics.CUSTOM_METRICS: _TDEngineColumn.BINARY_10000,
-    }
-    tags = {
-        mm_schemas.EventFieldType.PROJECT: _TDEngineColumn.BINARY_64,
-        mm_schemas.WriterEvent.ENDPOINT_ID: _TDEngineColumn.BINARY_64,
-    }
-    database = _MODEL_MONITORING_DATABASE
+    def __init__(self, database: Optional[str] = None):
+        super_table = mm_schemas.TDEngineSuperTables.PREDICTIONS
+        columns = {
+            mm_schemas.EventFieldType.TIME: _TDEngineColumn.TIMESTAMP,
+            mm_schemas.EventFieldType.LATENCY: _TDEngineColumn.FLOAT,
+            mm_schemas.EventKeyMetrics.CUSTOM_METRICS: _TDEngineColumn.BINARY_10000,
+        }
+        tags = {
+            mm_schemas.EventFieldType.PROJECT: _TDEngineColumn.BINARY_64,
+            mm_schemas.WriterEvent.ENDPOINT_ID: _TDEngineColumn.BINARY_64,
+        }
+        super().__init__(super_table, columns, tags, database)

mlrun/model_monitoring/db/tsdb/tdengine/tdengine_connector.py CHANGED Viewed

@@ -58,15 +58,26 @@ class TDEngineConnector(TSDBConnector):
         except taosws.QueryError:
             # Database already exists
             pass
-        conn.execute(f"USE {self.database}")
+        try:
+            conn.execute(f"USE {self.database}")
+        except taosws.QueryError as e:
+            raise mlrun.errors.MLRunTSDBConnectionFailure(
+                f"Failed to use TDEngine database {self.database}, {mlrun.errors.err_to_str(e)}"
+            )
         return conn
     def _init_super_tables(self):
         """Initialize the super tables for the TSDB."""
         self.tables = {
-            mm_schemas.TDEngineSuperTables.APP_RESULTS: tdengine_schemas.AppResultTable(),
-            mm_schemas.TDEngineSuperTables.METRICS: tdengine_schemas.Metrics(),
-            mm_schemas.TDEngineSuperTables.PREDICTIONS: tdengine_schemas.Predictions(),
+            mm_schemas.TDEngineSuperTables.APP_RESULTS: tdengine_schemas.AppResultTable(
+                self.database
+            ),
+            mm_schemas.TDEngineSuperTables.METRICS: tdengine_schemas.Metrics(
+                self.database
+            ),
+            mm_schemas.TDEngineSuperTables.PREDICTIONS: tdengine_schemas.Predictions(
+                self.database
+            ),
         }
     def create_tables(self):
@@ -97,6 +108,7 @@ class TDEngineConnector(TSDBConnector):
             table_name = (
                 f"{table_name}_" f"{event[mm_schemas.ResultData.RESULT_NAME]}"
             ).replace("-", "_")
+            event.pop(mm_schemas.ResultData.CURRENT_STATS, None)
         else:
             # Write a new metric
@@ -105,14 +117,30 @@ class TDEngineConnector(TSDBConnector):
                 f"{table_name}_" f"{event[mm_schemas.MetricData.METRIC_NAME]}"
             ).replace("-", "_")
+        # Convert the datetime strings to datetime objects
+        event[mm_schemas.WriterEvent.END_INFER_TIME] = self._convert_to_datetime(
+            val=event[mm_schemas.WriterEvent.END_INFER_TIME]
+        )
+        event[mm_schemas.WriterEvent.START_INFER_TIME] = self._convert_to_datetime(
+            val=event[mm_schemas.WriterEvent.START_INFER_TIME]
+        )
         create_table_query = table._create_subtable_query(
             subtable=table_name, values=event
         )
         self._connection.execute(create_table_query)
-        insert_table_query = table._insert_subtable_query(
-            subtable=table_name, values=event
+        insert_statement = table._insert_subtable_query(
+            self._connection,
+            subtable=table_name,
+            values=event,
         )
-        self._connection.execute(insert_table_query)
+        insert_statement.add_batch()
+        insert_statement.execute()
+    @staticmethod
+    def _convert_to_datetime(val: typing.Union[str, datetime]) -> datetime:
+        return datetime.fromisoformat(val) if isinstance(val, str) else val
     def apply_monitoring_stream_steps(self, graph):
         """

mlrun/model_monitoring/db/tsdb/v3io/v3io_connector.py CHANGED Viewed

@@ -326,7 +326,9 @@ class V3IOTSDBConnector(TSDBConnector):
         elif kind == mm_schemas.WriterEventKind.RESULT:
             table = self.tables[mm_schemas.V3IOTSDBTables.APP_RESULTS]
             index_cols = index_cols_base + [mm_schemas.ResultData.RESULT_NAME]
-            del event[mm_schemas.ResultData.RESULT_EXTRA_DATA]
+            event.pop(mm_schemas.ResultData.CURRENT_STATS, None)
+            # TODO: remove this when extra data is supported (ML-7460)
+            event.pop(mm_schemas.ResultData.RESULT_EXTRA_DATA, None)
         else:
             raise ValueError(f"Invalid {kind = }")

mlrun/model_monitoring/helpers.py CHANGED Viewed

@@ -20,10 +20,8 @@ import pandas as pd
 import mlrun
 import mlrun.common.model_monitoring.helpers
-import mlrun.common.schemas
-from mlrun.common.schemas.model_monitoring import (
-    EventFieldType,
-)
+import mlrun.common.schemas.model_monitoring.constants as mm_constants
+import mlrun.data_types.infer
 from mlrun.common.schemas.model_monitoring.model_endpoints import (
     ModelEndpointMonitoringMetric,
     ModelEndpointMonitoringMetricType,
@@ -35,7 +33,6 @@ from mlrun.utils import logger
 if typing.TYPE_CHECKING:
     from mlrun.db.base import RunDBInterface
     from mlrun.projects import MlrunProject
-import mlrun.common.schemas.model_monitoring.constants as mm_constants
 class _BatchDict(typing.TypedDict):
@@ -45,26 +42,29 @@ class _BatchDict(typing.TypedDict):
 def get_stream_path(
-    project: str, function_name: str = mm_constants.MonitoringFunctionNames.STREAM
+    project: str,
+    function_name: str = mm_constants.MonitoringFunctionNames.STREAM,
+    stream_uri: typing.Optional[str] = None,
 ) -> str:
     """
     Get stream path from the project secret. If wasn't set, take it from the system configurations
     :param project:             Project name.
-    :param function_name:    Application name. Default is model_monitoring_stream.
+    :param function_name:       Application name. Default is model_monitoring_stream.
+    :param stream_uri:          Stream URI. If provided, it will be used instead of the one from the project secret.
     :return:                    Monitoring stream path to the relevant application.
     """
-    stream_uri = mlrun.get_secret_or_env(
-        mlrun.common.schemas.model_monitoring.ProjectSecretKeys.STREAM_PATH
+    stream_uri = stream_uri or mlrun.get_secret_or_env(
+        mm_constants.ProjectSecretKeys.STREAM_PATH
     )
     if not stream_uri or stream_uri == "v3io":
         # TODO : remove the first part of this condition in 1.9.0
         stream_uri = mlrun.mlconf.get_model_monitoring_file_target_path(
             project=project,
-            kind=mlrun.common.schemas.model_monitoring.FileTargetKind.STREAM,
+            kind=mm_constants.FileTargetKind.STREAM,
             target="online",
             function_name=function_name,
         )
@@ -78,7 +78,7 @@ def get_stream_path(
 def get_monitoring_parquet_path(
     project: "MlrunProject",
-    kind: str = mlrun.common.schemas.model_monitoring.FileTargetKind.PARQUET,
+    kind: str = mm_constants.FileTargetKind.PARQUET,
 ) -> str:
     """Get model monitoring parquet target for the current project and kind. The parquet target path is based on the
     project artifact path. If project artifact path is not defined, the parquet target path will be based on MLRun
@@ -111,7 +111,7 @@ def get_connection_string(secret_provider: typing.Callable[[str], str] = None) -
     """
     return mlrun.get_secret_or_env(
-        key=mlrun.common.schemas.model_monitoring.ProjectSecretKeys.ENDPOINT_STORE_CONNECTION,
+        key=mm_constants.ProjectSecretKeys.ENDPOINT_STORE_CONNECTION,
         secret_provider=secret_provider,
     )
@@ -126,7 +126,7 @@ def get_tsdb_connection_string(
     """
     return mlrun.get_secret_or_env(
-        key=mlrun.common.schemas.model_monitoring.ProjectSecretKeys.TSDB_CONNECTION,
+        key=mm_constants.ProjectSecretKeys.TSDB_CONNECTION,
         secret_provider=secret_provider,
     )
@@ -200,7 +200,7 @@ def update_model_endpoint_last_request(
         db.patch_model_endpoint(
             project=project,
             endpoint_id=model_endpoint.metadata.uid,
-            attributes={EventFieldType.LAST_REQUEST: current_request},
+            attributes={mm_constants.EventFieldType.LAST_REQUEST: current_request},
         )
     else:
         try:
@@ -229,7 +229,7 @@ def update_model_endpoint_last_request(
         db.patch_model_endpoint(
             project=project,
             endpoint_id=model_endpoint.metadata.uid,
-            attributes={EventFieldType.LAST_REQUEST: bumped_last_request},
+            attributes={mm_constants.EventFieldType.LAST_REQUEST: bumped_last_request},
         )
@@ -249,8 +249,7 @@ def calculate_inputs_statistics(
     # Use `DFDataInfer` to calculate the statistics over the inputs:
     inputs_statistics = mlrun.data_types.infer.DFDataInfer.get_stats(
-        df=inputs,
-        options=mlrun.data_types.infer.InferOptions.Histogram,
+        df=inputs, options=mlrun.data_types.infer.InferOptions.Histogram
     )
     # Recalculate the histograms over the bins that are set in the sample-set of the end point:

mlrun/model_monitoring/stream_processing.py CHANGED Viewed

@@ -202,7 +202,7 @@ class EventStreamProcessor:
         def apply_process_endpoint_event():
             graph.add_step(
                 "ProcessEndpointEvent",
-                after="FilterError",
+                after="extract_endpoint",  # TODO: change this to FilterError in ML-7456
                 full_event=True,
                 project=self.project,
             )
@@ -527,9 +527,8 @@ class ProcessEndpointEvent(mlrun.feature_store.steps.MapClass):
         # If error key has been found in the current event,
         # increase the error counter by 1 and raise the error description
         error = event.get("error")
-        if error:
+        if error:  # TODO: delete this in ML-7456
             self.error_count[endpoint_id] += 1
-            # TODO: write to tsdb / kv once in a while
             raise mlrun.errors.MLRunInvalidArgumentError(str(error))
         # Validate event fields

mlrun 1.7.0rc38__py3-none-any.whl → 1.7.0rc40__py3-none-any.whl

Potentially problematic release.

mlrun 1.7.0rc38__py3-none-any.whl → 1.7.0rc40__py3-none-any.whl