mlrun 1.7.0rc35__py3-none-any.whl → 1.7.0rc37__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of mlrun might be problematic.
- mlrun/alerts/alert.py +63 -0
- mlrun/common/schemas/alert.py +2 -2
- mlrun/common/schemas/api_gateway.py +1 -1
- mlrun/common/schemas/notification.py +23 -4
- mlrun/config.py +1 -0
- mlrun/datastore/s3.py +8 -1
- mlrun/datastore/spark_utils.py +30 -0
- mlrun/feature_store/api.py +19 -1
- mlrun/feature_store/steps.py +8 -0
- mlrun/model_monitoring/api.py +24 -7
- mlrun/model_monitoring/applications/_application_steps.py +12 -3
- mlrun/model_monitoring/applications/base.py +8 -0
- mlrun/model_monitoring/applications/evidently_base.py +23 -22
- mlrun/model_monitoring/controller.py +5 -1
- mlrun/model_monitoring/db/stores/sqldb/models/mysql.py +14 -1
- mlrun/model_monitoring/db/stores/v3io_kv/kv_store.py +1 -1
- mlrun/model_monitoring/db/tsdb/base.py +20 -11
- mlrun/model_monitoring/helpers.py +1 -2
- mlrun/model_monitoring/stream_processing.py +20 -0
- mlrun/model_monitoring/writer.py +4 -1
- mlrun/projects/operations.py +4 -0
- mlrun/projects/project.py +4 -0
- mlrun/runtimes/base.py +3 -0
- mlrun/runtimes/nuclio/api_gateway.py +1 -1
- mlrun/runtimes/nuclio/application/application.py +53 -12
- mlrun/runtimes/nuclio/function.py +5 -1
- mlrun/runtimes/sparkjob/spark3job.py +4 -7
- mlrun/runtimes/utils.py +18 -0
- mlrun/serving/routers.py +1 -4
- mlrun/serving/server.py +4 -7
- mlrun/serving/states.py +8 -3
- mlrun/serving/v2_serving.py +9 -9
- mlrun/utils/db.py +15 -0
- mlrun/utils/http.py +1 -1
- mlrun/utils/version/version.json +2 -2
- {mlrun-1.7.0rc35.dist-info → mlrun-1.7.0rc37.dist-info}/METADATA +6 -6
- {mlrun-1.7.0rc35.dist-info → mlrun-1.7.0rc37.dist-info}/RECORD +41 -41
- {mlrun-1.7.0rc35.dist-info → mlrun-1.7.0rc37.dist-info}/LICENSE +0 -0
- {mlrun-1.7.0rc35.dist-info → mlrun-1.7.0rc37.dist-info}/WHEEL +0 -0
- {mlrun-1.7.0rc35.dist-info → mlrun-1.7.0rc37.dist-info}/entry_points.txt +0 -0
- {mlrun-1.7.0rc35.dist-info → mlrun-1.7.0rc37.dist-info}/top_level.txt +0 -0
mlrun/alerts/alert.py
CHANGED
@@ -28,6 +28,7 @@ class AlertConfig(ModelObj):
         "severity",
         "reset_policy",
         "state",
+        "count",
     ]
     _fields_to_serialize = ModelObj._fields_to_serialize + [
         "entities",
@@ -54,6 +55,68 @@ class AlertConfig(ModelObj):
         created: str = None,
         count: int = None,
     ):
+        """
+        Alert config object
+
+        Example::
+
+            # create an alert on endpoint_id, which will be triggered to slack if there is a "data_drift_detected" event
+            # 3 times in the next hour.
+            from mlrun.alerts import AlertConfig
+            import mlrun.common.schemas.alert as alert_objects
+
+            entity_kind = alert_objects.EventEntityKind.MODEL_ENDPOINT_RESULT
+            entity_id = get_default_result_instance_fqn(endpoint_id)
+            event_name = alert_objects.EventKind.DATA_DRIFT_DETECTED
+            notification = mlrun.model.Notification(
+                kind="slack",
+                name="slack_notification",
+                message="drift was detected",
+                severity="warning",
+                when=["now"],
+                condition="failed",
+                secret_params={
+                    "webhook": "https://hooks.slack.com/",
+                },
+            ).to_dict()
+
+            alert_data = AlertConfig(
+                project="my-project",
+                name="drift-alert",
+                summary="a drift was detected",
+                severity=alert_objects.AlertSeverity.LOW,
+                entities=alert_objects.EventEntities(
+                    kind=entity_kind, project="my-project", ids=[entity_id]
+                ),
+                trigger=alert_objects.AlertTrigger(events=[event_name]),
+                criteria=alert_objects.AlertCriteria(count=3, period="1h"),
+                notifications=[alert_objects.AlertNotification(notification=notification)],
+            )
+            project.store_alert_config(alert_data)
+
+        :param project: name of the project to associate the alert with
+        :param name: name of the alert
+        :param template: optional parameter that allows to create an alert based on a predefined template.
+                         you can pass either an AlertTemplate object or a string (the template name).
+                         if a template is used, many fields of the alert will be auto-generated based on the
+                         template. however, you still need to provide the following fields:
+                         `name`, `project`, `entity`, `notifications`
+        :param description: description of the alert
+        :param summary: summary of the alert, will be sent in the generated notifications
+        :param severity: severity of the alert
+        :param trigger: the events that will trigger this alert, may be a simple trigger based on events or
+                        complex trigger which is based on a prometheus alert
+        :param criteria: when the alert will be triggered based on the specified number of events within the
+                         defined time period.
+        :param reset_policy: when to clear the alert. May be "manual" for manual reset of the alert, or
+                             "auto" if the criteria contains a time period
+        :param notifications: list of notifications to invoke once the alert is triggered
+        :param entities: entities that the event relates to. The entity object will contain fields that uniquely
+                         identify a given entity in the system
+        :param id: internal id of the alert (user should not supply it)
+        :param state: state of the alert, may be active/inactive (user should not supply it)
+        :param created: when the alert is created (user should not supply it)
+        :param count: internal counter of the alert (user should not supply it)
+        """
         self.project = project
         self.name = name
         self.description = description
mlrun/common/schemas/alert.py
CHANGED
@@ -149,7 +149,7 @@ class AlertConfig(pydantic.BaseModel):
     entities: EventEntities
     trigger: AlertTrigger
     criteria: Optional[AlertCriteria]
-    reset_policy: ResetPolicy = ResetPolicy.
+    reset_policy: ResetPolicy = ResetPolicy.AUTO
     notifications: pydantic.conlist(AlertNotification, min_items=1)
     state: AlertActiveState = AlertActiveState.INACTIVE
     count: Optional[int] = 0
@@ -185,7 +185,7 @@ class AlertTemplate(
     severity: AlertSeverity
     trigger: AlertTrigger
    criteria: Optional[AlertCriteria]
-    reset_policy: ResetPolicy = ResetPolicy.
+    reset_policy: ResetPolicy = ResetPolicy.AUTO
 
     # This is slightly different than __eq__ as it doesn't compare everything
     def templates_differ(self, other):
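Note on the reset_policy change above: omitting reset_policy on an AlertConfig or AlertTemplate now resolves to ResetPolicy.AUTO (the old default value is truncated in the source diff). A minimal standalone sketch of the resulting pydantic behavior, assuming ResetPolicy is a string enum with "manual" and "auto" members as the AlertConfig docstring describes:

    import enum

    import pydantic

    class ResetPolicy(str, enum.Enum):
        MANUAL = "manual"
        AUTO = "auto"

    class AlertConfigSketch(pydantic.BaseModel):  # stand-in for the real schema
        name: str
        reset_policy: ResetPolicy = ResetPolicy.AUTO  # the new default

    alert = AlertConfigSketch(name="drift-alert")
    assert alert.reset_policy is ResetPolicy.AUTO  # omitted field resolves to AUTO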
mlrun/common/schemas/notification.py
CHANGED
@@ -50,15 +50,34 @@ class NotificationLimits(enum.Enum):
 
 
 class Notification(pydantic.BaseModel):
+    """
+    Notification object schema
+    :param kind: notification implementation kind - slack, webhook, etc.
+    :param name: for logging and identification
+    :param message: message content in the notification
+    :param severity: severity to display in the notification
+    :param when: list of statuses to trigger the notification: 'running', 'completed', 'error'
+    :param condition: optional condition to trigger the notification, a jinja2 expression that can use run data
+                      to evaluate if the notification should be sent in addition to the 'when' statuses.
+                      e.g.: '{{ run["status"]["results"]["accuracy"] < 0.9}}'
+    :param params: Implementation specific parameters for the notification implementation (e.g. slack webhook url,
+                   git repository details, etc.)
+    :param secret_params: secret parameters for the notification implementation, same as params but will be stored
+                          in a k8s secret and passed as a secret reference to the implementation.
+    :param status: notification status - pending, sent, error
+    :param sent_time: time the notification was sent
+    :param reason: failure reason if the notification failed to send
+    """
+
     kind: NotificationKind
     name: str
     message: str
     severity: NotificationSeverity
     when: list[str]
-    condition: str = None
-    params: dict[str, typing.Any] = None
-    status: NotificationStatus = None
-    sent_time: typing.Union[str, datetime.datetime] = None
+    condition: typing.Optional[str] = None
+    params: typing.Optional[dict[str, typing.Any]] = None
+    status: typing.Optional[NotificationStatus] = None
+    sent_time: typing.Optional[typing.Union[str, datetime.datetime]] = None
     secret_params: typing.Optional[dict[str, typing.Any]] = None
     reason: typing.Optional[str] = None
 
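Since condition, params, status, and sent_time are now typing.Optional (matching their None defaults), a notification can be built without them. A hedged sketch, assuming NotificationKind and NotificationSeverity are string enums that pydantic coerces from plain strings:

    from mlrun.common.schemas.notification import Notification

    notification = Notification(
        kind="slack",
        name="slack-notification",
        message="run finished",
        severity="info",
        when=["completed", "error"],
        # condition, params, status, and sent_time can now simply be omitted;
        # they default to None.
    )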
mlrun/config.py
CHANGED
@@ -1166,6 +1166,7 @@ class Config:
             )
         elif kind == "stream":  # return list for mlrun<1.6.3 BC
             return [
+                # TODO: remove the first stream in 1.9.0
                 mlrun.mlconf.model_endpoint_monitoring.store_prefixes.default.format(
                     project=project,
                     kind=kind,
mlrun/datastore/s3.py
CHANGED
@@ -15,6 +15,7 @@
 import time
 
 import boto3
+from boto3.s3.transfer import TransferConfig
 from fsspec.registry import get_filesystem_class
 
 import mlrun.errors
@@ -40,6 +41,12 @@ class S3Store(DataStore):
         profile_name = self._get_secret_or_env("AWS_PROFILE")
         assume_role_arn = self._get_secret_or_env("MLRUN_AWS_ROLE_ARN")
 
+        self.config = TransferConfig(
+            multipart_threshold=1024 * 1024 * 25,
+            max_concurrency=10,
+            multipart_chunksize=1024 * 1024 * 25,
+        )
+
         # If user asks to assume a role, this needs to go through the STS client and retrieve temporary creds
         if assume_role_arn:
             client = boto3.client(
@@ -166,7 +173,7 @@ class S3Store(DataStore):
 
     def upload(self, key, src_path):
         bucket, key = self.get_bucket_and_key(key)
-        self.s3.
+        self.s3.Bucket(bucket).upload_file(src_path, key, Config=self.config)
 
     def get(self, key, size=None, offset=0):
         bucket, key = self.get_bucket_and_key(key)
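The upload path above now goes through boto3's high-level transfer manager with an explicit TransferConfig: files larger than the 25 MiB multipart_threshold are split into 25 MiB parts and uploaded by up to 10 concurrent threads. A standalone sketch of the same call shape (bucket and file names are hypothetical):

    import boto3
    from boto3.s3.transfer import TransferConfig

    config = TransferConfig(
        multipart_threshold=1024 * 1024 * 25,  # switch to multipart above 25 MiB
        max_concurrency=10,                    # up to 10 parallel part uploads
        multipart_chunksize=1024 * 1024 * 25,  # 25 MiB per part
    )
    s3 = boto3.resource("s3")
    s3.Bucket("my-bucket").upload_file(
        "/tmp/large-model.bin", "models/large-model.bin", Config=config
    )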
mlrun/datastore/spark_utils.py
CHANGED
@@ -13,7 +13,10 @@
 # limitations under the License.
 
 
+from typing import Union
+
 import mlrun
+from mlrun.features import Entity
 
 
 def spark_session_update_hadoop_options(session, spark_options) -> dict[str, str]:
@@ -35,3 +38,30 @@ def spark_session_update_hadoop_options(session, spark_options) -> dict[str, str]:
     else:
         non_hadoop_spark_options[key] = value
     return non_hadoop_spark_options
+
+
+def check_special_columns_exists(
+    spark_df, entities: list[Union[Entity, str]], timestamp_key: str, label_column: str
+):
+    columns = spark_df.columns
+    entities = entities or []
+    entities = [
+        entity.name if isinstance(entity, Entity) else entity for entity in entities
+    ]
+    missing_entities = [entity for entity in entities if entity not in columns]
+    cases_message = "Please check the letter cases (uppercase or lowercase)"
+    if missing_entities:
+        raise mlrun.errors.MLRunInvalidArgumentError(
+            f"There are missing entities from dataframe during ingestion. missing_entities: {missing_entities}."
+            f" {cases_message}"
+        )
+    if timestamp_key and timestamp_key not in columns:
+        raise mlrun.errors.MLRunInvalidArgumentError(
+            f"timestamp_key is missing from dataframe during ingestion. timestamp_key: {timestamp_key}."
+            f" {cases_message}"
+        )
+    if label_column and label_column not in columns:
+        raise mlrun.errors.MLRunInvalidArgumentError(
+            f"label_column is missing from dataframe during ingestion. label_column: {label_column}. "
+            f"{cases_message}"
+        )
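A hypothetical usage sketch of the new validator (column names are illustrative): it verifies that entity, timestamp, and label columns exist in the Spark dataframe with matching letter case before ingestion.

    from pyspark.sql import SparkSession

    from mlrun.datastore.spark_utils import check_special_columns_exists

    spark = SparkSession.builder.getOrCreate()
    df = spark.createDataFrame(
        [("p1", "2024-01-01", 0)], ["patient_id", "timestamp", "label"]
    )

    # Passes: all three special columns are present.
    check_special_columns_exists(
        df, entities=["patient_id"], timestamp_key="timestamp", label_column="label"
    )

    # Raises MLRunInvalidArgumentError: "PATIENT_ID" differs from "patient_id" in case.
    check_special_columns_exists(
        df, entities=["PATIENT_ID"], timestamp_key="timestamp", label_column="label"
    )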
mlrun/feature_store/api.py
CHANGED
@@ -1032,6 +1032,8 @@ def _ingest_with_spark(
     try:
         import pyspark.sql
 
+        from mlrun.datastore.spark_utils import check_special_columns_exists
+
         if spark is None or spark is True:
             # create spark context
 
@@ -1050,7 +1052,6 @@ def _ingest_with_spark(
             created_spark_context = True
 
         timestamp_key = featureset.spec.timestamp_key
-
         if isinstance(source, pd.DataFrame):
             df = spark.createDataFrame(source)
         elif isinstance(source, pyspark.sql.DataFrame):
@@ -1080,6 +1081,12 @@ def _ingest_with_spark(
             target = get_target_driver(target, featureset)
             target.set_resource(featureset)
             if featureset.spec.passthrough and target.is_offline:
+                check_special_columns_exists(
+                    spark_df=df,
+                    entities=featureset.spec.entities,
+                    timestamp_key=timestamp_key,
+                    label_column=featureset.spec.label_column,
+                )
                 continue
             spark_options = target.get_spark_options(
                 key_columns, timestamp_key, overwrite
@@ -1090,6 +1097,17 @@ def _ingest_with_spark(
                 df_to_write, key_columns, timestamp_key, spark_options
             )
             write_format = spark_options.pop("format", None)
+            # We can get to this point if the column exists in different letter cases,
+            # so PySpark will be able to read it, but we still have to raise an exception for it.
+
+            # This check is here and not in to_spark_df because in spark_merger we can have a target
+            # that has different letter cases than the source, like in SnowflakeTarget.
+            check_special_columns_exists(
+                spark_df=df_to_write,
+                entities=featureset.spec.entities,
+                timestamp_key=timestamp_key,
+                label_column=featureset.spec.label_column,
+            )
             if overwrite:
                 write_spark_dataframe_with_options(
                     spark_options, df_to_write, "overwrite", write_format=write_format
mlrun/feature_store/steps.py
CHANGED
@@ -743,3 +743,11 @@ class DropFeatures(StepToDict, MLRunStep):
             raise mlrun.errors.MLRunInvalidArgumentError(
                 f"DropFeatures can only drop features, not entities: {dropped_entities}"
             )
+        if feature_set.spec.label_column in features:
+            raise mlrun.errors.MLRunInvalidArgumentError(
+                f"DropFeatures can not drop label_column: {feature_set.spec.label_column}"
+            )
+        if feature_set.spec.timestamp_key in features:
+            raise mlrun.errors.MLRunInvalidArgumentError(
+                f"DropFeatures can not drop timestamp_key: {feature_set.spec.timestamp_key}"
+            )
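A hedged sketch of what the new DropFeatures validation guards against (the feature set and column names are illustrative, and the check is expected to fire during ingest-time graph validation rather than at graph construction):

    import mlrun.errors
    from mlrun.feature_store import Entity, FeatureSet
    from mlrun.feature_store.steps import DropFeatures

    fset = FeatureSet("tickers", entities=[Entity("ticker")], timestamp_key="time")
    fset.graph.to(DropFeatures(features=["time"]))  # "time" is the timestamp_key
    # Ingesting through this graph is expected to raise
    # mlrun.errors.MLRunInvalidArgumentError:
    #   "DropFeatures can not drop timestamp_key: time"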
mlrun/model_monitoring/api.py
CHANGED
@@ -252,14 +252,31 @@ def _model_endpoint_validations(
     In case of discrepancy between the provided `sample_set_statistics` and the
     `model_endpoints.spec.feature_stats`, a warning will be presented to the user.
     """
-
-
-
-
-
-
+
+    # Model Path
+    if model_path:
+        # Generate the parsed model uri that is based on hash, key, iter, and tree
+        model_obj = mlrun.datastore.get_store_resource(model_path)
+
+        model_artifact_uri = mlrun.utils.helpers.generate_artifact_uri(
+            project=model_endpoint.metadata.project,
+            key=model_obj.key,
+            iter=model_obj.iter,
+            tree=model_obj.tree,
+        )
+
+        # Enrich the uri schema with the store prefix
+        model_artifact_uri = mlrun.datastore.get_store_uri(
+            kind=mlrun.utils.helpers.StorePrefix.Model, uri=model_artifact_uri
         )
 
+        if model_endpoint.spec.model_uri != model_artifact_uri:
+            raise mlrun.errors.MLRunInvalidArgumentError(
+                f"provided model store path {model_path} does not match "
+                f"the path that is stored under the existing model "
+                f"endpoint record: {model_endpoint.spec.model_uri}"
+            )
+
     # Feature stats
     if (
         sample_set_statistics
@@ -605,5 +622,5 @@ def _create_model_monitoring_function_base(
         name="PushToMonitoringWriter",
         project=project,
         writer_application_name=mm_constants.MonitoringFunctionNames.WRITER,
-        )
+    )
     return func_obj
mlrun/model_monitoring/applications/_application_steps.py
CHANGED
@@ -19,6 +19,8 @@ import mlrun.common.helpers
 import mlrun.common.model_monitoring.helpers
 import mlrun.common.schemas.model_monitoring.constants as mm_constant
 import mlrun.datastore
+import mlrun.serving
+import mlrun.utils.helpers
 import mlrun.utils.v3io_clients
 from mlrun.model_monitoring.helpers import get_stream_path
 from mlrun.serving.utils import StepToDict
@@ -33,8 +35,8 @@ class _PushToMonitoringWriter(StepToDict):
 
     def __init__(
         self,
-        project:
-        writer_application_name:
+        project: str,
+        writer_application_name: str,
         stream_uri: Optional[str] = None,
         name: Optional[str] = None,
     ):
@@ -108,6 +110,7 @@ class _PushToMonitoringWriter(StepToDict):
             f"Pushing data = {writer_event} \n to stream = {self.stream_uri}"
         )
         self.output_stream.push([writer_event])
+        logger.info(f"Pushed data to {self.stream_uri} successfully")
 
     def _lazy_init(self):
         if self.output_stream is None:
@@ -149,9 +152,15 @@ class _PrepareMonitoringEvent(StepToDict):
 
     @staticmethod
     def _create_mlrun_context(app_name: str):
+        artifact_path = mlrun.utils.helpers.template_artifact_path(
+            mlrun.mlconf.artifact_path, mlrun.mlconf.default_project
+        )
         context = mlrun.get_or_create_ctx(
             f"{app_name}-logger",
-
+            spec={
+                "metadata": {"labels": {"kind": mlrun.runtimes.RuntimeKinds.serving}},
+                "spec": {mlrun.utils.helpers.RunKeys.output_path: artifact_path},
+            },
         )
         context.__class__ = MonitoringApplicationContext
         return context
mlrun/model_monitoring/applications/base.py
CHANGED
@@ -17,6 +17,7 @@ from typing import Any, Union, cast
 
 import numpy as np
 import pandas as pd
+from deprecated import deprecated
 
 import mlrun
 import mlrun.model_monitoring.applications.context as mm_context
@@ -112,6 +113,13 @@ class ModelMonitoringApplicationBaseV2(MonitoringApplicationToDict, ABC):
         raise NotImplementedError
 
 
+# TODO: Remove in 1.9.0
+@deprecated(
+    version="1.7.0",
+    reason="The `ModelMonitoringApplicationBase` class is deprecated from "
+    "version 1.7.0 and will be removed in version 1.9.0. "
+    "Use `ModelMonitoringApplicationBaseV2` as your application's base class.",
+)
 class ModelMonitoringApplicationBase(MonitoringApplicationToDict, ABC):
     """
     A base class for a model monitoring application.
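For context, the `deprecated` package used above wraps the decorated class so that instantiating it (or a subclass) emits a DeprecationWarning carrying the given reason. A standalone sketch of that behavior:

    import warnings

    from deprecated import deprecated

    @deprecated(version="1.7.0", reason="Use `NewBase` instead.")
    class OldBase:  # stand-in for the deprecated application base class
        pass

    with warnings.catch_warnings(record=True) as caught:
        warnings.simplefilter("always")
        OldBase()  # instantiation triggers the warning
        assert any(issubclass(w.category, DeprecationWarning) for w in caught)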
mlrun/model_monitoring/applications/evidently_base.py
CHANGED
@@ -14,10 +14,11 @@
 
 import uuid
 import warnings
-from
+from abc import ABC
 
 import pandas as pd
 import semver
+from deprecated import deprecated
 
 import mlrun.model_monitoring.applications.base as mm_base
 import mlrun.model_monitoring.applications.context as mm_context
@@ -57,14 +58,22 @@ except ModuleNotFoundError:
 
 
 if _HAS_EVIDENTLY:
-    from evidently.
-    from evidently.suite.base_suite import Suite
+    from evidently.suite.base_suite import Display
     from evidently.ui.type_aliases import STR_UUID
     from evidently.ui.workspace import Workspace
     from evidently.utils.dashboard import TemplateParams, file_html_template
 
 
-
+# TODO: Remove in 1.9.0
+@deprecated(
+    version="1.7.0",
+    reason="The `EvidentlyModelMonitoringApplicationBase` class is deprecated from "
+    "version 1.7.0 and will be removed in version 1.9.0. "
+    "Use `EvidentlyModelMonitoringApplicationBaseV2` as your application's base class.",
+)
+class EvidentlyModelMonitoringApplicationBase(
+    mm_base.ModelMonitoringApplicationBase, ABC
+):
     def __init__(
         self, evidently_workspace_path: str, evidently_project_id: "STR_UUID"
     ) -> None:
@@ -86,12 +95,12 @@ class EvidentlyModelMonitoringApplicationBase(mm_base.ModelMonitoringApplication
         )
 
     def log_evidently_object(
-        self, evidently_object:
-    ):
+        self, evidently_object: "Display", artifact_name: str
+    ) -> None:
         """
         Logs an Evidently report or suite as an artifact.
 
-        :param evidently_object: (
+        :param evidently_object: (Display) The Evidently display to log, e.g. a report or a test suite object.
         :param artifact_name: (str) The name for the logged artifact.
         """
         evidently_object_html = evidently_object.get_html()
@@ -122,18 +131,14 @@ class EvidentlyModelMonitoringApplicationBase(mm_base.ModelMonitoringApplication
             additional_graphs={},
         )
 
-        dashboard_html =
+        dashboard_html = file_html_template(params=template_params)
         self.context.log_artifact(
             artifact_name, body=dashboard_html.encode("utf-8"), format="html"
         )
 
-    @staticmethod
-    def _render(temple_func, template_params: "TemplateParams"):
-        return temple_func(params=template_params)
-
 
 class EvidentlyModelMonitoringApplicationBaseV2(
-    mm_base.ModelMonitoringApplicationBaseV2
+    mm_base.ModelMonitoringApplicationBaseV2, ABC
 ):
     def __init__(
         self, evidently_workspace_path: str, evidently_project_id: "STR_UUID"
@@ -160,14 +165,14 @@ class EvidentlyModelMonitoringApplicationBaseV2(
     @staticmethod
     def log_evidently_object(
         monitoring_context: mm_context.MonitoringApplicationContext,
-        evidently_object:
+        evidently_object: "Display",
         artifact_name: str,
-    ):
+    ) -> None:
         """
         Logs an Evidently report or suite as an artifact.
 
         :param monitoring_context: (MonitoringApplicationContext) The monitoring context to process.
-        :param evidently_object: (
+        :param evidently_object: (Display) The Evidently display to log, e.g. a report or a test suite object.
         :param artifact_name: (str) The name for the logged artifact.
         """
         evidently_object_html = evidently_object.get_html()
@@ -181,7 +186,7 @@ class EvidentlyModelMonitoringApplicationBaseV2(
         timestamp_start: pd.Timestamp,
         timestamp_end: pd.Timestamp,
         artifact_name: str = "dashboard",
-    ):
+    ) -> None:
         """
         Logs an Evidently project dashboard.
 
@@ -200,11 +205,7 @@ class EvidentlyModelMonitoringApplicationBaseV2(
             additional_graphs={},
         )
 
-        dashboard_html =
+        dashboard_html = file_html_template(params=template_params)
         monitoring_context.log_artifact(
             artifact_name, body=dashboard_html.encode("utf-8"), format="html"
         )
-
-    @staticmethod
-    def _render(temple_func, template_params: "TemplateParams"):
-        return temple_func(params=template_params)
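A hedged usage sketch of log_evidently_object, assuming an Evidently version where Report exposes get_html() (which is what the method relies on); ref_df and cur_df are hypothetical dataframes, and monitoring_context is assumed to be the context provided inside a V2 application's do_tracking:

    import pandas as pd
    from evidently.metric_preset import DataDriftPreset
    from evidently.report import Report

    ref_df = pd.DataFrame({"f": [1.0, 2.0, 3.0]})  # hypothetical reference data
    cur_df = pd.DataFrame({"f": [1.5, 2.5, 3.5]})  # hypothetical current data

    report = Report(metrics=[DataDriftPreset()])
    report.run(reference_data=ref_df, current_data=cur_df)
    # Inside a V2 application, where monitoring_context is provided:
    EvidentlyModelMonitoringApplicationBaseV2.log_evidently_object(
        monitoring_context, report, artifact_name="drift_report"
    )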
mlrun/model_monitoring/controller.py
CHANGED
@@ -15,6 +15,7 @@
 import concurrent.futures
 import datetime
 import json
+import multiprocessing
 import os
 import re
 from collections.abc import Iterator
@@ -363,7 +364,10 @@ class MonitoringApplicationController:
             return
         # Initialize a process pool that will be used to run each endpoint applications on a dedicated process
         with concurrent.futures.ProcessPoolExecutor(
-            max_workers=min(len(endpoints), 10)
+            max_workers=min(len(endpoints), 10),
+            # On Linux, the default is "fork" (this is set to change in Python 3.14), which inherits the current heap
+            # and resources (such as sockets), which is not what we want (ML-7160)
+            mp_context=multiprocessing.get_context("spawn"),
         ) as pool:
             for endpoint in endpoints:
                 if (
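A standalone sketch of the executor change: requesting the "spawn" start method gives each worker a fresh interpreter instead of a fork of the parent's heap and open resources such as sockets.

    import concurrent.futures
    import multiprocessing

    def work(i: int) -> int:
        return i * i  # trivial payload; runs in a freshly spawned process

    if __name__ == "__main__":  # required under "spawn", which re-imports __main__
        with concurrent.futures.ProcessPoolExecutor(
            max_workers=4,
            mp_context=multiprocessing.get_context("spawn"),
        ) as pool:
            print(list(pool.map(work, range(4))))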
mlrun/model_monitoring/db/stores/sqldb/models/mysql.py
CHANGED
@@ -18,6 +18,7 @@ from sqlalchemy.ext.declarative import declarative_base, declared_attr
 
 from mlrun.common.schemas.model_monitoring import (
     EventFieldType,
+    ResultData,
     WriterEvent,
 )
 
@@ -32,6 +33,13 @@ Base = declarative_base()
 
 
 class ModelEndpointsTable(Base, ModelEndpointsBaseTable):
+    feature_stats = Column(
+        EventFieldType.FEATURE_STATS, sqlalchemy.dialects.mysql.MEDIUMTEXT
+    )
+    current_stats = Column(
+        EventFieldType.CURRENT_STATS, sqlalchemy.dialects.mysql.MEDIUMTEXT
+    )
+    metrics = Column(EventFieldType.METRICS, sqlalchemy.dialects.mysql.MEDIUMTEXT)
     first_request = Column(
         EventFieldType.FIRST_REQUEST,
         # TODO: migrate to DATETIME, see ML-6921
@@ -72,7 +80,12 @@ class _ApplicationResultOrMetric:
 class ApplicationResultTable(
     Base, _ApplicationResultOrMetric, ApplicationResultBaseTable
 ):
-
+    result_extra_data = Column(
+        ResultData.RESULT_EXTRA_DATA, sqlalchemy.dialects.mysql.MEDIUMTEXT
+    )
+    current_stats = Column(
+        ResultData.CURRENT_STATS, sqlalchemy.dialects.mysql.MEDIUMTEXT
+    )
 
 
 class ApplicationMetricsTable(
mlrun/model_monitoring/db/stores/v3io_kv/kv_store.py
CHANGED
@@ -350,7 +350,7 @@ class KVStoreBase(StoreBase):
             table_path = self._get_results_table_path(endpoint_id)
             key = event.pop(mm_schemas.WriterEvent.APPLICATION_NAME)
             metric_name = event.pop(mm_schemas.ResultData.RESULT_NAME)
-            attributes = {metric_name: json.dumps(event)}
+            attributes = {metric_name: self._encode_field(json.dumps(event))}
         else:
             raise ValueError(f"Invalid {kind = }")
 
mlrun/model_monitoring/db/tsdb/base.py
CHANGED
@@ -17,6 +17,7 @@ from abc import ABC, abstractmethod
 from datetime import datetime
 
 import pandas as pd
+import pydantic
 
 import mlrun.common.schemas.model_monitoring as mm_schemas
 import mlrun.model_monitoring.db.tsdb.helpers
@@ -289,19 +290,27 @@ class TSDBConnector(ABC):
             full_name = mlrun.model_monitoring.helpers._compose_full_name(
                 project=project, app=app_name, name=name
             )
-
-
+            try:
+                metrics_values.append(
+                    mm_schemas.ModelEndpointMonitoringResultValues(
+                        full_name=full_name,
+                        result_kind=result_kind,
+                        values=list(
+                            zip(
+                                sub_df.index,
+                                sub_df[mm_schemas.ResultData.RESULT_VALUE],
+                                sub_df[mm_schemas.ResultData.RESULT_STATUS],
+                            )
+                        ),  # pyright: ignore[reportArgumentType]
+                    )
+                )
+            except pydantic.ValidationError:
+                logger.exception(
+                    "Failed to convert data-frame into `ModelEndpointMonitoringResultValues`",
                     full_name=full_name,
-
-                values=list(
-                    zip(
-                        sub_df.index,
-                        sub_df[mm_schemas.ResultData.RESULT_VALUE],
-                        sub_df[mm_schemas.ResultData.RESULT_STATUS],
-                    )
-                ),  # pyright: ignore[reportArgumentType]
+                    sub_df_json=sub_df.to_json(),
                 )
-
+                raise
             del metrics_without_data[full_name]
 
         for metric in metrics_without_data.values():
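A minimal standalone sketch of the error-handling pattern introduced here: build the pydantic object, and on validation failure log the offending payload for debugging before re-raising so the caller still sees the original error (the model below is a stand-in, not mlrun's schema):

    import pydantic

    class ResultValues(pydantic.BaseModel):  # stand-in for the real schema
        full_name: str
        values: list[tuple[str, float, int]]

    def parse(payload: dict) -> ResultValues:
        try:
            return ResultValues(**payload)
        except pydantic.ValidationError:
            # stand-in for logger.exception with the payload as context
            print(f"failed to parse payload: {payload!r}")
            raise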
mlrun/model_monitoring/helpers.py
CHANGED
@@ -45,8 +45,7 @@ class _BatchDict(typing.TypedDict):
 
 
 def get_stream_path(
-    project: str =
-    function_name: str = mm_constants.MonitoringFunctionNames.STREAM,
+    project: str, function_name: str = mm_constants.MonitoringFunctionNames.STREAM
 ) -> str:
     """
     Get stream path from the project secret. If wasn't set, take it from the system configurations
mlrun/model_monitoring/stream_processing.py
CHANGED
@@ -557,6 +557,26 @@ class ProcessEndpointEvent(mlrun.feature_store.steps.MapClass):
 
         # Separate each model invocation into sub events that will be stored as dictionary
         # in list of events. This list will be used as the body for the storey event.
+        if not isinstance(features, list):
+            raise mlrun.errors.MLRunInvalidArgumentError(
+                "Model's inputs must be a list"
+            )
+        features = (
+            features
+            if not any(not isinstance(feat, list) for feat in features)
+            else [features]
+        )
+        if not isinstance(predictions, list):
+            predictions = [[predictions]]
+        elif isinstance(predictions, list) and len(predictions) == len(features):
+            pass  # predictions are already in the right format
+        else:
+            predictions = (
+                predictions
+                if not any(not isinstance(pred, list) for pred in predictions)
+                else [predictions]
+            )
+
         events = []
         for i, (feature, prediction) in enumerate(zip(features, predictions)):
             if not isinstance(prediction, list):
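A standalone sketch of the normalization this new code performs: a single flat feature vector (or a scalar prediction) is wrapped so that both features and predictions become lists of lists with one entry per model invocation, which is the shape the per-invocation loop below it expects.

    def normalize(features, predictions):
        # Mirrors the added checks: inputs must arrive as a list.
        if not isinstance(features, list):
            raise ValueError("Model's inputs must be a list")
        if any(not isinstance(feat, list) for feat in features):
            features = [features]  # one flat vector -> one invocation
        if not isinstance(predictions, list):
            predictions = [[predictions]]  # scalar -> single single-output invocation
        elif len(predictions) != len(features) and any(
            not isinstance(pred, list) for pred in predictions
        ):
            predictions = [predictions]  # flat output vector -> one invocation
        return features, predictions

    assert normalize([1.0, 2.0], 0.5) == ([[1.0, 2.0]], [[0.5]])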