mlrun 1.7.0rc35__py3-none-any.whl → 1.7.0rc37__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.



Files changed (41)
  1. mlrun/alerts/alert.py +63 -0
  2. mlrun/common/schemas/alert.py +2 -2
  3. mlrun/common/schemas/api_gateway.py +1 -1
  4. mlrun/common/schemas/notification.py +23 -4
  5. mlrun/config.py +1 -0
  6. mlrun/datastore/s3.py +8 -1
  7. mlrun/datastore/spark_utils.py +30 -0
  8. mlrun/feature_store/api.py +19 -1
  9. mlrun/feature_store/steps.py +8 -0
  10. mlrun/model_monitoring/api.py +24 -7
  11. mlrun/model_monitoring/applications/_application_steps.py +12 -3
  12. mlrun/model_monitoring/applications/base.py +8 -0
  13. mlrun/model_monitoring/applications/evidently_base.py +23 -22
  14. mlrun/model_monitoring/controller.py +5 -1
  15. mlrun/model_monitoring/db/stores/sqldb/models/mysql.py +14 -1
  16. mlrun/model_monitoring/db/stores/v3io_kv/kv_store.py +1 -1
  17. mlrun/model_monitoring/db/tsdb/base.py +20 -11
  18. mlrun/model_monitoring/helpers.py +1 -2
  19. mlrun/model_monitoring/stream_processing.py +20 -0
  20. mlrun/model_monitoring/writer.py +4 -1
  21. mlrun/projects/operations.py +4 -0
  22. mlrun/projects/project.py +4 -0
  23. mlrun/runtimes/base.py +3 -0
  24. mlrun/runtimes/nuclio/api_gateway.py +1 -1
  25. mlrun/runtimes/nuclio/application/application.py +53 -12
  26. mlrun/runtimes/nuclio/function.py +5 -1
  27. mlrun/runtimes/sparkjob/spark3job.py +4 -7
  28. mlrun/runtimes/utils.py +18 -0
  29. mlrun/serving/routers.py +1 -4
  30. mlrun/serving/server.py +4 -7
  31. mlrun/serving/states.py +8 -3
  32. mlrun/serving/v2_serving.py +9 -9
  33. mlrun/utils/db.py +15 -0
  34. mlrun/utils/http.py +1 -1
  35. mlrun/utils/version/version.json +2 -2
  36. {mlrun-1.7.0rc35.dist-info → mlrun-1.7.0rc37.dist-info}/METADATA +6 -6
  37. {mlrun-1.7.0rc35.dist-info → mlrun-1.7.0rc37.dist-info}/RECORD +41 -41
  38. {mlrun-1.7.0rc35.dist-info → mlrun-1.7.0rc37.dist-info}/LICENSE +0 -0
  39. {mlrun-1.7.0rc35.dist-info → mlrun-1.7.0rc37.dist-info}/WHEEL +0 -0
  40. {mlrun-1.7.0rc35.dist-info → mlrun-1.7.0rc37.dist-info}/entry_points.txt +0 -0
  41. {mlrun-1.7.0rc35.dist-info → mlrun-1.7.0rc37.dist-info}/top_level.txt +0 -0
mlrun/alerts/alert.py CHANGED
@@ -28,6 +28,7 @@ class AlertConfig(ModelObj):
         "severity",
         "reset_policy",
         "state",
+        "count",
     ]
     _fields_to_serialize = ModelObj._fields_to_serialize + [
         "entities",
@@ -54,6 +55,68 @@ class AlertConfig(ModelObj):
         created: str = None,
         count: int = None,
     ):
+        """
+        Alert config object
+
+        Example::
+
+            # create an alert on endpoint_id, which will be triggered to slack if there is a "data_drift_detected" event
+            # 3 times in the next hour.
+            from mlrun.alerts import AlertConfig
+            import mlrun.common.schemas.alert as alert_objects
+
+            entity_kind = alert_objects.EventEntityKind.MODEL_ENDPOINT_RESULT
+            entity_id = get_default_result_instance_fqn(endpoint_id)
+            event_name = alert_objects.EventKind.DATA_DRIFT_DETECTED
+            notification = mlrun.model.Notification(
+                kind="slack",
+                name="slack_notification",
+                message="drift was detected",
+                severity="warning",
+                when=["now"],
+                condition="failed",
+                secret_params={
+                    "webhook": "https://hooks.slack.com/",
+                },
+            ).to_dict()
+
+            alert_data = AlertConfig(
+                project="my-project",
+                name="drift-alert",
+                summary="a drift was detected",
+                severity=alert_objects.AlertSeverity.LOW,
+                entities=alert_objects.EventEntities(
+                    kind=entity_kind, project="my-project", ids=[entity_id]
+                ),
+                trigger=alert_objects.AlertTrigger(events=[event_name]),
+                criteria=alert_objects.AlertCriteria(count=3, period="1h"),
+                notifications=[alert_objects.AlertNotification(notification=notification)],
+            )
+            project.store_alert_config(alert_data)
+
+        :param project: name of the project to associate the alert with
+        :param name: name of the alert
+        :param template: optional parameter that allows to create an alert based on a predefined template.
+                         you can pass either an AlertTemplate object or a string (the template name).
+                         if a template is used, many fields of the alert will be auto-generated based on the
+                         template. however, you still need to provide the following fields:
+                         `name`, `project`, `entity`, `notifications`
+        :param description: description of the alert
+        :param summary: summary of the alert, will be sent in the generated notifications
+        :param severity: severity of the alert
+        :param trigger: the events that will trigger this alert, may be a simple trigger based on events or
+                        complex trigger which is based on a prometheus alert
+        :param criteria: when the alert will be triggered based on the specified number of events within the
+                         defined time period.
+        :param reset_policy: when to clear the alert. May be "manual" for manual reset of the alert, or
+                             "auto" if the criteria contains a time period
+        :param notifications: list of notifications to invoke once the alert is triggered
+        :param entities: entities that the event relates to. The entity object will contain fields that uniquely
+                         identify a given entity in the system
+        :param id: internal id of the alert (user should not supply it)
+        :param state: state of the alert, may be active/inactive (user should not supply it)
+        :param created: when the alert is created (user should not supply it)
+        :param count: internal counter of the alert (user should not supply it)
+        """
         self.project = project
         self.name = name
         self.description = description
mlrun/common/schemas/alert.py CHANGED
@@ -149,7 +149,7 @@ class AlertConfig(pydantic.BaseModel):
     entities: EventEntities
     trigger: AlertTrigger
     criteria: Optional[AlertCriteria]
-    reset_policy: ResetPolicy = ResetPolicy.MANUAL
+    reset_policy: ResetPolicy = ResetPolicy.AUTO
     notifications: pydantic.conlist(AlertNotification, min_items=1)
     state: AlertActiveState = AlertActiveState.INACTIVE
     count: Optional[int] = 0
@@ -185,7 +185,7 @@ class AlertTemplate(
     severity: AlertSeverity
     trigger: AlertTrigger
     criteria: Optional[AlertCriteria]
-    reset_policy: ResetPolicy = ResetPolicy.MANUAL
+    reset_policy: ResetPolicy = ResetPolicy.AUTO

     # This is slightly different than __eq__ as it doesn't compare everything
     def templates_differ(self, other):
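Both schema changes above flip the default reset_policy from MANUAL to AUTO. A minimal sketch of pinning the policy explicitly when the previous behavior is still wanted (the criteria values are placeholders):

# Sketch only: choose the reset policy explicitly instead of relying on the new AUTO default.
import mlrun.common.schemas.alert as alert_objects

criteria = alert_objects.AlertCriteria(count=3, period="1h")

# "auto" clears the alert once the criteria time window passes; keep "manual" if the alert
# should stay active until someone resets it.
reset_policy = (
    alert_objects.ResetPolicy.AUTO if criteria.period else alert_objects.ResetPolicy.MANUAL
)
print(reset_policy)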
mlrun/common/schemas/api_gateway.py CHANGED
@@ -107,7 +107,7 @@ class APIGateway(_APIGatewayBaseModel):
             self.spec.host + self.spec.path
             if self.spec.path and self.spec.host
             else self.spec.host
-        )
+        ).rstrip("/")

     def enrich_mlrun_names(self):
         self._enrich_api_gateway_mlrun_name()
mlrun/common/schemas/notification.py CHANGED
@@ -50,15 +50,34 @@ class NotificationLimits(enum.Enum):


 class Notification(pydantic.BaseModel):
+    """
+    Notification object schema
+    :param kind: notification implementation kind - slack, webhook, etc.
+    :param name: for logging and identification
+    :param message: message content in the notification
+    :param severity: severity to display in the notification
+    :param when: list of statuses to trigger the notification: 'running', 'completed', 'error'
+    :param condition: optional condition to trigger the notification, a jinja2 expression that can use run data
+                      to evaluate if the notification should be sent in addition to the 'when' statuses.
+                      e.g.: '{{ run["status"]["results"]["accuracy"] < 0.9}}'
+    :param params: Implementation specific parameters for the notification implementation (e.g. slack webhook url,
+                   git repository details, etc.)
+    :param secret_params: secret parameters for the notification implementation, same as params but will be stored
+                          in a k8s secret and passed as a secret reference to the implementation.
+    :param status: notification status - pending, sent, error
+    :param sent_time: time the notification was sent
+    :param reason: failure reason if the notification failed to send
+    """
+
     kind: NotificationKind
     name: str
     message: str
     severity: NotificationSeverity
     when: list[str]
-    condition: str = None
-    params: dict[str, typing.Any] = None
-    status: NotificationStatus = None
-    sent_time: typing.Union[str, datetime.datetime] = None
+    condition: typing.Optional[str] = None
+    params: typing.Optional[dict[str, typing.Any]] = None
+    status: typing.Optional[NotificationStatus] = None
+    sent_time: typing.Optional[typing.Union[str, datetime.datetime]] = None
     secret_params: typing.Optional[dict[str, typing.Any]] = None
     reason: typing.Optional[str] = None

mlrun/config.py CHANGED
@@ -1166,6 +1166,7 @@ class Config:
             )
         elif kind == "stream":  # return list for mlrun<1.6.3 BC
             return [
+                # TODO: remove the first stream in 1.9.0
                 mlrun.mlconf.model_endpoint_monitoring.store_prefixes.default.format(
                     project=project,
                     kind=kind,
mlrun/datastore/s3.py CHANGED
@@ -15,6 +15,7 @@
 import time

 import boto3
+from boto3.s3.transfer import TransferConfig
 from fsspec.registry import get_filesystem_class

 import mlrun.errors
@@ -40,6 +41,12 @@ class S3Store(DataStore):
         profile_name = self._get_secret_or_env("AWS_PROFILE")
         assume_role_arn = self._get_secret_or_env("MLRUN_AWS_ROLE_ARN")

+        self.config = TransferConfig(
+            multipart_threshold=1024 * 1024 * 25,
+            max_concurrency=10,
+            multipart_chunksize=1024 * 1024 * 25,
+        )
+
         # If user asks to assume a role, this needs to go through the STS client and retrieve temporary creds
         if assume_role_arn:
             client = boto3.client(
@@ -166,7 +173,7 @@ class S3Store(DataStore):

     def upload(self, key, src_path):
         bucket, key = self.get_bucket_and_key(key)
-        self.s3.Object(bucket, key).put(Body=open(src_path, "rb"))
+        self.s3.Bucket(bucket).upload_file(src_path, key, Config=self.config)

     def get(self, key, size=None, offset=0):
         bucket, key = self.get_bucket_and_key(key)
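For context, the new upload path delegates to boto3's managed transfer instead of a single put. A standalone sketch of the same pattern (bucket name, key, and file path are placeholders):

# Sketch only: mirrors the TransferConfig-based upload that S3Store.upload now uses.
import boto3
from boto3.s3.transfer import TransferConfig

config = TransferConfig(
    multipart_threshold=1024 * 1024 * 25,  # switch to multipart uploads above 25 MB
    max_concurrency=10,                    # number of parts uploaded in parallel
    multipart_chunksize=1024 * 1024 * 25,  # 25 MB per part
)

s3 = boto3.resource("s3")
# upload_file streams the file and handles multipart parts and retries internally,
# unlike Object.put(Body=...), which sends the whole body in a single request.
s3.Bucket("my-bucket").upload_file("/path/to/model.pkl", "models/model.pkl", Config=config)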
mlrun/datastore/spark_utils.py CHANGED
@@ -13,7 +13,10 @@
 # limitations under the License.


+from typing import Union
+
 import mlrun
+from mlrun.features import Entity


 def spark_session_update_hadoop_options(session, spark_options) -> dict[str, str]:
@@ -35,3 +38,30 @@ def spark_session_update_hadoop_options(session, spark_options) -> dict[str, str]:
         else:
             non_hadoop_spark_options[key] = value
     return non_hadoop_spark_options
+
+
+def check_special_columns_exists(
+    spark_df, entities: list[Union[Entity, str]], timestamp_key: str, label_column: str
+):
+    columns = spark_df.columns
+    entities = entities or []
+    entities = [
+        entity.name if isinstance(entity, Entity) else entity for entity in entities
+    ]
+    missing_entities = [entity for entity in entities if entity not in columns]
+    cases_message = "Please check the letter cases (uppercase or lowercase)"
+    if missing_entities:
+        raise mlrun.errors.MLRunInvalidArgumentError(
+            f"There are missing entities from dataframe during ingestion. missing_entities: {missing_entities}."
+            f" {cases_message}"
+        )
+    if timestamp_key and timestamp_key not in columns:
+        raise mlrun.errors.MLRunInvalidArgumentError(
+            f"timestamp_key is missing from dataframe during ingestion. timestamp_key: {timestamp_key}."
+            f" {cases_message}"
+        )
+    if label_column and label_column not in columns:
+        raise mlrun.errors.MLRunInvalidArgumentError(
+            f"label_column is missing from dataframe during ingestion. label_column: {label_column}. "
+            f"{cases_message}"
+        )
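A short sketch of how the new helper surfaces letter-case mismatches at ingestion time (column and key names are hypothetical, and a local Spark session is assumed):

# Sketch only: demonstrates the new ingestion-time column validation.
from pyspark.sql import SparkSession

import mlrun.errors
from mlrun.datastore.spark_utils import check_special_columns_exists

spark = SparkSession.builder.master("local[1]").getOrCreate()
df = spark.createDataFrame([(1, "2024-01-01", 0.5)], ["Patient_ID", "timestamp", "label"])

try:
    # "patient_id" differs from the dataframe's "Patient_ID" only by letter case,
    # so the check raises instead of silently ingesting a broken feature set.
    check_special_columns_exists(
        spark_df=df, entities=["patient_id"], timestamp_key="timestamp", label_column="label"
    )
except mlrun.errors.MLRunInvalidArgumentError as err:
    print(err)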
mlrun/feature_store/api.py CHANGED
@@ -1032,6 +1032,8 @@ def _ingest_with_spark(
     try:
         import pyspark.sql

+        from mlrun.datastore.spark_utils import check_special_columns_exists
+
         if spark is None or spark is True:
             # create spark context

@@ -1050,7 +1052,6 @@ def _ingest_with_spark(
             created_spark_context = True

         timestamp_key = featureset.spec.timestamp_key
-
         if isinstance(source, pd.DataFrame):
             df = spark.createDataFrame(source)
         elif isinstance(source, pyspark.sql.DataFrame):
@@ -1080,6 +1081,12 @@ def _ingest_with_spark(
             target = get_target_driver(target, featureset)
             target.set_resource(featureset)
             if featureset.spec.passthrough and target.is_offline:
+                check_special_columns_exists(
+                    spark_df=df,
+                    entities=featureset.spec.entities,
+                    timestamp_key=timestamp_key,
+                    label_column=featureset.spec.label_column,
+                )
                 continue
             spark_options = target.get_spark_options(
                 key_columns, timestamp_key, overwrite
@@ -1090,6 +1097,17 @@ def _ingest_with_spark(
                 df_to_write, key_columns, timestamp_key, spark_options
             )
             write_format = spark_options.pop("format", None)
+            # We can get to this point if the column exists in different letter cases,
+            # so PySpark will be able to read it, but we still have to raise an exception for it.
+
+            # This check is here and not in to_spark_df because in spark_merger we can have a target
+            # that has different letter cases than the source, like in SnowflakeTarget.
+            check_special_columns_exists(
+                spark_df=df_to_write,
+                entities=featureset.spec.entities,
+                timestamp_key=timestamp_key,
+                label_column=featureset.spec.label_column,
+            )
             if overwrite:
                 write_spark_dataframe_with_options(
                     spark_options, df_to_write, "overwrite", write_format=write_format
mlrun/feature_store/steps.py CHANGED
@@ -743,3 +743,11 @@ class DropFeatures(StepToDict, MLRunStep):
             raise mlrun.errors.MLRunInvalidArgumentError(
                 f"DropFeatures can only drop features, not entities: {dropped_entities}"
             )
+        if feature_set.spec.label_column in features:
+            raise mlrun.errors.MLRunInvalidArgumentError(
+                f"DropFeatures can not drop label_column: {feature_set.spec.label_column}"
+            )
+        if feature_set.spec.timestamp_key in features:
+            raise mlrun.errors.MLRunInvalidArgumentError(
+                f"DropFeatures can not drop timestamp_key: {feature_set.spec.timestamp_key}"
+            )
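A minimal sketch of what the new DropFeatures guard protects against (feature set and column names are hypothetical):

# Sketch only: DropFeatures may still drop ordinary features, but asking it to drop the
# label column or timestamp key now fails validation instead of producing an unusable set.
import mlrun.feature_store as fstore
from mlrun.feature_store.steps import DropFeatures

feature_set = fstore.FeatureSet(
    "transactions",
    entities=[fstore.Entity("account_id")],
    timestamp_key="timestamp",
)
feature_set.spec.label_column = "is_fraud"

# Allowed: dropping a regular feature.
feature_set.graph.to(DropFeatures(features=["internal_debug_score"]))

# No longer allowed: DropFeatures(features=["is_fraud"]) or DropFeatures(features=["timestamp"])
# now raise MLRunInvalidArgumentError when the step is validated against the feature set.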
mlrun/model_monitoring/api.py CHANGED
@@ -252,14 +252,31 @@ def _model_endpoint_validations(
     In case of discrepancy between the provided `sample_set_statistics` and the
     `model_endpoints.spec.feature_stats`, a warning will be presented to the user.
     """
-    # Model path
-    if model_path and model_endpoint.spec.model_uri != model_path:
-        raise mlrun.errors.MLRunInvalidArgumentError(
-            f"provided model store path {model_path} does not match "
-            f"the path that is stored under the existing model "
-            f"endpoint record: {model_endpoint.spec.model_uri}"
+
+    # Model Path
+    if model_path:
+        # Generate the parsed model uri that is based on hash, key, iter, and tree
+        model_obj = mlrun.datastore.get_store_resource(model_path)
+
+        model_artifact_uri = mlrun.utils.helpers.generate_artifact_uri(
+            project=model_endpoint.metadata.project,
+            key=model_obj.key,
+            iter=model_obj.iter,
+            tree=model_obj.tree,
+        )
+
+        # Enrich the uri schema with the store prefix
+        model_artifact_uri = mlrun.datastore.get_store_uri(
+            kind=mlrun.utils.helpers.StorePrefix.Model, uri=model_artifact_uri
         )

+        if model_endpoint.spec.model_uri != model_artifact_uri:
+            raise mlrun.errors.MLRunInvalidArgumentError(
+                f"provided model store path {model_path} does not match "
+                f"the path that is stored under the existing model "
+                f"endpoint record: {model_endpoint.spec.model_uri}"
+            )
+
     # Feature stats
     if (
         sample_set_statistics
@@ -605,5 +622,5 @@ def _create_model_monitoring_function_base(
         name="PushToMonitoringWriter",
         project=project,
         writer_application_name=mm_constants.MonitoringFunctionNames.WRITER,
-    ).respond()
+    )
     return func_obj
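For context, a sketch of the normalization the validation now performs before comparing URIs, reusing the same helpers as the diff (the store path is a placeholder and assumes the model was already logged to the project):

# Sketch only: turn a user-supplied model store path into the canonical artifact URI,
# then compare it to the URI stored on the model endpoint record.
import mlrun.datastore
import mlrun.utils.helpers

model_path = "store://models/my-project/my-model:latest"  # hypothetical

model_obj = mlrun.datastore.get_store_resource(model_path)
model_artifact_uri = mlrun.utils.helpers.generate_artifact_uri(
    project="my-project", key=model_obj.key, iter=model_obj.iter, tree=model_obj.tree
)
model_artifact_uri = mlrun.datastore.get_store_uri(
    kind=mlrun.utils.helpers.StorePrefix.Model, uri=model_artifact_uri
)

# Comparing the canonical URI avoids false mismatches when the endpoint record holds the
# hashed/tagged form of the same model rather than the raw path the user passed in.
print(model_artifact_uri)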
mlrun/model_monitoring/applications/_application_steps.py CHANGED
@@ -19,6 +19,8 @@ import mlrun.common.helpers
 import mlrun.common.model_monitoring.helpers
 import mlrun.common.schemas.model_monitoring.constants as mm_constant
 import mlrun.datastore
+import mlrun.serving
+import mlrun.utils.helpers
 import mlrun.utils.v3io_clients
 from mlrun.model_monitoring.helpers import get_stream_path
 from mlrun.serving.utils import StepToDict
@@ -33,8 +35,8 @@ class _PushToMonitoringWriter(StepToDict):

     def __init__(
         self,
-        project: Optional[str] = None,
-        writer_application_name: Optional[str] = None,
+        project: str,
+        writer_application_name: str,
         stream_uri: Optional[str] = None,
         name: Optional[str] = None,
     ):
@@ -108,6 +110,7 @@ class _PushToMonitoringWriter(StepToDict):
             f"Pushing data = {writer_event} \n to stream = {self.stream_uri}"
         )
         self.output_stream.push([writer_event])
+        logger.info(f"Pushed data to {self.stream_uri} successfully")

     def _lazy_init(self):
         if self.output_stream is None:
@@ -149,9 +152,15 @@ class _PrepareMonitoringEvent(StepToDict):

     @staticmethod
     def _create_mlrun_context(app_name: str):
+        artifact_path = mlrun.utils.helpers.template_artifact_path(
+            mlrun.mlconf.artifact_path, mlrun.mlconf.default_project
+        )
         context = mlrun.get_or_create_ctx(
             f"{app_name}-logger",
-            upload_artifacts=True,
+            spec={
+                "metadata": {"labels": {"kind": mlrun.runtimes.RuntimeKinds.serving}},
+                "spec": {mlrun.utils.helpers.RunKeys.output_path: artifact_path},
+            },
         )
         context.__class__ = MonitoringApplicationContext
         return context
mlrun/model_monitoring/applications/base.py CHANGED
@@ -17,6 +17,7 @@ from typing import Any, Union, cast

 import numpy as np
 import pandas as pd
+from deprecated import deprecated

 import mlrun
 import mlrun.model_monitoring.applications.context as mm_context
@@ -112,6 +113,13 @@ class ModelMonitoringApplicationBaseV2(MonitoringApplicationToDict, ABC):
         raise NotImplementedError


+# TODO: Remove in 1.9.0
+@deprecated(
+    version="1.7.0",
+    reason="The `ModelMonitoringApplicationBase` class is deprecated from "
+    "version 1.7.0 and will be removed in version 1.9.0. "
+    "Use `ModelMonitoringApplicationBaseV2` as your application's base class.",
+)
 class ModelMonitoringApplicationBase(MonitoringApplicationToDict, ABC):
     """
     A base class for a model monitoring application.
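The deprecation is implemented with the `deprecated` package; a tiny sketch of the pattern (class names are hypothetical):

# Sketch only: instantiating a class wrapped with @deprecated emits a DeprecationWarning.
import warnings

from deprecated import deprecated


class NewBase:
    pass


@deprecated(version="1.7.0", reason="Use `NewBase` instead.")
class LegacyBase:
    pass


with warnings.catch_warnings(record=True) as caught:
    warnings.simplefilter("always")
    LegacyBase()
    print(caught[0].category.__name__, caught[0].message)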
mlrun/model_monitoring/applications/evidently_base.py CHANGED
@@ -14,10 +14,11 @@

 import uuid
 import warnings
-from typing import Union
+from abc import ABC

 import pandas as pd
 import semver
+from deprecated import deprecated

 import mlrun.model_monitoring.applications.base as mm_base
 import mlrun.model_monitoring.applications.context as mm_context
@@ -57,14 +58,22 @@ except ModuleNotFoundError:


 if _HAS_EVIDENTLY:
-    from evidently.report.report import Report
-    from evidently.suite.base_suite import Suite
+    from evidently.suite.base_suite import Display
     from evidently.ui.type_aliases import STR_UUID
     from evidently.ui.workspace import Workspace
     from evidently.utils.dashboard import TemplateParams, file_html_template


-class EvidentlyModelMonitoringApplicationBase(mm_base.ModelMonitoringApplicationBase):
+# TODO: Remove in 1.9.0
+@deprecated(
+    version="1.7.0",
+    reason="The `EvidentlyModelMonitoringApplicationBase` class is deprecated from "
+    "version 1.7.0 and will be removed in version 1.9.0. "
+    "Use `EvidentlyModelMonitoringApplicationBaseV2` as your application's base class.",
+)
+class EvidentlyModelMonitoringApplicationBase(
+    mm_base.ModelMonitoringApplicationBase, ABC
+):
     def __init__(
         self, evidently_workspace_path: str, evidently_project_id: "STR_UUID"
     ) -> None:
@@ -86,12 +95,12 @@ class EvidentlyModelMonitoringApplicationBase(mm_base.ModelMonitoringApplication
         )

     def log_evidently_object(
-        self, evidently_object: Union["Report", "Suite"], artifact_name: str
-    ):
+        self, evidently_object: "Display", artifact_name: str
+    ) -> None:
         """
         Logs an Evidently report or suite as an artifact.

-        :param evidently_object: (Union[Report, Suite]) The Evidently report or suite object.
+        :param evidently_object: (Display) The Evidently display to log, e.g. a report or a test suite object.
         :param artifact_name: (str) The name for the logged artifact.
         """
         evidently_object_html = evidently_object.get_html()
@@ -122,18 +131,14 @@ class EvidentlyModelMonitoringApplicationBase(mm_base.ModelMonitoringApplication
             additional_graphs={},
         )

-        dashboard_html = self._render(file_html_template, template_params)
+        dashboard_html = file_html_template(params=template_params)
         self.context.log_artifact(
             artifact_name, body=dashboard_html.encode("utf-8"), format="html"
         )

-    @staticmethod
-    def _render(temple_func, template_params: "TemplateParams"):
-        return temple_func(params=template_params)
-

 class EvidentlyModelMonitoringApplicationBaseV2(
-    mm_base.ModelMonitoringApplicationBaseV2
+    mm_base.ModelMonitoringApplicationBaseV2, ABC
 ):
     def __init__(
         self, evidently_workspace_path: str, evidently_project_id: "STR_UUID"
@@ -160,14 +165,14 @@ class EvidentlyModelMonitoringApplicationBaseV2(
     @staticmethod
     def log_evidently_object(
         monitoring_context: mm_context.MonitoringApplicationContext,
-        evidently_object: Union["Report", "Suite"],
+        evidently_object: "Display",
         artifact_name: str,
-    ):
+    ) -> None:
         """
         Logs an Evidently report or suite as an artifact.

         :param monitoring_context: (MonitoringApplicationContext) The monitoring context to process.
-        :param evidently_object: (Union[Report, Suite]) The Evidently report or suite object.
+        :param evidently_object: (Display) The Evidently display to log, e.g. a report or a test suite object.
         :param artifact_name: (str) The name for the logged artifact.
         """
         evidently_object_html = evidently_object.get_html()
@@ -181,7 +186,7 @@ class EvidentlyModelMonitoringApplicationBaseV2(
         timestamp_start: pd.Timestamp,
         timestamp_end: pd.Timestamp,
         artifact_name: str = "dashboard",
-    ):
+    ) -> None:
         """
         Logs an Evidently project dashboard.
@@ -200,11 +205,7 @@ class EvidentlyModelMonitoringApplicationBaseV2(
             additional_graphs={},
         )

-        dashboard_html = self._render(file_html_template, template_params)
+        dashboard_html = file_html_template(params=template_params)
         monitoring_context.log_artifact(
             artifact_name, body=dashboard_html.encode("utf-8"), format="html"
         )
-
-    @staticmethod
-    def _render(temple_func, template_params: "TemplateParams"):
-        return temple_func(params=template_params)
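For context, log_evidently_object now accepts any Evidently Display, i.e. a Report or a TestSuite. A short sketch of producing one with the legacy Evidently report API (the dataframes are hypothetical):

# Sketch only: build an Evidently report whose HTML the base class can log as an artifact.
import pandas as pd
from evidently.metric_preset import DataDriftPreset
from evidently.report import Report

reference = pd.DataFrame({"f1": [0.1, 0.2, 0.3], "f2": [1, 2, 3]})
current = pd.DataFrame({"f1": [0.9, 0.8, 0.7], "f2": [7, 8, 9]})

report = Report(metrics=[DataDriftPreset()])
report.run(reference_data=reference, current_data=current)
html = report.get_html()  # Report and TestSuite both expose get_html() via Display

# Inside a monitoring application, this could then be logged with something like:
# self.log_evidently_object(monitoring_context, report, artifact_name="drift_report")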
mlrun/model_monitoring/controller.py CHANGED
@@ -15,6 +15,7 @@
 import concurrent.futures
 import datetime
 import json
+import multiprocessing
 import os
 import re
 from collections.abc import Iterator
@@ -363,7 +364,10 @@ class MonitoringApplicationController:
             return
         # Initialize a process pool that will be used to run each endpoint applications on a dedicated process
         with concurrent.futures.ProcessPoolExecutor(
-            max_workers=min(len(endpoints), 10)
+            max_workers=min(len(endpoints), 10),
+            # On Linux, the default is "fork" (this is set to change in Python 3.14), which inherits the current heap
+            # and resources (such as sockets), which is not what we want (ML-7160)
+            mp_context=multiprocessing.get_context("spawn"),
         ) as pool:
             for endpoint in endpoints:
                 if (
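The same pattern, isolated to the standard library, for reference (worker function and endpoint ids are hypothetical):

# Sketch only: a "spawn" context starts each worker from a fresh interpreter, so the
# workers do not inherit the parent's heap or open resources (sockets, clients, etc.).
import concurrent.futures
import multiprocessing


def handle_endpoint(endpoint_id: str) -> str:
    # In the controller, this would run the monitoring applications for one endpoint.
    return f"processed {endpoint_id}"


if __name__ == "__main__":
    endpoints = ["ep-1", "ep-2", "ep-3"]
    with concurrent.futures.ProcessPoolExecutor(
        max_workers=min(len(endpoints), 10),
        mp_context=multiprocessing.get_context("spawn"),
    ) as pool:
        for result in pool.map(handle_endpoint, endpoints):
            print(result)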
mlrun/model_monitoring/db/stores/sqldb/models/mysql.py CHANGED
@@ -18,6 +18,7 @@ from sqlalchemy.ext.declarative import declarative_base, declared_attr

 from mlrun.common.schemas.model_monitoring import (
     EventFieldType,
+    ResultData,
     WriterEvent,
 )

@@ -32,6 +33,13 @@ Base = declarative_base()


 class ModelEndpointsTable(Base, ModelEndpointsBaseTable):
+    feature_stats = Column(
+        EventFieldType.FEATURE_STATS, sqlalchemy.dialects.mysql.MEDIUMTEXT
+    )
+    current_stats = Column(
+        EventFieldType.CURRENT_STATS, sqlalchemy.dialects.mysql.MEDIUMTEXT
+    )
+    metrics = Column(EventFieldType.METRICS, sqlalchemy.dialects.mysql.MEDIUMTEXT)
     first_request = Column(
         EventFieldType.FIRST_REQUEST,
         # TODO: migrate to DATETIME, see ML-6921
@@ -72,7 +80,12 @@ class _ApplicationResultOrMetric:
 class ApplicationResultTable(
     Base, _ApplicationResultOrMetric, ApplicationResultBaseTable
 ):
-    pass
+    result_extra_data = Column(
+        ResultData.RESULT_EXTRA_DATA, sqlalchemy.dialects.mysql.MEDIUMTEXT
+    )
+    current_stats = Column(
+        ResultData.CURRENT_STATS, sqlalchemy.dialects.mysql.MEDIUMTEXT
+    )


 class ApplicationMetricsTable(
mlrun/model_monitoring/db/stores/v3io_kv/kv_store.py CHANGED
@@ -350,7 +350,7 @@ class KVStoreBase(StoreBase):
             table_path = self._get_results_table_path(endpoint_id)
             key = event.pop(mm_schemas.WriterEvent.APPLICATION_NAME)
             metric_name = event.pop(mm_schemas.ResultData.RESULT_NAME)
-            attributes = {metric_name: json.dumps(event)}
+            attributes = {metric_name: self._encode_field(json.dumps(event))}
         else:
             raise ValueError(f"Invalid {kind = }")

mlrun/model_monitoring/db/tsdb/base.py CHANGED
@@ -17,6 +17,7 @@ from abc import ABC, abstractmethod
 from datetime import datetime

 import pandas as pd
+import pydantic

 import mlrun.common.schemas.model_monitoring as mm_schemas
 import mlrun.model_monitoring.db.tsdb.helpers
@@ -289,19 +290,27 @@ class TSDBConnector(ABC):
             full_name = mlrun.model_monitoring.helpers._compose_full_name(
                 project=project, app=app_name, name=name
             )
-            metrics_values.append(
-                mm_schemas.ModelEndpointMonitoringResultValues(
+            try:
+                metrics_values.append(
+                    mm_schemas.ModelEndpointMonitoringResultValues(
+                        full_name=full_name,
+                        result_kind=result_kind,
+                        values=list(
+                            zip(
+                                sub_df.index,
+                                sub_df[mm_schemas.ResultData.RESULT_VALUE],
+                                sub_df[mm_schemas.ResultData.RESULT_STATUS],
+                            )
+                        ),  # pyright: ignore[reportArgumentType]
+                    )
+                )
+            except pydantic.ValidationError:
+                logger.exception(
+                    "Failed to convert data-frame into `ModelEndpointMonitoringResultValues`",
                     full_name=full_name,
-                    result_kind=result_kind,
-                    values=list(
-                        zip(
-                            sub_df.index,
-                            sub_df[mm_schemas.ResultData.RESULT_VALUE],
-                            sub_df[mm_schemas.ResultData.RESULT_STATUS],
-                        )
-                    ),  # pyright: ignore[reportArgumentType]
+                    sub_df_json=sub_df.to_json(),
                 )
-            )
+                raise
             del metrics_without_data[full_name]

         for metric in metrics_without_data.values():
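The change above wraps the pydantic model construction so a malformed frame is logged with its content before the error propagates. A standalone sketch of the same pattern (model and values are hypothetical, pydantic v1 API as used elsewhere in this diff):

# Sketch only: log context for a validation failure, then re-raise so the caller still fails.
import pydantic


class ResultValues(pydantic.BaseModel):
    full_name: str
    values: list[float]


raw = {"full_name": "my-project.my-app.result", "values": ["not-a-number"]}

try:
    ResultValues(**raw)
except pydantic.ValidationError as err:
    print(f"failed to build ResultValues from {raw}: {err}")
    raise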
mlrun/model_monitoring/helpers.py CHANGED
@@ -45,8 +45,7 @@ class _BatchDict(typing.TypedDict):


 def get_stream_path(
-    project: str = None,
-    function_name: str = mm_constants.MonitoringFunctionNames.STREAM,
+    project: str, function_name: str = mm_constants.MonitoringFunctionNames.STREAM
 ) -> str:
     """
     Get stream path from the project secret. If wasn't set, take it from the system configurations
mlrun/model_monitoring/stream_processing.py CHANGED
@@ -557,6 +557,26 @@ class ProcessEndpointEvent(mlrun.feature_store.steps.MapClass):

         # Separate each model invocation into sub events that will be stored as dictionary
         # in list of events. This list will be used as the body for the storey event.
+        if not isinstance(features, list):
+            raise mlrun.errors.MLRunInvalidArgumentError(
+                "Model's inputs must be a list"
+            )
+        features = (
+            features
+            if not any(not isinstance(feat, list) for feat in features)
+            else [features]
+        )
+        if not isinstance(predictions, list):
+            predictions = [[predictions]]
+        elif isinstance(predictions, list) and len(predictions) == len(features):
+            pass  # predictions are already in the right format
+        else:
+            predictions = (
+                predictions
+                if not any(not isinstance(pred, list) for pred in predictions)
+                else [predictions]
+            )
+
         events = []
         for i, (feature, prediction) in enumerate(zip(features, predictions)):
             if not isinstance(prediction, list):
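The normalization added above is easier to follow in isolation; a sketch with a hypothetical helper name and inputs:

# Sketch only: reshape single invocations and scalar predictions into batch form,
# mirroring the logic added to ProcessEndpointEvent.
def normalize_features_and_predictions(features, predictions):
    if not isinstance(features, list):
        raise ValueError("Model's inputs must be a list")
    # A single flat feature vector becomes a one-row batch.
    if any(not isinstance(feat, list) for feat in features):
        features = [features]
    # A scalar prediction becomes a one-row, one-column batch.
    if not isinstance(predictions, list):
        predictions = [[predictions]]
    elif len(predictions) != len(features) and any(
        not isinstance(pred, list) for pred in predictions
    ):
        predictions = [predictions]
    return features, predictions


# One invocation with two features and a scalar prediction:
print(normalize_features_and_predictions([1.5, 2.5], 0.9))
# -> ([[1.5, 2.5]], [[0.9]])

# A batch of two invocations stays as-is:
print(normalize_features_and_predictions([[1, 2], [3, 4]], [0.1, 0.2]))
# -> ([[1, 2], [3, 4]], [0.1, 0.2])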