mlrun 1.7.0rc35__py3-none-any.whl → 1.7.0rc37__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of mlrun might be problematic.
- mlrun/alerts/alert.py +63 -0
- mlrun/common/schemas/alert.py +2 -2
- mlrun/common/schemas/api_gateway.py +1 -1
- mlrun/common/schemas/notification.py +23 -4
- mlrun/config.py +1 -0
- mlrun/datastore/s3.py +8 -1
- mlrun/datastore/spark_utils.py +30 -0
- mlrun/feature_store/api.py +19 -1
- mlrun/feature_store/steps.py +8 -0
- mlrun/model_monitoring/api.py +24 -7
- mlrun/model_monitoring/applications/_application_steps.py +12 -3
- mlrun/model_monitoring/applications/base.py +8 -0
- mlrun/model_monitoring/applications/evidently_base.py +23 -22
- mlrun/model_monitoring/controller.py +5 -1
- mlrun/model_monitoring/db/stores/sqldb/models/mysql.py +14 -1
- mlrun/model_monitoring/db/stores/v3io_kv/kv_store.py +1 -1
- mlrun/model_monitoring/db/tsdb/base.py +20 -11
- mlrun/model_monitoring/helpers.py +1 -2
- mlrun/model_monitoring/stream_processing.py +20 -0
- mlrun/model_monitoring/writer.py +4 -1
- mlrun/projects/operations.py +4 -0
- mlrun/projects/project.py +4 -0
- mlrun/runtimes/base.py +3 -0
- mlrun/runtimes/nuclio/api_gateway.py +1 -1
- mlrun/runtimes/nuclio/application/application.py +53 -12
- mlrun/runtimes/nuclio/function.py +5 -1
- mlrun/runtimes/sparkjob/spark3job.py +4 -7
- mlrun/runtimes/utils.py +18 -0
- mlrun/serving/routers.py +1 -4
- mlrun/serving/server.py +4 -7
- mlrun/serving/states.py +8 -3
- mlrun/serving/v2_serving.py +9 -9
- mlrun/utils/db.py +15 -0
- mlrun/utils/http.py +1 -1
- mlrun/utils/version/version.json +2 -2
- {mlrun-1.7.0rc35.dist-info → mlrun-1.7.0rc37.dist-info}/METADATA +6 -6
- {mlrun-1.7.0rc35.dist-info → mlrun-1.7.0rc37.dist-info}/RECORD +41 -41
- {mlrun-1.7.0rc35.dist-info → mlrun-1.7.0rc37.dist-info}/LICENSE +0 -0
- {mlrun-1.7.0rc35.dist-info → mlrun-1.7.0rc37.dist-info}/WHEEL +0 -0
- {mlrun-1.7.0rc35.dist-info → mlrun-1.7.0rc37.dist-info}/entry_points.txt +0 -0
- {mlrun-1.7.0rc35.dist-info → mlrun-1.7.0rc37.dist-info}/top_level.txt +0 -0
mlrun/alerts/alert.py
CHANGED
@@ -28,6 +28,7 @@ class AlertConfig(ModelObj):
         "severity",
         "reset_policy",
         "state",
+        "count",
     ]
     _fields_to_serialize = ModelObj._fields_to_serialize + [
         "entities",
@@ -54,6 +55,68 @@ class AlertConfig(ModelObj):
         created: str = None,
         count: int = None,
     ):
+        """
+        Alert config object
+
+        Example::
+
+            # create an alert on endpoint_id, which will be triggered to slack if there is a "data_drift_detected" event
+            # 3 times in the next hour.
+            from mlrun.alerts import AlertConfig
+            import mlrun.common.schemas.alert as alert_objects
+
+            entity_kind = alert_objects.EventEntityKind.MODEL_ENDPOINT_RESULT
+            entity_id = get_default_result_instance_fqn(endpoint_id)
+            event_name = alert_objects.EventKind.DATA_DRIFT_DETECTED
+            notification = mlrun.model.Notification(
+                kind="slack",
+                name="slack_notification",
+                message="drift was detected",
+                severity="warning",
+                when=["now"],
+                condition="failed",
+                secret_params={
+                    "webhook": "https://hooks.slack.com/",
+                },
+            ).to_dict()
+
+            alert_data = AlertConfig(
+                project="my-project",
+                name="drift-alert",
+                summary="a drift was detected",
+                severity=alert_objects.AlertSeverity.LOW,
+                entities=alert_objects.EventEntities(
+                    kind=entity_kind, project="my-project", ids=[entity_id]
+                ),
+                trigger=alert_objects.AlertTrigger(events=[event_name]),
+                criteria=alert_objects.AlertCriteria(count=3, period="1h"),
+                notifications=[alert_objects.AlertNotification(notification=notification)],
+            )
+            project.store_alert_config(alert_data)
+
+        :param project: name of the project to associate the alert with
+        :param name: name of the alert
+        :param template: optional parameter that allows to create an alert based on a predefined template.
+                         you can pass either an AlertTemplate object or a string (the template name).
+                         if a template is used, many fields of the alert will be auto-generated based on the
+                         template. however, you still need to provide the following fields:
+                         `name`, `project`, `entity`, `notifications`
+        :param description: description of the alert
+        :param summary: summary of the alert, will be sent in the generated notifications
+        :param severity: severity of the alert
+        :param trigger: the events that will trigger this alert, may be a simple trigger based on events or
+                        complex trigger which is based on a prometheus alert
+        :param criteria: when the alert will be triggered based on the specified number of events within the
+                         defined time period.
+        :param reset_policy: when to clear the alert. May be "manual" for manual reset of the alert, or
+                             "auto" if the criteria contains a time period
+        :param notifications: list of notifications to invoke once the alert is triggered
+        :param entities: entities that the event relates to. The entity object will contain fields that uniquely
+                         identify a given entity in the system
+        :param id: internal id of the alert (user should not supply it)
+        :param state: state of the alert, may be active/inactive (user should not supply it)
+        :param created: when the alert is created (user should not supply it)
+        :param count: internal counter of the alert (user should not supply it)
+        """
         self.project = project
         self.name = name
         self.description = description
mlrun/common/schemas/alert.py
CHANGED
@@ -149,7 +149,7 @@ class AlertConfig(pydantic.BaseModel):
     entities: EventEntities
     trigger: AlertTrigger
     criteria: Optional[AlertCriteria]
-    reset_policy: ResetPolicy = ResetPolicy.
+    reset_policy: ResetPolicy = ResetPolicy.AUTO
     notifications: pydantic.conlist(AlertNotification, min_items=1)
     state: AlertActiveState = AlertActiveState.INACTIVE
     count: Optional[int] = 0
@@ -185,7 +185,7 @@ class AlertTemplate(
     severity: AlertSeverity
     trigger: AlertTrigger
    criteria: Optional[AlertCriteria]
-    reset_policy: ResetPolicy = ResetPolicy.
+    reset_policy: ResetPolicy = ResetPolicy.AUTO
 
     # This is slightly different than __eq__ as it doesn't compare everything
     def templates_differ(self, other):
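Note on the reset_policy change above: omitting reset_policy on an AlertConfig or AlertTemplate now resolves to ResetPolicy.AUTO (the old default value is truncated in the source diff). A minimal standalone sketch of the resulting pydantic behavior, assuming ResetPolicy is a string enum with "manual" and "auto" members as the AlertConfig docstring describes:

    import enum

    import pydantic

    class ResetPolicy(str, enum.Enum):
        MANUAL = "manual"
        AUTO = "auto"

    class AlertConfigSketch(pydantic.BaseModel):  # stand-in for the real schema
        name: str
        reset_policy: ResetPolicy = ResetPolicy.AUTO  # the new default

    alert = AlertConfigSketch(name="drift-alert")
    assert alert.reset_policy is ResetPolicy.AUTO  # omitted field resolves to AUTO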
mlrun/common/schemas/notification.py
CHANGED
@@ -50,15 +50,34 @@ class NotificationLimits(enum.Enum):
 
 
 class Notification(pydantic.BaseModel):
+    """
+    Notification object schema
+    :param kind: notification implementation kind - slack, webhook, etc.
+    :param name: for logging and identification
+    :param message: message content in the notification
+    :param severity: severity to display in the notification
+    :param when: list of statuses to trigger the notification: 'running', 'completed', 'error'
+    :param condition: optional condition to trigger the notification, a jinja2 expression that can use run data
+                      to evaluate if the notification should be sent in addition to the 'when' statuses.
+                      e.g.: '{{ run["status"]["results"]["accuracy"] < 0.9}}'
+    :param params: Implementation specific parameters for the notification implementation (e.g. slack webhook url,
+                   git repository details, etc.)
+    :param secret_params: secret parameters for the notification implementation, same as params but will be stored
+                          in a k8s secret and passed as a secret reference to the implementation.
+    :param status: notification status - pending, sent, error
+    :param sent_time: time the notification was sent
+    :param reason: failure reason if the notification failed to send
+    """
+
     kind: NotificationKind
     name: str
     message: str
     severity: NotificationSeverity
     when: list[str]
-    condition: str = None
-    params: dict[str, typing.Any] = None
-    status: NotificationStatus = None
-    sent_time: typing.Union[str, datetime.datetime] = None
+    condition: typing.Optional[str] = None
+    params: typing.Optional[dict[str, typing.Any]] = None
+    status: typing.Optional[NotificationStatus] = None
+    sent_time: typing.Optional[typing.Union[str, datetime.datetime]] = None
     secret_params: typing.Optional[dict[str, typing.Any]] = None
     reason: typing.Optional[str] = None
 
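Since condition, params, status, and sent_time are now typing.Optional (matching their None defaults), a notification can be built without them. A hedged sketch, assuming NotificationKind and NotificationSeverity are string enums that pydantic coerces from plain strings:

    from mlrun.common.schemas.notification import Notification

    notification = Notification(
        kind="slack",
        name="slack-notification",
        message="run finished",
        severity="info",
        when=["completed", "error"],
        # condition, params, status, and sent_time can now simply be omitted;
        # they default to None.
    )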
mlrun/config.py
CHANGED
@@ -1166,6 +1166,7 @@ class Config:
             )
         elif kind == "stream":  # return list for mlrun<1.6.3 BC
             return [
+                # TODO: remove the first stream in 1.9.0
                 mlrun.mlconf.model_endpoint_monitoring.store_prefixes.default.format(
                     project=project,
                     kind=kind,
mlrun/datastore/s3.py
CHANGED
@@ -15,6 +15,7 @@
 import time
 
 import boto3
+from boto3.s3.transfer import TransferConfig
 from fsspec.registry import get_filesystem_class
 
 import mlrun.errors
@@ -40,6 +41,12 @@ class S3Store(DataStore):
         profile_name = self._get_secret_or_env("AWS_PROFILE")
         assume_role_arn = self._get_secret_or_env("MLRUN_AWS_ROLE_ARN")
 
+        self.config = TransferConfig(
+            multipart_threshold=1024 * 1024 * 25,
+            max_concurrency=10,
+            multipart_chunksize=1024 * 1024 * 25,
+        )
+
         # If user asks to assume a role, this needs to go through the STS client and retrieve temporary creds
         if assume_role_arn:
             client = boto3.client(
@@ -166,7 +173,7 @@ class S3Store(DataStore):
 
     def upload(self, key, src_path):
         bucket, key = self.get_bucket_and_key(key)
-        self.s3.
+        self.s3.Bucket(bucket).upload_file(src_path, key, Config=self.config)
 
     def get(self, key, size=None, offset=0):
         bucket, key = self.get_bucket_and_key(key)
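The upload path above now goes through boto3's high-level transfer manager with an explicit TransferConfig: files larger than the 25 MiB multipart_threshold are split into 25 MiB parts and uploaded by up to 10 concurrent threads. A standalone sketch of the same call shape (bucket and file names are hypothetical):

    import boto3
    from boto3.s3.transfer import TransferConfig

    config = TransferConfig(
        multipart_threshold=1024 * 1024 * 25,  # switch to multipart above 25 MiB
        max_concurrency=10,                    # up to 10 parallel part uploads
        multipart_chunksize=1024 * 1024 * 25,  # 25 MiB per part
    )
    s3 = boto3.resource("s3")
    s3.Bucket("my-bucket").upload_file(
        "/tmp/large-model.bin", "models/large-model.bin", Config=config
    )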
mlrun/datastore/spark_utils.py
CHANGED
@@ -13,7 +13,10 @@
 # limitations under the License.
 
 
+from typing import Union
+
 import mlrun
+from mlrun.features import Entity
 
 
 def spark_session_update_hadoop_options(session, spark_options) -> dict[str, str]:
@@ -35,3 +38,30 @@ def spark_session_update_hadoop_options(session, spark_options) -> dict[str, str]:
     else:
         non_hadoop_spark_options[key] = value
     return non_hadoop_spark_options
+
+
+def check_special_columns_exists(
+    spark_df, entities: list[Union[Entity, str]], timestamp_key: str, label_column: str
+):
+    columns = spark_df.columns
+    entities = entities or []
+    entities = [
+        entity.name if isinstance(entity, Entity) else entity for entity in entities
+    ]
+    missing_entities = [entity for entity in entities if entity not in columns]
+    cases_message = "Please check the letter cases (uppercase or lowercase)"
+    if missing_entities:
+        raise mlrun.errors.MLRunInvalidArgumentError(
+            f"There are missing entities from dataframe during ingestion. missing_entities: {missing_entities}."
+            f" {cases_message}"
+        )
+    if timestamp_key and timestamp_key not in columns:
+        raise mlrun.errors.MLRunInvalidArgumentError(
+            f"timestamp_key is missing from dataframe during ingestion. timestamp_key: {timestamp_key}."
+            f" {cases_message}"
+        )
+    if label_column and label_column not in columns:
+        raise mlrun.errors.MLRunInvalidArgumentError(
+            f"label_column is missing from dataframe during ingestion. label_column: {label_column}. "
+            f"{cases_message}"
+        )
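A hypothetical usage sketch of the new validator (column names are illustrative): it verifies that entity, timestamp, and label columns exist in the Spark dataframe with matching letter case before ingestion.

    from pyspark.sql import SparkSession

    from mlrun.datastore.spark_utils import check_special_columns_exists

    spark = SparkSession.builder.getOrCreate()
    df = spark.createDataFrame(
        [("p1", "2024-01-01", 0)], ["patient_id", "timestamp", "label"]
    )

    # Passes: all three special columns are present.
    check_special_columns_exists(
        df, entities=["patient_id"], timestamp_key="timestamp", label_column="label"
    )

    # Raises MLRunInvalidArgumentError: "PATIENT_ID" differs from "patient_id" in case.
    check_special_columns_exists(
        df, entities=["PATIENT_ID"], timestamp_key="timestamp", label_column="label"
    )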
mlrun/feature_store/api.py
CHANGED
@@ -1032,6 +1032,8 @@ def _ingest_with_spark(
     try:
         import pyspark.sql
 
+        from mlrun.datastore.spark_utils import check_special_columns_exists
+
         if spark is None or spark is True:
             # create spark context
 
@@ -1050,7 +1052,6 @@ def _ingest_with_spark(
             created_spark_context = True
 
         timestamp_key = featureset.spec.timestamp_key
-
         if isinstance(source, pd.DataFrame):
             df = spark.createDataFrame(source)
         elif isinstance(source, pyspark.sql.DataFrame):
@@ -1080,6 +1081,12 @@ def _ingest_with_spark(
             target = get_target_driver(target, featureset)
             target.set_resource(featureset)
             if featureset.spec.passthrough and target.is_offline:
+                check_special_columns_exists(
+                    spark_df=df,
+                    entities=featureset.spec.entities,
+                    timestamp_key=timestamp_key,
+                    label_column=featureset.spec.label_column,
+                )
                 continue
             spark_options = target.get_spark_options(
                 key_columns, timestamp_key, overwrite
@@ -1090,6 +1097,17 @@ def _ingest_with_spark(
                 df_to_write, key_columns, timestamp_key, spark_options
             )
             write_format = spark_options.pop("format", None)
+            # We can get to this point if the column exists in different letter cases,
+            # so PySpark will be able to read it, but we still have to raise an exception for it.
+
+            # This check is here and not in to_spark_df because in spark_merger we can have a target
+            # that has different letter cases than the source, like in SnowflakeTarget.
+            check_special_columns_exists(
+                spark_df=df_to_write,
+                entities=featureset.spec.entities,
+                timestamp_key=timestamp_key,
+                label_column=featureset.spec.label_column,
+            )
             if overwrite:
                 write_spark_dataframe_with_options(
                     spark_options, df_to_write, "overwrite", write_format=write_format
mlrun/feature_store/steps.py
CHANGED
@@ -743,3 +743,11 @@ class DropFeatures(StepToDict, MLRunStep):
             raise mlrun.errors.MLRunInvalidArgumentError(
                 f"DropFeatures can only drop features, not entities: {dropped_entities}"
             )
+        if feature_set.spec.label_column in features:
+            raise mlrun.errors.MLRunInvalidArgumentError(
+                f"DropFeatures can not drop label_column: {feature_set.spec.label_column}"
+            )
+        if feature_set.spec.timestamp_key in features:
+            raise mlrun.errors.MLRunInvalidArgumentError(
+                f"DropFeatures can not drop timestamp_key: {feature_set.spec.timestamp_key}"
+            )
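A hedged sketch of what the new DropFeatures validation guards against (the feature set and column names are illustrative, and the check is expected to fire during ingest-time graph validation rather than at graph construction):

    import mlrun.errors
    from mlrun.feature_store import Entity, FeatureSet
    from mlrun.feature_store.steps import DropFeatures

    fset = FeatureSet("tickers", entities=[Entity("ticker")], timestamp_key="time")
    fset.graph.to(DropFeatures(features=["time"]))  # "time" is the timestamp_key
    # Ingesting through this graph is expected to raise
    # mlrun.errors.MLRunInvalidArgumentError:
    #   "DropFeatures can not drop timestamp_key: time"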
mlrun/model_monitoring/api.py
CHANGED
@@ -252,14 +252,31 @@ def _model_endpoint_validations(
     In case of discrepancy between the provided `sample_set_statistics` and the
     `model_endpoints.spec.feature_stats`, a warning will be presented to the user.
     """
-
-
-
-
-
-
+
+    # Model Path
+    if model_path:
+        # Generate the parsed model uri that is based on hash, key, iter, and tree
+        model_obj = mlrun.datastore.get_store_resource(model_path)
+
+        model_artifact_uri = mlrun.utils.helpers.generate_artifact_uri(
+            project=model_endpoint.metadata.project,
+            key=model_obj.key,
+            iter=model_obj.iter,
+            tree=model_obj.tree,
+        )
+
+        # Enrich the uri schema with the store prefix
+        model_artifact_uri = mlrun.datastore.get_store_uri(
+            kind=mlrun.utils.helpers.StorePrefix.Model, uri=model_artifact_uri
         )
 
+        if model_endpoint.spec.model_uri != model_artifact_uri:
+            raise mlrun.errors.MLRunInvalidArgumentError(
+                f"provided model store path {model_path} does not match "
+                f"the path that is stored under the existing model "
+                f"endpoint record: {model_endpoint.spec.model_uri}"
+            )
+
     # Feature stats
     if (
         sample_set_statistics
@@ -605,5 +622,5 @@ def _create_model_monitoring_function_base(
         name="PushToMonitoringWriter",
         project=project,
         writer_application_name=mm_constants.MonitoringFunctionNames.WRITER,
-        )
+    )
     return func_obj
mlrun/model_monitoring/applications/_application_steps.py
CHANGED
@@ -19,6 +19,8 @@ import mlrun.common.helpers
 import mlrun.common.model_monitoring.helpers
 import mlrun.common.schemas.model_monitoring.constants as mm_constant
 import mlrun.datastore
+import mlrun.serving
+import mlrun.utils.helpers
 import mlrun.utils.v3io_clients
 from mlrun.model_monitoring.helpers import get_stream_path
 from mlrun.serving.utils import StepToDict
@@ -33,8 +35,8 @@ class _PushToMonitoringWriter(StepToDict):
 
     def __init__(
         self,
-        project:
-        writer_application_name:
+        project: str,
+        writer_application_name: str,
         stream_uri: Optional[str] = None,
         name: Optional[str] = None,
     ):
@@ -108,6 +110,7 @@ class _PushToMonitoringWriter(StepToDict):
             f"Pushing data = {writer_event} \n to stream = {self.stream_uri}"
         )
         self.output_stream.push([writer_event])
+        logger.info(f"Pushed data to {self.stream_uri} successfully")
 
     def _lazy_init(self):
         if self.output_stream is None:
@@ -149,9 +152,15 @@ class _PrepareMonitoringEvent(StepToDict):
 
     @staticmethod
     def _create_mlrun_context(app_name: str):
+        artifact_path = mlrun.utils.helpers.template_artifact_path(
+            mlrun.mlconf.artifact_path, mlrun.mlconf.default_project
+        )
         context = mlrun.get_or_create_ctx(
             f"{app_name}-logger",
-
+            spec={
+                "metadata": {"labels": {"kind": mlrun.runtimes.RuntimeKinds.serving}},
+                "spec": {mlrun.utils.helpers.RunKeys.output_path: artifact_path},
+            },
         )
         context.__class__ = MonitoringApplicationContext
         return context
mlrun/model_monitoring/applications/base.py
CHANGED
@@ -17,6 +17,7 @@ from typing import Any, Union, cast
 
 import numpy as np
 import pandas as pd
+from deprecated import deprecated
 
 import mlrun
 import mlrun.model_monitoring.applications.context as mm_context
@@ -112,6 +113,13 @@ class ModelMonitoringApplicationBaseV2(MonitoringApplicationToDict, ABC):
         raise NotImplementedError
 
 
+# TODO: Remove in 1.9.0
+@deprecated(
+    version="1.7.0",
+    reason="The `ModelMonitoringApplicationBase` class is deprecated from "
+    "version 1.7.0 and will be removed in version 1.9.0. "
+    "Use `ModelMonitoringApplicationBaseV2` as your application's base class.",
+)
 class ModelMonitoringApplicationBase(MonitoringApplicationToDict, ABC):
     """
     A base class for a model monitoring application.
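For context, the `deprecated` package used above wraps the decorated class so that instantiating it (or a subclass) emits a DeprecationWarning carrying the given reason. A standalone sketch of that behavior:

    import warnings

    from deprecated import deprecated

    @deprecated(version="1.7.0", reason="Use `NewBase` instead.")
    class OldBase:  # stand-in for the deprecated application base class
        pass

    with warnings.catch_warnings(record=True) as caught:
        warnings.simplefilter("always")
        OldBase()  # instantiation triggers the warning
        assert any(issubclass(w.category, DeprecationWarning) for w in caught)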
mlrun/model_monitoring/applications/evidently_base.py
CHANGED
@@ -14,10 +14,11 @@
 
 import uuid
 import warnings
-from
+from abc import ABC
 
 import pandas as pd
 import semver
+from deprecated import deprecated
 
 import mlrun.model_monitoring.applications.base as mm_base
 import mlrun.model_monitoring.applications.context as mm_context
@@ -57,14 +58,22 @@ except ModuleNotFoundError:
 
 
 if _HAS_EVIDENTLY:
-    from evidently.
-    from evidently.suite.base_suite import Suite
+    from evidently.suite.base_suite import Display
     from evidently.ui.type_aliases import STR_UUID
     from evidently.ui.workspace import Workspace
     from evidently.utils.dashboard import TemplateParams, file_html_template
 
 
-
+# TODO: Remove in 1.9.0
+@deprecated(
+    version="1.7.0",
+    reason="The `EvidentlyModelMonitoringApplicationBase` class is deprecated from "
+    "version 1.7.0 and will be removed in version 1.9.0. "
+    "Use `EvidentlyModelMonitoringApplicationBaseV2` as your application's base class.",
+)
+class EvidentlyModelMonitoringApplicationBase(
+    mm_base.ModelMonitoringApplicationBase, ABC
+):
     def __init__(
         self, evidently_workspace_path: str, evidently_project_id: "STR_UUID"
     ) -> None:
@@ -86,12 +95,12 @@ class EvidentlyModelMonitoringApplicationBase(mm_base.ModelMonitoringApplication
         )
 
     def log_evidently_object(
-        self, evidently_object:
-    ):
+        self, evidently_object: "Display", artifact_name: str
+    ) -> None:
         """
         Logs an Evidently report or suite as an artifact.
 
-        :param evidently_object: (
+        :param evidently_object: (Display) The Evidently display to log, e.g. a report or a test suite object.
         :param artifact_name: (str) The name for the logged artifact.
         """
         evidently_object_html = evidently_object.get_html()
@@ -122,18 +131,14 @@ class EvidentlyModelMonitoringApplicationBase(mm_base.ModelMonitoringApplication
             additional_graphs={},
         )
 
-        dashboard_html =
+        dashboard_html = file_html_template(params=template_params)
         self.context.log_artifact(
             artifact_name, body=dashboard_html.encode("utf-8"), format="html"
         )
 
-    @staticmethod
-    def _render(temple_func, template_params: "TemplateParams"):
-        return temple_func(params=template_params)
-
 
 class EvidentlyModelMonitoringApplicationBaseV2(
-    mm_base.ModelMonitoringApplicationBaseV2
+    mm_base.ModelMonitoringApplicationBaseV2, ABC
 ):
     def __init__(
         self, evidently_workspace_path: str, evidently_project_id: "STR_UUID"
@@ -160,14 +165,14 @@ class EvidentlyModelMonitoringApplicationBaseV2(
     @staticmethod
     def log_evidently_object(
         monitoring_context: mm_context.MonitoringApplicationContext,
-        evidently_object:
+        evidently_object: "Display",
         artifact_name: str,
-    ):
+    ) -> None:
         """
         Logs an Evidently report or suite as an artifact.
 
         :param monitoring_context: (MonitoringApplicationContext) The monitoring context to process.
-        :param evidently_object: (
+        :param evidently_object: (Display) The Evidently display to log, e.g. a report or a test suite object.
         :param artifact_name: (str) The name for the logged artifact.
         """
         evidently_object_html = evidently_object.get_html()
@@ -181,7 +186,7 @@ class EvidentlyModelMonitoringApplicationBaseV2(
         timestamp_start: pd.Timestamp,
         timestamp_end: pd.Timestamp,
         artifact_name: str = "dashboard",
-    ):
+    ) -> None:
         """
         Logs an Evidently project dashboard.
 
@@ -200,11 +205,7 @@ class EvidentlyModelMonitoringApplicationBaseV2(
             additional_graphs={},
         )
 
-        dashboard_html =
+        dashboard_html = file_html_template(params=template_params)
         monitoring_context.log_artifact(
             artifact_name, body=dashboard_html.encode("utf-8"), format="html"
         )
-
-    @staticmethod
-    def _render(temple_func, template_params: "TemplateParams"):
-        return temple_func(params=template_params)
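A hedged usage sketch of log_evidently_object, assuming an Evidently version where Report exposes get_html() (which is what the method relies on); ref_df and cur_df are hypothetical dataframes, and monitoring_context is assumed to be the context provided inside a V2 application's do_tracking:

    import pandas as pd
    from evidently.metric_preset import DataDriftPreset
    from evidently.report import Report

    ref_df = pd.DataFrame({"f": [1.0, 2.0, 3.0]})  # hypothetical reference data
    cur_df = pd.DataFrame({"f": [1.5, 2.5, 3.5]})  # hypothetical current data

    report = Report(metrics=[DataDriftPreset()])
    report.run(reference_data=ref_df, current_data=cur_df)
    # Inside a V2 application, where monitoring_context is provided:
    EvidentlyModelMonitoringApplicationBaseV2.log_evidently_object(
        monitoring_context, report, artifact_name="drift_report"
    )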
mlrun/model_monitoring/controller.py
CHANGED
@@ -15,6 +15,7 @@
 import concurrent.futures
 import datetime
 import json
+import multiprocessing
 import os
 import re
 from collections.abc import Iterator
@@ -363,7 +364,10 @@ class MonitoringApplicationController:
             return
         # Initialize a process pool that will be used to run each endpoint applications on a dedicated process
         with concurrent.futures.ProcessPoolExecutor(
-            max_workers=min(len(endpoints), 10)
+            max_workers=min(len(endpoints), 10),
+            # On Linux, the default is "fork" (this is set to change in Python 3.14), which inherits the current heap
+            # and resources (such as sockets), which is not what we want (ML-7160)
+            mp_context=multiprocessing.get_context("spawn"),
         ) as pool:
             for endpoint in endpoints:
                 if (
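A standalone sketch of the executor change: requesting the "spawn" start method gives each worker a fresh interpreter instead of a fork of the parent's heap and open resources such as sockets.

    import concurrent.futures
    import multiprocessing

    def work(i: int) -> int:
        return i * i  # trivial payload; runs in a freshly spawned process

    if __name__ == "__main__":  # required under "spawn", which re-imports __main__
        with concurrent.futures.ProcessPoolExecutor(
            max_workers=4,
            mp_context=multiprocessing.get_context("spawn"),
        ) as pool:
            print(list(pool.map(work, range(4))))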
mlrun/model_monitoring/db/stores/sqldb/models/mysql.py
CHANGED
@@ -18,6 +18,7 @@ from sqlalchemy.ext.declarative import declarative_base, declared_attr
 
 from mlrun.common.schemas.model_monitoring import (
     EventFieldType,
+    ResultData,
     WriterEvent,
 )
 
@@ -32,6 +33,13 @@ Base = declarative_base()
 
 
 class ModelEndpointsTable(Base, ModelEndpointsBaseTable):
+    feature_stats = Column(
+        EventFieldType.FEATURE_STATS, sqlalchemy.dialects.mysql.MEDIUMTEXT
+    )
+    current_stats = Column(
+        EventFieldType.CURRENT_STATS, sqlalchemy.dialects.mysql.MEDIUMTEXT
+    )
+    metrics = Column(EventFieldType.METRICS, sqlalchemy.dialects.mysql.MEDIUMTEXT)
     first_request = Column(
         EventFieldType.FIRST_REQUEST,
         # TODO: migrate to DATETIME, see ML-6921
@@ -72,7 +80,12 @@ class _ApplicationResultOrMetric:
 class ApplicationResultTable(
     Base, _ApplicationResultOrMetric, ApplicationResultBaseTable
 ):
-
+    result_extra_data = Column(
+        ResultData.RESULT_EXTRA_DATA, sqlalchemy.dialects.mysql.MEDIUMTEXT
+    )
+    current_stats = Column(
+        ResultData.CURRENT_STATS, sqlalchemy.dialects.mysql.MEDIUMTEXT
+    )
 
 
 class ApplicationMetricsTable(
mlrun/model_monitoring/db/stores/v3io_kv/kv_store.py
CHANGED
@@ -350,7 +350,7 @@ class KVStoreBase(StoreBase):
             table_path = self._get_results_table_path(endpoint_id)
             key = event.pop(mm_schemas.WriterEvent.APPLICATION_NAME)
             metric_name = event.pop(mm_schemas.ResultData.RESULT_NAME)
-            attributes = {metric_name: json.dumps(event)}
+            attributes = {metric_name: self._encode_field(json.dumps(event))}
         else:
             raise ValueError(f"Invalid {kind = }")
 
mlrun/model_monitoring/db/tsdb/base.py
CHANGED
@@ -17,6 +17,7 @@ from abc import ABC, abstractmethod
 from datetime import datetime
 
 import pandas as pd
+import pydantic
 
 import mlrun.common.schemas.model_monitoring as mm_schemas
 import mlrun.model_monitoring.db.tsdb.helpers
@@ -289,19 +290,27 @@ class TSDBConnector(ABC):
             full_name = mlrun.model_monitoring.helpers._compose_full_name(
                 project=project, app=app_name, name=name
             )
-
-
+            try:
+                metrics_values.append(
+                    mm_schemas.ModelEndpointMonitoringResultValues(
+                        full_name=full_name,
+                        result_kind=result_kind,
+                        values=list(
+                            zip(
+                                sub_df.index,
+                                sub_df[mm_schemas.ResultData.RESULT_VALUE],
+                                sub_df[mm_schemas.ResultData.RESULT_STATUS],
+                            )
+                        ),  # pyright: ignore[reportArgumentType]
+                    )
+                )
+            except pydantic.ValidationError:
+                logger.exception(
+                    "Failed to convert data-frame into `ModelEndpointMonitoringResultValues`",
                     full_name=full_name,
-
-                values=list(
-                    zip(
-                        sub_df.index,
-                        sub_df[mm_schemas.ResultData.RESULT_VALUE],
-                        sub_df[mm_schemas.ResultData.RESULT_STATUS],
-                    )
-                ),  # pyright: ignore[reportArgumentType]
+                    sub_df_json=sub_df.to_json(),
                 )
-
+                raise
             del metrics_without_data[full_name]
 
         for metric in metrics_without_data.values():
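A minimal standalone sketch of the error-handling pattern introduced here: build the pydantic object, and on validation failure log the offending payload for debugging before re-raising so the caller still sees the original error (the model below is a stand-in, not mlrun's schema):

    import pydantic

    class ResultValues(pydantic.BaseModel):  # stand-in for the real schema
        full_name: str
        values: list[tuple[str, float, int]]

    def parse(payload: dict) -> ResultValues:
        try:
            return ResultValues(**payload)
        except pydantic.ValidationError:
            # stand-in for logger.exception with the payload as context
            print(f"failed to parse payload: {payload!r}")
            raise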
mlrun/model_monitoring/helpers.py
CHANGED
@@ -45,8 +45,7 @@ class _BatchDict(typing.TypedDict):
 
 
 def get_stream_path(
-    project: str =
-    function_name: str = mm_constants.MonitoringFunctionNames.STREAM,
+    project: str, function_name: str = mm_constants.MonitoringFunctionNames.STREAM
 ) -> str:
     """
     Get stream path from the project secret. If wasn't set, take it from the system configurations
mlrun/model_monitoring/stream_processing.py
CHANGED
@@ -557,6 +557,26 @@ class ProcessEndpointEvent(mlrun.feature_store.steps.MapClass):
 
         # Separate each model invocation into sub events that will be stored as dictionary
         # in list of events. This list will be used as the body for the storey event.
+        if not isinstance(features, list):
+            raise mlrun.errors.MLRunInvalidArgumentError(
+                "Model's inputs must be a list"
+            )
+        features = (
+            features
+            if not any(not isinstance(feat, list) for feat in features)
+            else [features]
+        )
+        if not isinstance(predictions, list):
+            predictions = [[predictions]]
+        elif isinstance(predictions, list) and len(predictions) == len(features):
+            pass  # predictions are already in the right format
+        else:
+            predictions = (
+                predictions
+                if not any(not isinstance(pred, list) for pred in predictions)
+                else [predictions]
+            )
+
         events = []
         for i, (feature, prediction) in enumerate(zip(features, predictions)):
             if not isinstance(prediction, list):
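A standalone sketch of the normalization this new code performs: a single flat feature vector (or a scalar prediction) is wrapped so that both features and predictions become lists of lists with one entry per model invocation, which is the shape the per-invocation loop below it expects.

    def normalize(features, predictions):
        # Mirrors the added checks: inputs must arrive as a list.
        if not isinstance(features, list):
            raise ValueError("Model's inputs must be a list")
        if any(not isinstance(feat, list) for feat in features):
            features = [features]  # one flat vector -> one invocation
        if not isinstance(predictions, list):
            predictions = [[predictions]]  # scalar -> single single-output invocation
        elif len(predictions) != len(features) and any(
            not isinstance(pred, list) for pred in predictions
        ):
            predictions = [predictions]  # flat output vector -> one invocation
        return features, predictions

    assert normalize([1.0, 2.0], 0.5) == ([[1.0, 2.0]], [[0.5]])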