mlrun 1.7.0rc34__py3-none-any.whl → 1.7.0rc36__py3-none-any.whl
This diff shows the content of publicly available package versions released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the two versions as they appear in their respective public registries.
Potentially problematic release: this version of mlrun might be problematic.
- mlrun/artifacts/base.py +1 -0
- mlrun/common/schemas/__init__.py +0 -1
- mlrun/common/schemas/api_gateway.py +1 -1
- mlrun/common/schemas/model_monitoring/__init__.py +1 -2
- mlrun/common/schemas/model_monitoring/constants.py +3 -16
- mlrun/common/schemas/notification.py +1 -1
- mlrun/common/types.py +1 -0
- mlrun/config.py +7 -7
- mlrun/datastore/sources.py +8 -4
- mlrun/datastore/spark_utils.py +30 -0
- mlrun/db/base.py +2 -3
- mlrun/db/httpdb.py +3 -3
- mlrun/feature_store/api.py +19 -1
- mlrun/feature_store/steps.py +8 -0
- mlrun/model.py +1 -1
- mlrun/model_monitoring/api.py +23 -6
- mlrun/model_monitoring/applications/_application_steps.py +4 -0
- mlrun/model_monitoring/applications/base.py +8 -0
- mlrun/model_monitoring/applications/evidently_base.py +27 -27
- mlrun/model_monitoring/controller.py +5 -1
- mlrun/model_monitoring/db/stores/sqldb/sql_store.py +5 -0
- mlrun/model_monitoring/db/stores/v3io_kv/kv_store.py +2 -2
- mlrun/model_monitoring/db/tsdb/base.py +6 -3
- mlrun/model_monitoring/db/tsdb/tdengine/stream_graph_steps.py +0 -3
- mlrun/model_monitoring/db/tsdb/v3io/stream_graph_steps.py +22 -3
- mlrun/model_monitoring/stream_processing.py +25 -153
- mlrun/projects/pipelines.py +76 -73
- mlrun/run.py +4 -0
- mlrun/runtimes/nuclio/api_gateway.py +1 -1
- mlrun/runtimes/nuclio/application/application.py +25 -2
- mlrun/runtimes/nuclio/function.py +5 -0
- mlrun/runtimes/nuclio/serving.py +1 -1
- mlrun/runtimes/pod.py +2 -4
- mlrun/runtimes/utils.py +18 -0
- mlrun/serving/states.py +10 -3
- mlrun/serving/v2_serving.py +5 -2
- mlrun/utils/db.py +15 -0
- mlrun/utils/helpers.py +27 -14
- mlrun/utils/http.py +1 -1
- mlrun/utils/version/version.json +2 -2
- {mlrun-1.7.0rc34.dist-info → mlrun-1.7.0rc36.dist-info}/METADATA +3 -1
- {mlrun-1.7.0rc34.dist-info → mlrun-1.7.0rc36.dist-info}/RECORD +46 -47
- {mlrun-1.7.0rc34.dist-info → mlrun-1.7.0rc36.dist-info}/WHEEL +1 -1
- mlrun/model_monitoring/prometheus.py +0 -216
- {mlrun-1.7.0rc34.dist-info → mlrun-1.7.0rc36.dist-info}/LICENSE +0 -0
- {mlrun-1.7.0rc34.dist-info → mlrun-1.7.0rc36.dist-info}/entry_points.txt +0 -0
- {mlrun-1.7.0rc34.dist-info → mlrun-1.7.0rc36.dist-info}/top_level.txt +0 -0
mlrun/artifacts/base.py
CHANGED
mlrun/common/schemas/__init__.py
CHANGED
mlrun/common/schemas/model_monitoring/__init__.py
CHANGED
@@ -13,6 +13,7 @@
 # limitations under the License.

 from .constants import (
+    V3IO_MODEL_MONITORING_DB,
     ControllerPolicy,
     DriftStatus,
     EndpointType,
@@ -31,8 +32,6 @@ from .constants import (
     MonitoringFunctionNames,
     PredictionsQueryConstants,
     ProjectSecretKeys,
-    PrometheusEndpoints,
-    PrometheusMetric,
     ResultData,
     ResultKindApp,
     SchedulingKeys,
mlrun/common/schemas/model_monitoring/constants.py
CHANGED
@@ -170,7 +170,6 @@ class StreamKind(MonitoringStrEnum):
 class TSDBTarget(MonitoringStrEnum):
     V3IO_TSDB = "v3io-tsdb"
     TDEngine = "tdengine"
-    PROMETHEUS = "prometheus"


 class ProjectSecretKeys:
@@ -231,21 +230,6 @@ class EndpointType(IntEnum):
     LEAF_EP = 3  # end point that is a child of a router


-class PrometheusMetric:
-    PREDICTIONS_TOTAL = "predictions_total"
-    MODEL_LATENCY_SECONDS = "model_latency_seconds"
-    INCOME_FEATURES = "income_features"
-    ERRORS_TOTAL = "errors_total"
-    DRIFT_METRICS = "drift_metrics"
-    DRIFT_STATUS = "drift_status"
-
-
-class PrometheusEndpoints(MonitoringStrEnum):
-    MODEL_MONITORING_METRICS = "/model-monitoring-metrics"
-    MONITORING_BATCH_METRICS = "/monitoring-batch-metrics"
-    MONITORING_DRIFT_STATUS = "/monitoring-drift-status"
-
-
 class MonitoringFunctionNames(MonitoringStrEnum):
     STREAM = "model-monitoring-stream"
     APPLICATION_CONTROLLER = "model-monitoring-controller"
@@ -381,3 +365,6 @@ class SpecialApps:


 _RESERVED_FUNCTION_NAMES = MonitoringFunctionNames.list() + [SpecialApps.MLRUN_INFRA]
+
+
+V3IO_MODEL_MONITORING_DB = "v3io"
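The `_RESERVED_FUNCTION_NAMES` line relies on `MonitoringFunctionNames.list()`. A minimal sketch of how such a str-valued enum helper typically works; the `MonitoringStrEnum` name and member values come from the diff, but this implementation is an assumption, not mlrun's actual code:

```python
from enum import Enum


class MonitoringStrEnum(str, Enum):
    @classmethod
    def list(cls) -> list[str]:
        # Collect the plain string values of all members
        return [member.value for member in cls]


class MonitoringFunctionNames(MonitoringStrEnum):
    STREAM = "model-monitoring-stream"
    APPLICATION_CONTROLLER = "model-monitoring-controller"


_RESERVED_FUNCTION_NAMES = MonitoringFunctionNames.list() + ["mlrun-infra"]
print(_RESERVED_FUNCTION_NAMES)
# ['model-monitoring-stream', 'model-monitoring-controller', 'mlrun-infra']
```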
mlrun/common/schemas/notification.py
CHANGED
@@ -55,7 +55,7 @@ class Notification(pydantic.BaseModel):
     message: str
     severity: NotificationSeverity
     when: list[str]
-    condition: str
+    condition: str = None
     params: dict[str, typing.Any] = None
     status: NotificationStatus = None
     sent_time: typing.Union[str, datetime.datetime] = None
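This change makes `condition` optional: under pydantic v1 semantics (the schema derives from `pydantic.BaseModel`), a `None` default turns an annotated field into an implicit `Optional` and stops omission from failing validation. A standalone sketch, not the full mlrun schema:

```python
import pydantic  # pydantic v1 API


class Notification(pydantic.BaseModel):
    message: str
    condition: str = None  # None default makes the field optional in pydantic v1


# Before the change (condition: str with no default), constructing without
# `condition` raised a "field required" validation error; now it validates.
n = Notification(message="run finished")
print(n.condition)  # None
```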
mlrun/common/types.py
CHANGED
mlrun/config.py
CHANGED
@@ -485,10 +485,10 @@ default_config = {
         # pip install <requirement_specifier>, e.g. mlrun==0.5.4, mlrun~=0.5,
         # git+https://github.com/mlrun/mlrun@development. by default uses the version
         "mlrun_version_specifier": "",
-        "kaniko_image": "gcr.io/kaniko-project/executor:v1.…
+        "kaniko_image": "gcr.io/kaniko-project/executor:v1.23.2",  # kaniko builder image
         "kaniko_init_container_image": "alpine:3.18",
         # image for kaniko init container when docker registry is ECR
-        "kaniko_aws_cli_image": "amazon/aws-cli:2.…
+        "kaniko_aws_cli_image": "amazon/aws-cli:2.17.16",
         # kaniko sometimes fails to get filesystem from image, this is a workaround to retry the process
         # a known issue in Kaniko - https://github.com/GoogleContainerTools/kaniko/issues/1717
         "kaniko_image_fs_extraction_retries": "3",
@@ -1166,6 +1166,7 @@ class Config:
             )
         elif kind == "stream":  # return list for mlrun<1.6.3 BC
             return [
+                # TODO: remove the first stream in 1.9.0
                 mlrun.mlconf.model_endpoint_monitoring.store_prefixes.default.format(
                     project=project,
                     kind=kind,
@@ -1237,12 +1238,11 @@ class Config:

         return storage_options

-    def …
-        if not version:
-            version = self.nuclio_version
+    def is_explicit_ack_enabled(self) -> bool:
         return self.httpdb.nuclio.explicit_ack == "enabled" and (
-            not …
-            or semver.VersionInfo.parse(…
+            not self.nuclio_version
+            or semver.VersionInfo.parse(self.nuclio_version)
+            >= semver.VersionInfo.parse("1.12.10")
         )
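The new `is_explicit_ack_enabled` gates explicit acks on both the config flag and the Nuclio version, and an unknown (empty) version passes the gate. A standalone sketch of the same check, assuming only the `semver` package:

```python
import semver


def is_explicit_ack_enabled(explicit_ack: str, nuclio_version: str) -> bool:
    # Enabled only when the flag is set and Nuclio is either unknown
    # (empty string) or at least 1.12.10.
    return explicit_ack == "enabled" and (
        not nuclio_version
        or semver.VersionInfo.parse(nuclio_version)
        >= semver.VersionInfo.parse("1.12.10")
    )


print(is_explicit_ack_enabled("enabled", "1.12.10"))  # True
print(is_explicit_ack_enabled("enabled", "1.11.0"))   # False
print(is_explicit_ack_enabled("enabled", ""))         # True - version unknown
```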
mlrun/datastore/sources.py
CHANGED
@@ -85,7 +85,8 @@ class BaseSourceDriver(DataSource):
         )

         explicit_ack = (
-            is_explicit_ack_supported(context)…
+            is_explicit_ack_supported(context)
+            and mlrun.mlconf.is_explicit_ack_enabled()
         )
         return storey.SyncEmitSource(
             context=context,
@@ -944,7 +945,8 @@ class OnlineSource(BaseSourceDriver):

         source_args = self.attributes.get("source_args", {})
         explicit_ack = (
-            is_explicit_ack_supported(context)…
+            is_explicit_ack_supported(context)
+            and mlrun.mlconf.is_explicit_ack_enabled()
         )
         # TODO: Change to AsyncEmitSource once we can drop support for nuclio<1.12.10
         src_class = storey.SyncEmitSource(
@@ -1029,7 +1031,8 @@ class StreamSource(OnlineSource):
         engine = "async"
         if hasattr(function.spec, "graph") and function.spec.graph.engine:
             engine = function.spec.graph.engine
-        …
+
+        if mlrun.mlconf.is_explicit_ack_enabled() and engine == "async":
             kwargs["explicit_ack_mode"] = "explicitOnly"
             kwargs["worker_allocation_mode"] = "static"

@@ -1116,7 +1119,8 @@ class KafkaSource(OnlineSource):
         engine = "async"
         if hasattr(function.spec, "graph") and function.spec.graph.engine:
             engine = function.spec.graph.engine
-        …
+
+        if mlrun.mlconf.is_explicit_ack_enabled() and engine == "async":
             explicit_ack_mode = "explicitOnly"
             extra_attributes["workerAllocationMode"] = extra_attributes.get(
                 "worker_allocation_mode", "static"
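All four hunks apply the same pattern: the runtime capability check (`is_explicit_ack_supported`) is now ANDed with the config/version gate, so either side can veto explicit acks. A toy illustration of the veto semantics; the names come from the diff, but the bodies here are simplified stand-ins:

```python
def is_explicit_ack_supported(trigger_kind: str) -> bool:
    # The real check inspects the nuclio trigger on the context;
    # here we approximate it with a plain string.
    return trigger_kind in ("v3io-stream", "kafka")


def explicit_ack(trigger_kind: str, enabled_in_config: bool) -> bool:
    # Either side can veto: unsupported trigger OR disabled/old-nuclio config
    return is_explicit_ack_supported(trigger_kind) and enabled_in_config


print(explicit_ack("kafka", True))   # True
print(explicit_ack("kafka", False))  # False - config/version gate vetoes
print(explicit_ack("http", True))    # False - trigger kind vetoes
```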
mlrun/datastore/spark_utils.py
CHANGED
@@ -13,7 +13,10 @@
 # limitations under the License.


+from typing import Union
+
 import mlrun
+from mlrun.features import Entity


 def spark_session_update_hadoop_options(session, spark_options) -> dict[str, str]:
@@ -35,3 +38,30 @@ def spark_session_update_hadoop_options(session, spark_options) -> dict[str, str]:
         else:
             non_hadoop_spark_options[key] = value
     return non_hadoop_spark_options
+
+
+def check_special_columns_exists(
+    spark_df, entities: list[Union[Entity, str]], timestamp_key: str, label_column: str
+):
+    columns = spark_df.columns
+    entities = entities or []
+    entities = [
+        entity.name if isinstance(entity, Entity) else entity for entity in entities
+    ]
+    missing_entities = [entity for entity in entities if entity not in columns]
+    cases_message = "Please check the letter cases (uppercase or lowercase)"
+    if missing_entities:
+        raise mlrun.errors.MLRunInvalidArgumentError(
+            f"There are missing entities from dataframe during ingestion. missing_entities: {missing_entities}."
+            f" {cases_message}"
+        )
+    if timestamp_key and timestamp_key not in columns:
+        raise mlrun.errors.MLRunInvalidArgumentError(
+            f"timestamp_key is missing from dataframe during ingestion. timestamp_key: {timestamp_key}."
+            f" {cases_message}"
+        )
+    if label_column and label_column not in columns:
+        raise mlrun.errors.MLRunInvalidArgumentError(
+            f"label_column is missing from dataframe during ingestion. label_column: {label_column}. "
+            f"{cases_message}"
+        )
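A usage sketch of the new validator, assuming a local `pyspark` installation and this mlrun version; the DataFrame deliberately capitalizes a column to trigger the letter-case error the messages point at:

```python
from pyspark.sql import SparkSession

import mlrun.errors
from mlrun.datastore.spark_utils import check_special_columns_exists

spark = SparkSession.builder.master("local[1]").appName("demo").getOrCreate()
# "Timestamp" is capitalized, so a lookup for "timestamp" will fail
df = spark.createDataFrame([(1, "2024-01-01", 0)], ["id", "Timestamp", "label"])

try:
    check_special_columns_exists(
        spark_df=df, entities=["id"], timestamp_key="timestamp", label_column="label"
    )
except mlrun.errors.MLRunInvalidArgumentError as err:
    print(err)  # timestamp_key is missing ... check the letter cases
```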
mlrun/db/base.py
CHANGED
@@ -242,9 +242,8 @@ class RunDBInterface(ABC):
             )
             artifact_identifiers.append(
                 mlrun.common.schemas.ArtifactIdentifier(
-                    key…
-                    …
-                    # pass the tree as uid
+                    # we pass the db_key and not the key so the API will be able to find the artifact in the db
+                    key=mlrun.utils.get_in_artifact(artifact_obj, "db_key"),
                     uid=mlrun.utils.get_in_artifact(artifact_obj, "uid"),
                     producer_id=mlrun.utils.get_in_artifact(artifact_obj, "tree"),
                     kind=mlrun.utils.get_in_artifact(artifact_obj, "kind"),
mlrun/db/httpdb.py
CHANGED
@@ -1015,7 +1015,7 @@ class HTTPRunDB(RunDBInterface):
             "format": format_,
             "tag": tag,
             "tree": tree,
-            "…
+            "object-uid": uid,
         }
         if iter is not None:
             params["iter"] = str(iter)
@@ -1051,7 +1051,7 @@ class HTTPRunDB(RunDBInterface):
             "key": key,
             "tag": tag,
             "tree": tree,
-            "…
+            "object-uid": uid,
             "iter": iter,
             "deletion_strategy": deletion_strategy,
         }
@@ -3380,7 +3380,7 @@ class HTTPRunDB(RunDBInterface):
         By default, the image is mlrun/mlrun.
         """
         self.api_call(
-            method=mlrun.common.types.HTTPMethod.…
+            method=mlrun.common.types.HTTPMethod.PATCH,
            path=f"projects/{project}/model-monitoring/model-monitoring-controller",
             params={
                 "base_period": base_period,
mlrun/feature_store/api.py
CHANGED
@@ -1032,6 +1032,8 @@ def _ingest_with_spark(
     try:
         import pyspark.sql

+        from mlrun.datastore.spark_utils import check_special_columns_exists
+
         if spark is None or spark is True:
             # create spark context
@@ -1050,7 +1052,6 @@ def _ingest_with_spark(
             created_spark_context = True

         timestamp_key = featureset.spec.timestamp_key
-
         if isinstance(source, pd.DataFrame):
             df = spark.createDataFrame(source)
         elif isinstance(source, pyspark.sql.DataFrame):
@@ -1080,6 +1081,12 @@ def _ingest_with_spark(
         target = get_target_driver(target, featureset)
         target.set_resource(featureset)
         if featureset.spec.passthrough and target.is_offline:
+            check_special_columns_exists(
+                spark_df=df,
+                entities=featureset.spec.entities,
+                timestamp_key=timestamp_key,
+                label_column=featureset.spec.label_column,
+            )
             continue
         spark_options = target.get_spark_options(
             key_columns, timestamp_key, overwrite
@@ -1090,6 +1097,17 @@ def _ingest_with_spark(
             df_to_write, key_columns, timestamp_key, spark_options
         )
         write_format = spark_options.pop("format", None)
+        # We can get to this point if the column exists in different letter cases,
+        # so PySpark will be able to read it, but we still have to raise an exception for it.
+
+        # This check is here and not in to_spark_df because in spark_merger we can have a target
+        # that has different letter cases than the source, like in SnowflakeTarget.
+        check_special_columns_exists(
+            spark_df=df_to_write,
+            entities=featureset.spec.entities,
+            timestamp_key=timestamp_key,
+            label_column=featureset.spec.label_column,
+        )
         if overwrite:
             write_spark_dataframe_with_options(
                 spark_options, df_to_write, "overwrite", write_format=write_format
mlrun/feature_store/steps.py
CHANGED
@@ -743,3 +743,11 @@ class DropFeatures(StepToDict, MLRunStep):
             raise mlrun.errors.MLRunInvalidArgumentError(
                 f"DropFeatures can only drop features, not entities: {dropped_entities}"
             )
+        if feature_set.spec.label_column in features:
+            raise mlrun.errors.MLRunInvalidArgumentError(
+                f"DropFeatures can not drop label_column: {feature_set.spec.label_column}"
+            )
+        if feature_set.spec.timestamp_key in features:
+            raise mlrun.errors.MLRunInvalidArgumentError(
+                f"DropFeatures can not drop timestamp_key: {feature_set.spec.timestamp_key}"
+            )
mlrun/model.py
CHANGED
@@ -754,7 +754,7 @@ class Notification(ModelObj):
                 "Both 'secret_params' and 'params' are empty, at least one must be defined."
             )

-        notification_class.validate_params(secret_params…
+        notification_class.validate_params(secret_params | params)

     @staticmethod
     def validate_notification_uniqueness(notifications: list["Notification"]):
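The fix passes the merged mapping to `validate_params`. `|` is the Python 3.9+ dict-union operator, and the right operand wins on duplicate keys, so explicit `params` override `secret_params` here. A quick illustration:

```python
secret_params = {"webhook": "https://hooks.example.com/abc", "token": "s3cr3t"}
params = {"webhook": "https://hooks.example.com/override"}

merged = secret_params | params  # dict union, Python 3.9+; right side wins
print(merged)
# {'webhook': 'https://hooks.example.com/override', 'token': 's3cr3t'}
```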
mlrun/model_monitoring/api.py
CHANGED
@@ -252,14 +252,31 @@ def _model_endpoint_validations(
     In case of discrepancy between the provided `sample_set_statistics` and the
     `model_endpoints.spec.feature_stats`, a warning will be presented to the user.
     """
-    …
-    …
-    …
-    …
-    …
-    …
+
+    # Model Path
+    if model_path:
+        # Generate the parsed model uri that is based on hash, key, iter, and tree
+        model_obj = mlrun.datastore.get_store_resource(model_path)
+
+        model_artifact_uri = mlrun.utils.helpers.generate_artifact_uri(
+            project=model_endpoint.metadata.project,
+            key=model_obj.key,
+            iter=model_obj.iter,
+            tree=model_obj.tree,
+        )
+
+        # Enrich the uri schema with the store prefix
+        model_artifact_uri = mlrun.datastore.get_store_uri(
+            kind=mlrun.utils.helpers.StorePrefix.Model, uri=model_artifact_uri
         )

+        if model_endpoint.spec.model_uri != model_artifact_uri:
+            raise mlrun.errors.MLRunInvalidArgumentError(
+                f"provided model store path {model_path} does not match "
+                f"the path that is stored under the existing model "
+                f"endpoint record: {model_endpoint.spec.model_uri}"
+            )
+
     # Feature stats
     if (
         sample_set_statistics
mlrun/model_monitoring/applications/_application_steps.py
CHANGED
@@ -19,6 +19,7 @@ import mlrun.common.helpers
 import mlrun.common.model_monitoring.helpers
 import mlrun.common.schemas.model_monitoring.constants as mm_constant
 import mlrun.datastore
+import mlrun.serving
 import mlrun.utils.v3io_clients
 from mlrun.model_monitoring.helpers import get_stream_path
 from mlrun.serving.utils import StepToDict
@@ -151,6 +152,9 @@ class _PrepareMonitoringEvent(StepToDict):
     def _create_mlrun_context(app_name: str):
         context = mlrun.get_or_create_ctx(
             f"{app_name}-logger",
+            spec={
+                "metadata": {"labels": {"kind": mlrun.runtimes.RuntimeKinds.serving}}
+            },
             upload_artifacts=True,
         )
         context.__class__ = MonitoringApplicationContext
mlrun/model_monitoring/applications/base.py
CHANGED
@@ -17,6 +17,7 @@ from typing import Any, Union, cast

 import numpy as np
 import pandas as pd
+from deprecated import deprecated

 import mlrun
 import mlrun.model_monitoring.applications.context as mm_context
@@ -112,6 +113,13 @@ class ModelMonitoringApplicationBaseV2(MonitoringApplicationToDict, ABC):
         raise NotImplementedError


+# TODO: Remove in 1.9.0
+@deprecated(
+    version="1.7.0",
+    reason="The `ModelMonitoringApplicationBase` class is deprecated from "
+    "version 1.7.0 and will be removed in version 1.9.0. "
+    "Use `ModelMonitoringApplicationBaseV2` as your application's base class.",
+)
 class ModelMonitoringApplicationBase(MonitoringApplicationToDict, ABC):
     """
     A base class for a model monitoring application.
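`deprecated` here is the PyPI `Deprecated` package; decorating a class patches its `__new__`, so instantiation, including instantiation of subclasses that inherit it, emits a `DeprecationWarning` with the given reason. A standalone sketch with a hypothetical class pair:

```python
import warnings

from deprecated import deprecated  # pip install Deprecated


@deprecated(
    version="1.7.0",
    reason="Use `AppBaseV2` as your application's base class.",
)
class AppBase:
    pass


class MyApp(AppBase):
    pass


with warnings.catch_warnings(record=True) as caught:
    warnings.simplefilter("always")
    MyApp()  # instantiating the subclass triggers the DeprecationWarning
    print(caught[0].category.__name__, "-", caught[0].message)
```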
mlrun/model_monitoring/applications/evidently_base.py
CHANGED
@@ -14,16 +14,17 @@

 import uuid
 import warnings
-from …
+from abc import ABC

 import pandas as pd
 import semver
+from deprecated import deprecated

 import mlrun.model_monitoring.applications.base as mm_base
 import mlrun.model_monitoring.applications.context as mm_context
 from mlrun.errors import MLRunIncompatibleVersionError

-SUPPORTED_EVIDENTLY_VERSION = semver.Version.parse("0.4.…
+SUPPORTED_EVIDENTLY_VERSION = semver.Version.parse("0.4.32")


 def _check_evidently_version(*, cur: semver.Version, ref: semver.Version) -> None:
@@ -57,15 +58,22 @@ except ModuleNotFoundError:


 if _HAS_EVIDENTLY:
-    from evidently.…
-    from evidently.report.report import Report
-    from evidently.suite.base_suite import Suite
+    from evidently.suite.base_suite import Display
     from evidently.ui.type_aliases import STR_UUID
     from evidently.ui.workspace import Workspace
-    from evidently.utils.dashboard import TemplateParams
-
-
-class EvidentlyModelMonitoringApplicationBase(mm_base.ModelMonitoringApplicationBase):
+    from evidently.utils.dashboard import TemplateParams, file_html_template
+
+
+# TODO: Remove in 1.9.0
+@deprecated(
+    version="1.7.0",
+    reason="The `EvidentlyModelMonitoringApplicationBase` class is deprecated from "
+    "version 1.7.0 and will be removed in version 1.9.0. "
+    "Use `EvidentlyModelMonitoringApplicationBaseV2` as your application's base class.",
+)
+class EvidentlyModelMonitoringApplicationBase(
+    mm_base.ModelMonitoringApplicationBase, ABC
+):
     def __init__(
         self, evidently_workspace_path: str, evidently_project_id: "STR_UUID"
     ) -> None:
@@ -87,12 +95,12 @@ class EvidentlyModelMonitoringApplicationBase(mm_base.ModelMonitoringApplicationBase):
         )

     def log_evidently_object(
-        self, evidently_object: …
-    ):
+        self, evidently_object: "Display", artifact_name: str
+    ) -> None:
         """
         Logs an Evidently report or suite as an artifact.

-        :param evidently_object: (…
+        :param evidently_object: (Display) The Evidently display to log, e.g. a report or a test suite object.
         :param artifact_name: (str) The name for the logged artifact.
         """
         evidently_object_html = evidently_object.get_html()
@@ -123,18 +131,14 @@ class EvidentlyModelMonitoringApplicationBase(mm_base.ModelMonitoringApplicationBase):
             additional_graphs={},
         )

-        dashboard_html = …
+        dashboard_html = file_html_template(params=template_params)
         self.context.log_artifact(
             artifact_name, body=dashboard_html.encode("utf-8"), format="html"
         )

-    @staticmethod
-    def _render(temple_func, template_params: "TemplateParams"):
-        return temple_func(params=template_params)
-

 class EvidentlyModelMonitoringApplicationBaseV2(
-    mm_base.ModelMonitoringApplicationBaseV2
+    mm_base.ModelMonitoringApplicationBaseV2, ABC
 ):
     def __init__(
         self, evidently_workspace_path: str, evidently_project_id: "STR_UUID"
@@ -161,14 +165,14 @@ class EvidentlyModelMonitoringApplicationBaseV2(
     @staticmethod
     def log_evidently_object(
         monitoring_context: mm_context.MonitoringApplicationContext,
-        evidently_object: …
+        evidently_object: "Display",
         artifact_name: str,
-    ):
+    ) -> None:
         """
         Logs an Evidently report or suite as an artifact.

         :param monitoring_context: (MonitoringApplicationContext) The monitoring context to process.
-        :param evidently_object: (…
+        :param evidently_object: (Display) The Evidently display to log, e.g. a report or a test suite object.
         :param artifact_name: (str) The name for the logged artifact.
         """
         evidently_object_html = evidently_object.get_html()
@@ -182,7 +186,7 @@ class EvidentlyModelMonitoringApplicationBaseV2(
         timestamp_start: pd.Timestamp,
         timestamp_end: pd.Timestamp,
         artifact_name: str = "dashboard",
-    ):
+    ) -> None:
         """
         Logs an Evidently project dashboard.

@@ -201,11 +205,7 @@ class EvidentlyModelMonitoringApplicationBaseV2(
             additional_graphs={},
         )

-        dashboard_html = …
+        dashboard_html = file_html_template(params=template_params)
         monitoring_context.log_artifact(
             artifact_name, body=dashboard_html.encode("utf-8"), format="html"
         )
-
-    @staticmethod
-    def _render(temple_func, template_params: "TemplateParams"):
-        return temple_func(params=template_params)
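The supported Evidently pin moves to 0.4.32 and is compared via `semver.Version`. The body of `_check_evidently_version` is not shown in this diff; a plausible sketch of such a guard, where the accept/reject policy is an assumption:

```python
import semver

SUPPORTED_EVIDENTLY_VERSION = semver.Version.parse("0.4.32")


def check_evidently_version(*, cur: semver.Version, ref: semver.Version) -> None:
    # One plausible policy (the real mlrun rule may differ): accept the same
    # major.minor line as the pinned reference, reject anything else.
    if (cur.major, cur.minor) != (ref.major, ref.minor):
        raise RuntimeError(
            f"Evidently {cur} is not supported, pinned version is {ref}"
        )


check_evidently_version(
    cur=semver.Version.parse("0.4.33"), ref=SUPPORTED_EVIDENTLY_VERSION
)  # passes: same 0.4 line
```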
mlrun/model_monitoring/controller.py
CHANGED
@@ -15,6 +15,7 @@
 import concurrent.futures
 import datetime
 import json
+import multiprocessing
 import os
 import re
 from collections.abc import Iterator
@@ -363,7 +364,10 @@ class MonitoringApplicationController:
             return
         # Initialize a process pool that will be used to run each endpoint applications on a dedicated process
         with concurrent.futures.ProcessPoolExecutor(
-            max_workers=min(len(endpoints), 10)
+            max_workers=min(len(endpoints), 10),
+            # On Linux, the default is "fork" (this is set to change in Python 3.14), which inherits the current heap
+            # and resources (such as sockets), which is not what we want (ML-7160)
+            mp_context=multiprocessing.get_context("spawn"),
         ) as pool:
             for endpoint in endpoints:
                 if (
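`mp_context` has been a `ProcessPoolExecutor` parameter since Python 3.7; passing a "spawn" context starts workers from a fresh interpreter instead of forking, so they do not inherit the parent's heap, open sockets, or locks. A minimal self-contained sketch:

```python
import concurrent.futures
import multiprocessing
import os


def work(i: int) -> str:
    # Runs in a freshly spawned interpreter, not a fork of the parent
    return f"task {i} in pid {os.getpid()}"


if __name__ == "__main__":  # required under the spawn start method
    with concurrent.futures.ProcessPoolExecutor(
        max_workers=2,
        mp_context=multiprocessing.get_context("spawn"),
    ) as pool:
        for line in pool.map(work, range(4)):
            print(line)
```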
mlrun/model_monitoring/db/stores/sqldb/sql_store.py
CHANGED
@@ -177,6 +177,11 @@ class SQLStoreBase(StoreBase):
         :param table:    SQLAlchemy declarative table.
         :param criteria: A list of binary expressions that filter the query.
         """
+        if not self._engine.has_table(table.__tablename__):
+            logger.debug(
+                f"Table {table.__tablename__} does not exist in the database. Skipping deletion."
+            )
+            return
         with create_session(dsn=self._sql_connection_string) as session:
             # Generate and commit the delete query
             session.query(
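The guard avoids issuing a DELETE against a table that was never created. `Engine.has_table` is the SQLAlchemy 1.x spelling; a standalone sketch of the same existence check using the portable `inspect()` API against an in-memory SQLite database (the table name here is illustrative):

```python
import sqlalchemy

engine = sqlalchemy.create_engine("sqlite://")


def delete_all_rows(engine, table_name: str) -> None:
    # Skip the DELETE entirely if the table was never created
    if not sqlalchemy.inspect(engine).has_table(table_name):
        print(f"Table {table_name} does not exist. Skipping deletion.")
        return
    with engine.begin() as conn:
        conn.execute(sqlalchemy.text(f"DELETE FROM {table_name}"))


delete_all_rows(engine, "application_results")  # prints the skip message
```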
mlrun/model_monitoring/db/stores/v3io_kv/kv_store.py
CHANGED
@@ -408,14 +408,14 @@ class KVStoreBase(StoreBase):

         """
         try:
-            …
+            response = self.client.kv.get(
                 container=self._get_monitoring_schedules_container(
                     project_name=self.project
                 ),
                 table_path=endpoint_id,
                 key=application_name,
             )
-            return …
+            return response.output.item[mm_schemas.SchedulingKeys.LAST_ANALYZED]
         except v3io.dataplane.response.HttpResponseError as err:
             logger.debug("Error while getting last analyzed time", err=err)
             raise mlrun.errors.MLRunNotFoundError(
mlrun/model_monitoring/db/tsdb/base.py
CHANGED
@@ -27,7 +27,7 @@ from mlrun.utils import logger
 class TSDBConnector(ABC):
     type: typing.ClassVar[str]

-    def __init__(self, project: str):
+    def __init__(self, project: str) -> None:
         """
         Initialize a new TSDB connector. The connector is used to interact with the TSDB and store monitoring data.
         At the moment we have 3 different types of monitoring data:
@@ -42,10 +42,10 @@ class TSDBConnector(ABC):
         writer.

         :param project: the name of the project.
-
         """
         self.project = project

+    @abstractmethod
     def apply_monitoring_stream_steps(self, graph):
         """
         Apply TSDB steps on the provided monitoring graph. Throughout these steps, the graph stores live data of
@@ -58,6 +58,7 @@ class TSDBConnector(ABC):
         """
         pass

+    @abstractmethod
     def write_application_event(
         self,
         event: dict,
@@ -69,13 +70,14 @@ class TSDBConnector(ABC):
         :raise mlrun.errors.MLRunRuntimeError: If an error occurred while writing the event.
         """

+    @abstractmethod
     def delete_tsdb_resources(self):
         """
         Delete all project resources in the TSDB connector, such as model endpoints data and drift results.
         """
-
         pass

+    @abstractmethod
     def get_model_endpoint_real_time_metrics(
         self,
         endpoint_id: str,
@@ -102,6 +104,7 @@ class TSDBConnector(ABC):
         """
         pass

+    @abstractmethod
     def create_tables(self) -> None:
         """
         Create the TSDB tables using the TSDB connector. At the moment we support 3 types of tables:
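With `@abstractmethod` applied (the class already derives from `ABC`), a connector that fails to override these methods can no longer be instantiated; previously the no-op base implementations were silently inherited. A quick illustration:

```python
from abc import ABC, abstractmethod


class TSDBConnector(ABC):
    def __init__(self, project: str) -> None:
        self.project = project

    @abstractmethod
    def delete_tsdb_resources(self):
        """Delete all project resources in the TSDB."""


class IncompleteConnector(TSDBConnector):
    pass  # does not override delete_tsdb_resources


try:
    IncompleteConnector("demo")
except TypeError as err:
    print(err)  # Can't instantiate abstract class ... delete_tsdb_resources
```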
mlrun/model_monitoring/db/tsdb/tdengine/stream_graph_steps.py
CHANGED
@@ -11,7 +11,6 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-#

 import json

@@ -21,8 +20,6 @@ from mlrun.common.schemas.model_monitoring import (
     EventKeyMetrics,
 )

-_TABLE_COLUMN = "table_column"
-

 class ProcessBeforeTDEngine(mlrun.feature_store.steps.MapClass):
     def __init__(self, **kwargs):