mlrun 1.7.0rc14__py3-none-any.whl → 1.7.0rc16__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of mlrun might be problematic. Click here for more details.
- mlrun/__init__.py +10 -1
- mlrun/__main__.py +18 -109
- mlrun/{runtimes/mpijob/v1alpha1.py → alerts/__init__.py} +2 -16
- mlrun/alerts/alert.py +141 -0
- mlrun/artifacts/__init__.py +8 -3
- mlrun/artifacts/base.py +36 -253
- mlrun/artifacts/dataset.py +9 -190
- mlrun/artifacts/manager.py +20 -41
- mlrun/artifacts/model.py +8 -140
- mlrun/artifacts/plots.py +14 -375
- mlrun/common/schemas/__init__.py +4 -2
- mlrun/common/schemas/alert.py +46 -4
- mlrun/common/schemas/api_gateway.py +4 -0
- mlrun/common/schemas/artifact.py +15 -0
- mlrun/common/schemas/auth.py +2 -0
- mlrun/common/schemas/model_monitoring/__init__.py +8 -1
- mlrun/common/schemas/model_monitoring/constants.py +40 -4
- mlrun/common/schemas/model_monitoring/model_endpoints.py +73 -2
- mlrun/common/schemas/project.py +2 -0
- mlrun/config.py +7 -4
- mlrun/data_types/to_pandas.py +4 -4
- mlrun/datastore/base.py +41 -9
- mlrun/datastore/datastore_profile.py +54 -4
- mlrun/datastore/inmem.py +2 -2
- mlrun/datastore/sources.py +43 -2
- mlrun/datastore/store_resources.py +2 -6
- mlrun/datastore/targets.py +106 -39
- mlrun/db/base.py +23 -3
- mlrun/db/httpdb.py +101 -47
- mlrun/db/nopdb.py +20 -2
- mlrun/errors.py +5 -0
- mlrun/feature_store/__init__.py +0 -2
- mlrun/feature_store/api.py +12 -47
- mlrun/feature_store/feature_set.py +9 -0
- mlrun/feature_store/retrieval/base.py +9 -4
- mlrun/feature_store/retrieval/conversion.py +4 -4
- mlrun/feature_store/retrieval/dask_merger.py +2 -0
- mlrun/feature_store/retrieval/job.py +2 -0
- mlrun/feature_store/retrieval/local_merger.py +2 -0
- mlrun/feature_store/retrieval/spark_merger.py +5 -0
- mlrun/frameworks/_dl_common/loggers/tensorboard_logger.py +5 -10
- mlrun/launcher/base.py +4 -3
- mlrun/launcher/client.py +1 -1
- mlrun/lists.py +4 -2
- mlrun/model.py +25 -11
- mlrun/model_monitoring/__init__.py +1 -1
- mlrun/model_monitoring/api.py +41 -18
- mlrun/model_monitoring/application.py +5 -305
- mlrun/model_monitoring/applications/__init__.py +11 -0
- mlrun/model_monitoring/applications/_application_steps.py +157 -0
- mlrun/model_monitoring/applications/base.py +282 -0
- mlrun/model_monitoring/applications/context.py +214 -0
- mlrun/model_monitoring/applications/evidently_base.py +211 -0
- mlrun/model_monitoring/applications/histogram_data_drift.py +132 -91
- mlrun/model_monitoring/applications/results.py +99 -0
- mlrun/model_monitoring/controller.py +3 -1
- mlrun/model_monitoring/db/__init__.py +2 -0
- mlrun/model_monitoring/db/stores/base/store.py +9 -36
- mlrun/model_monitoring/db/stores/sqldb/models/base.py +7 -6
- mlrun/model_monitoring/db/stores/sqldb/sql_store.py +63 -110
- mlrun/model_monitoring/db/stores/v3io_kv/kv_store.py +104 -187
- mlrun/model_monitoring/db/tsdb/__init__.py +71 -0
- mlrun/model_monitoring/db/tsdb/base.py +135 -0
- mlrun/model_monitoring/db/tsdb/v3io/__init__.py +15 -0
- mlrun/model_monitoring/db/tsdb/v3io/stream_graph_steps.py +117 -0
- mlrun/model_monitoring/db/tsdb/v3io/v3io_connector.py +404 -0
- mlrun/model_monitoring/db/v3io_tsdb_reader.py +134 -0
- mlrun/model_monitoring/evidently_application.py +6 -118
- mlrun/model_monitoring/helpers.py +1 -1
- mlrun/model_monitoring/model_endpoint.py +3 -2
- mlrun/model_monitoring/stream_processing.py +48 -213
- mlrun/model_monitoring/writer.py +101 -121
- mlrun/platforms/__init__.py +10 -9
- mlrun/platforms/iguazio.py +21 -202
- mlrun/projects/operations.py +11 -7
- mlrun/projects/pipelines.py +13 -76
- mlrun/projects/project.py +73 -45
- mlrun/render.py +11 -13
- mlrun/run.py +6 -41
- mlrun/runtimes/__init__.py +3 -3
- mlrun/runtimes/base.py +6 -6
- mlrun/runtimes/funcdoc.py +0 -28
- mlrun/runtimes/kubejob.py +2 -1
- mlrun/runtimes/local.py +1 -1
- mlrun/runtimes/mpijob/__init__.py +0 -20
- mlrun/runtimes/mpijob/v1.py +1 -1
- mlrun/runtimes/nuclio/api_gateway.py +75 -9
- mlrun/runtimes/nuclio/function.py +9 -35
- mlrun/runtimes/pod.py +16 -36
- mlrun/runtimes/remotesparkjob.py +1 -1
- mlrun/runtimes/sparkjob/spark3job.py +1 -1
- mlrun/runtimes/utils.py +1 -39
- mlrun/utils/helpers.py +72 -71
- mlrun/utils/notifications/notification/base.py +1 -1
- mlrun/utils/notifications/notification/slack.py +12 -5
- mlrun/utils/notifications/notification/webhook.py +1 -1
- mlrun/utils/notifications/notification_pusher.py +134 -14
- mlrun/utils/version/version.json +2 -2
- {mlrun-1.7.0rc14.dist-info → mlrun-1.7.0rc16.dist-info}/METADATA +4 -3
- {mlrun-1.7.0rc14.dist-info → mlrun-1.7.0rc16.dist-info}/RECORD +105 -95
- mlrun/kfpops.py +0 -865
- mlrun/platforms/other.py +0 -305
- /mlrun/{runtimes → common/runtimes}/constants.py +0 -0
- {mlrun-1.7.0rc14.dist-info → mlrun-1.7.0rc16.dist-info}/LICENSE +0 -0
- {mlrun-1.7.0rc14.dist-info → mlrun-1.7.0rc16.dist-info}/WHEEL +0 -0
- {mlrun-1.7.0rc14.dist-info → mlrun-1.7.0rc16.dist-info}/entry_points.txt +0 -0
- {mlrun-1.7.0rc14.dist-info → mlrun-1.7.0rc16.dist-info}/top_level.txt +0 -0
|
@@ -42,6 +42,7 @@ def run_merge_job(
|
|
|
42
42
|
start_time=None,
|
|
43
43
|
end_time=None,
|
|
44
44
|
timestamp_for_filtering=None,
|
|
45
|
+
additional_filters=None,
|
|
45
46
|
):
|
|
46
47
|
name = vector.metadata.name
|
|
47
48
|
if not target or not hasattr(target, "to_dict"):
|
|
@@ -116,6 +117,7 @@ def run_merge_job(
|
|
|
116
117
|
"end_time": end_time,
|
|
117
118
|
"timestamp_for_filtering": timestamp_for_filtering,
|
|
118
119
|
"engine_args": engine_args,
|
|
120
|
+
"additional_filters": additional_filters,
|
|
119
121
|
},
|
|
120
122
|
inputs={"entity_rows": entity_rows} if entity_rows is not None else {},
|
|
121
123
|
)
|
|
@@ -114,12 +114,14 @@ class LocalFeatureMerger(BaseMerger):
|
|
|
114
114
|
start_time=None,
|
|
115
115
|
end_time=None,
|
|
116
116
|
time_column=None,
|
|
117
|
+
additional_filters=None,
|
|
117
118
|
):
|
|
118
119
|
df = feature_set.to_dataframe(
|
|
119
120
|
columns=column_names,
|
|
120
121
|
start_time=start_time,
|
|
121
122
|
end_time=end_time,
|
|
122
123
|
time_column=time_column,
|
|
124
|
+
additional_filters=additional_filters,
|
|
123
125
|
)
|
|
124
126
|
if df.index.names[0]:
|
|
125
127
|
df.reset_index(inplace=True)
|
|
@@ -225,7 +225,12 @@ class SparkFeatureMerger(BaseMerger):
|
|
|
225
225
|
start_time=None,
|
|
226
226
|
end_time=None,
|
|
227
227
|
time_column=None,
|
|
228
|
+
additional_filters=None,
|
|
228
229
|
):
|
|
230
|
+
mlrun.utils.helpers.additional_filters_warning(
|
|
231
|
+
additional_filters, self.__class__
|
|
232
|
+
)
|
|
233
|
+
|
|
229
234
|
source_kwargs = {}
|
|
230
235
|
if feature_set.spec.passthrough:
|
|
231
236
|
if not feature_set.spec.source:
|
|
@@ -547,9 +547,9 @@ class TensorboardLogger(Logger, Generic[DLTypes.WeightType]):
|
|
|
547
547
|
"inputs",
|
|
548
548
|
"parameters",
|
|
549
549
|
]:
|
|
550
|
-
text +=
|
|
551
|
-
property_name.capitalize()
|
|
552
|
-
self._markdown_print(value=property_value, tabs=2)
|
|
550
|
+
text += (
|
|
551
|
+
f"\n * **{property_name.capitalize()}**: "
|
|
552
|
+
f"{self._markdown_print(value=property_value, tabs=2)}"
|
|
553
553
|
)
|
|
554
554
|
else:
|
|
555
555
|
for property_name, property_value in self._extract_epoch_results().items():
|
|
@@ -614,13 +614,8 @@ class TensorboardLogger(Logger, Generic[DLTypes.WeightType]):
|
|
|
614
614
|
:return: The generated link.
|
|
615
615
|
"""
|
|
616
616
|
return (
|
|
617
|
-
'<a href="{}/{}/{}
|
|
618
|
-
|
|
619
|
-
config.ui.projects_prefix,
|
|
620
|
-
context.project,
|
|
621
|
-
context.uid,
|
|
622
|
-
link_text,
|
|
623
|
-
)
|
|
617
|
+
f'<a href="{config.resolve_ui_url()}/{config.ui.projects_prefix}/{context.project}'
|
|
618
|
+
f'/jobs/monitor/{context.uid}/overview" target="_blank">{link_text}</a>'
|
|
624
619
|
)
|
|
625
620
|
|
|
626
621
|
@staticmethod
|
mlrun/launcher/base.py
CHANGED
|
@@ -18,10 +18,11 @@ import os
|
|
|
18
18
|
import uuid
|
|
19
19
|
from typing import Any, Callable, Optional, Union
|
|
20
20
|
|
|
21
|
+
import mlrun_pipelines.common.ops
|
|
22
|
+
|
|
21
23
|
import mlrun.common.schemas
|
|
22
24
|
import mlrun.config
|
|
23
25
|
import mlrun.errors
|
|
24
|
-
import mlrun.kfpops
|
|
25
26
|
import mlrun.lists
|
|
26
27
|
import mlrun.model
|
|
27
28
|
import mlrun.runtimes
|
|
@@ -390,7 +391,7 @@ class BaseLauncher(abc.ABC):
|
|
|
390
391
|
return
|
|
391
392
|
|
|
392
393
|
if result and runtime.kfp and err is None:
|
|
393
|
-
|
|
394
|
+
mlrun_pipelines.common.ops.write_kfpmeta(result)
|
|
394
395
|
|
|
395
396
|
self._log_track_results(runtime.is_child, result, run)
|
|
396
397
|
|
|
@@ -403,7 +404,7 @@ class BaseLauncher(abc.ABC):
|
|
|
403
404
|
)
|
|
404
405
|
if (
|
|
405
406
|
run.status.state
|
|
406
|
-
in mlrun.runtimes.constants.RunStates.error_and_abortion_states()
|
|
407
|
+
in mlrun.common.runtimes.constants.RunStates.error_and_abortion_states()
|
|
407
408
|
):
|
|
408
409
|
if runtime._is_remote and not runtime.is_child:
|
|
409
410
|
logger.error(
|
mlrun/launcher/client.py
CHANGED
|
@@ -71,7 +71,7 @@ class ClientBaseLauncher(launcher.BaseLauncher, abc.ABC):
|
|
|
71
71
|
):
|
|
72
72
|
run.metadata.labels["kind"] = runtime.kind
|
|
73
73
|
mlrun.runtimes.utils.enrich_run_labels(
|
|
74
|
-
run.metadata.labels, [mlrun.runtimes.constants.RunLabels.owner]
|
|
74
|
+
run.metadata.labels, [mlrun.common.runtimes.constants.RunLabels.owner]
|
|
75
75
|
)
|
|
76
76
|
if run.spec.output_path:
|
|
77
77
|
run.spec.output_path = run.spec.output_path.replace(
|
mlrun/lists.py
CHANGED
|
@@ -21,7 +21,7 @@ import mlrun.frameworks
|
|
|
21
21
|
from .artifacts import Artifact, dict_to_artifact
|
|
22
22
|
from .config import config
|
|
23
23
|
from .render import artifacts_to_html, runs_to_html
|
|
24
|
-
from .utils import flatten, get_artifact_target, get_in
|
|
24
|
+
from .utils import flatten, get_artifact_target, get_in
|
|
25
25
|
|
|
26
26
|
list_header = [
|
|
27
27
|
"project",
|
|
@@ -35,6 +35,7 @@ list_header = [
|
|
|
35
35
|
"parameters",
|
|
36
36
|
"results",
|
|
37
37
|
"artifacts",
|
|
38
|
+
"artifact_uris",
|
|
38
39
|
"error",
|
|
39
40
|
]
|
|
40
41
|
|
|
@@ -62,6 +63,7 @@ class RunList(list):
|
|
|
62
63
|
get_in(run, "spec.parameters", ""),
|
|
63
64
|
get_in(run, "status.results", ""),
|
|
64
65
|
get_in(run, "status.artifacts", []),
|
|
66
|
+
get_in(run, "status.artifact_uris", {}),
|
|
65
67
|
get_in(run, "status.error", ""),
|
|
66
68
|
]
|
|
67
69
|
if extend_iterations and iterations:
|
|
@@ -184,7 +186,7 @@ class ArtifactList(list):
|
|
|
184
186
|
"uri": ["uri", "uri"],
|
|
185
187
|
}
|
|
186
188
|
for artifact in self:
|
|
187
|
-
fields_index =
|
|
189
|
+
fields_index = 1
|
|
188
190
|
row = [get_in(artifact, v[fields_index], "") for k, v in head.items()]
|
|
189
191
|
artifact_uri = dict_to_artifact(artifact).uri
|
|
190
192
|
last_index = len(row) - 1
|
mlrun/model.py
CHANGED
|
@@ -33,7 +33,6 @@ from .utils import (
|
|
|
33
33
|
dict_to_json,
|
|
34
34
|
dict_to_yaml,
|
|
35
35
|
get_artifact_target,
|
|
36
|
-
is_legacy_artifact,
|
|
37
36
|
logger,
|
|
38
37
|
template_artifact_path,
|
|
39
38
|
)
|
|
@@ -1208,6 +1207,7 @@ class RunStatus(ModelObj):
|
|
|
1208
1207
|
ui_url=None,
|
|
1209
1208
|
reason: str = None,
|
|
1210
1209
|
notifications: dict[str, Notification] = None,
|
|
1210
|
+
artifact_uris: dict[str, str] = None,
|
|
1211
1211
|
):
|
|
1212
1212
|
self.state = state or "created"
|
|
1213
1213
|
self.status_text = status_text
|
|
@@ -1222,6 +1222,8 @@ class RunStatus(ModelObj):
|
|
|
1222
1222
|
self.ui_url = ui_url
|
|
1223
1223
|
self.reason = reason
|
|
1224
1224
|
self.notifications = notifications or {}
|
|
1225
|
+
# Artifact key -> URI mapping, since the full artifacts are not stored in the runs DB table
|
|
1226
|
+
self.artifact_uris = artifact_uris or {}
|
|
1225
1227
|
|
|
1226
1228
|
def is_failed(self) -> Optional[bool]:
|
|
1227
1229
|
"""
|
|
@@ -1435,11 +1437,14 @@ class RunObject(RunTemplate):
|
|
|
1435
1437
|
unknown_error = ""
|
|
1436
1438
|
if (
|
|
1437
1439
|
self.status.state
|
|
1438
|
-
in mlrun.runtimes.constants.RunStates.abortion_states()
|
|
1440
|
+
in mlrun.common.runtimes.constants.RunStates.abortion_states()
|
|
1439
1441
|
):
|
|
1440
1442
|
unknown_error = "Run was aborted"
|
|
1441
1443
|
|
|
1442
|
-
elif
|
|
1444
|
+
elif (
|
|
1445
|
+
self.status.state
|
|
1446
|
+
in mlrun.common.runtimes.constants.RunStates.error_states()
|
|
1447
|
+
):
|
|
1443
1448
|
unknown_error = "Unknown error"
|
|
1444
1449
|
|
|
1445
1450
|
return (
|
|
@@ -1477,7 +1482,7 @@ class RunObject(RunTemplate):
|
|
|
1477
1482
|
outputs = {k: v for k, v in self.status.results.items()}
|
|
1478
1483
|
if self.status.artifacts:
|
|
1479
1484
|
for a in self.status.artifacts:
|
|
1480
|
-
key = a["
|
|
1485
|
+
key = a["metadata"]["key"]
|
|
1481
1486
|
outputs[key] = get_artifact_target(a, self.metadata.project)
|
|
1482
1487
|
return outputs
|
|
1483
1488
|
|
|
@@ -1520,7 +1525,10 @@ class RunObject(RunTemplate):
|
|
|
1520
1525
|
|
|
1521
1526
|
def state(self):
|
|
1522
1527
|
"""current run state"""
|
|
1523
|
-
if
|
|
1528
|
+
if (
|
|
1529
|
+
self.status.state
|
|
1530
|
+
in mlrun.common.runtimes.constants.RunStates.terminal_states()
|
|
1531
|
+
):
|
|
1524
1532
|
return self.status.state
|
|
1525
1533
|
self.refresh()
|
|
1526
1534
|
return self.status.state or "unknown"
|
|
@@ -1534,8 +1542,10 @@ class RunObject(RunTemplate):
|
|
|
1534
1542
|
iter=self.metadata.iteration,
|
|
1535
1543
|
)
|
|
1536
1544
|
if run:
|
|
1537
|
-
|
|
1538
|
-
|
|
1545
|
+
run_status = run.get("status", {})
|
|
1546
|
+
# Artifacts are not stored in the DB, so we need to preserve them here
|
|
1547
|
+
run_status["artifacts"] = self.status.artifacts
|
|
1548
|
+
self.status = RunStatus.from_dict(run_status)
|
|
1539
1549
|
return self
|
|
1540
1550
|
|
|
1541
1551
|
def show(self):
|
|
@@ -1582,7 +1592,7 @@ class RunObject(RunTemplate):
|
|
|
1582
1592
|
last_pull_log_time = None
|
|
1583
1593
|
logs_enabled = show_logs is not False
|
|
1584
1594
|
state = self.state()
|
|
1585
|
-
if state not in mlrun.runtimes.constants.RunStates.terminal_states():
|
|
1595
|
+
if state not in mlrun.common.runtimes.constants.RunStates.terminal_states():
|
|
1586
1596
|
logger.info(
|
|
1587
1597
|
f"run {self.metadata.name} is not completed yet, waiting for it to complete",
|
|
1588
1598
|
current_state=state,
|
|
@@ -1592,7 +1602,8 @@ class RunObject(RunTemplate):
|
|
|
1592
1602
|
if (
|
|
1593
1603
|
logs_enabled
|
|
1594
1604
|
and logs_interval
|
|
1595
|
-
and state
|
|
1605
|
+
and state
|
|
1606
|
+
not in mlrun.common.runtimes.constants.RunStates.terminal_states()
|
|
1596
1607
|
and (
|
|
1597
1608
|
last_pull_log_time is None
|
|
1598
1609
|
or (datetime.now() - last_pull_log_time).seconds > logs_interval
|
|
@@ -1601,7 +1612,7 @@ class RunObject(RunTemplate):
|
|
|
1601
1612
|
last_pull_log_time = datetime.now()
|
|
1602
1613
|
state, offset = self.logs(watch=False, offset=offset)
|
|
1603
1614
|
|
|
1604
|
-
if state in mlrun.runtimes.constants.RunStates.terminal_states():
|
|
1615
|
+
if state in mlrun.common.runtimes.constants.RunStates.terminal_states():
|
|
1605
1616
|
if logs_enabled and logs_interval:
|
|
1606
1617
|
self.logs(watch=False, offset=offset)
|
|
1607
1618
|
break
|
|
@@ -1613,7 +1624,10 @@ class RunObject(RunTemplate):
|
|
|
1613
1624
|
)
|
|
1614
1625
|
if logs_enabled and not logs_interval:
|
|
1615
1626
|
self.logs(watch=False)
|
|
1616
|
-
if
|
|
1627
|
+
if (
|
|
1628
|
+
raise_on_failure
|
|
1629
|
+
and state != mlrun.common.runtimes.constants.RunStates.completed
|
|
1630
|
+
):
|
|
1617
1631
|
raise mlrun.errors.MLRunRuntimeError(
|
|
1618
1632
|
f"Task {self.metadata.name} did not complete (state={state})"
|
|
1619
1633
|
)
|
|
@@ -15,7 +15,7 @@
|
|
|
15
15
|
# flake8: noqa - this is until we take care of the F401 violations with respect to __all__ & sphinx
|
|
16
16
|
# for backwards compatibility
|
|
17
17
|
|
|
18
|
-
from .db import get_store_object
|
|
18
|
+
from .db import get_store_object, get_tsdb_connector
|
|
19
19
|
from .helpers import get_stream_path
|
|
20
20
|
from .model_endpoint import ModelEndpoint
|
|
21
21
|
from .tracking_policy import TrackingPolicy
|
mlrun/model_monitoring/api.py
CHANGED
|
@@ -22,9 +22,10 @@ import pandas as pd
|
|
|
22
22
|
|
|
23
23
|
import mlrun.artifacts
|
|
24
24
|
import mlrun.common.helpers
|
|
25
|
-
import mlrun.common.schemas.model_monitoring.constants as
|
|
25
|
+
import mlrun.common.schemas.model_monitoring.constants as mm_constants
|
|
26
26
|
import mlrun.feature_store
|
|
27
27
|
import mlrun.model_monitoring.application
|
|
28
|
+
import mlrun.model_monitoring.applications as mm_app
|
|
28
29
|
import mlrun.serving
|
|
29
30
|
from mlrun.data_types.infer import InferOptions, get_df_stats
|
|
30
31
|
from mlrun.utils import datetime_now, logger
|
|
@@ -48,7 +49,7 @@ def get_or_create_model_endpoint(
|
|
|
48
49
|
sample_set_statistics: dict[str, typing.Any] = None,
|
|
49
50
|
drift_threshold: float = None,
|
|
50
51
|
possible_drift_threshold: float = None,
|
|
51
|
-
monitoring_mode:
|
|
52
|
+
monitoring_mode: mm_constants.ModelMonitoringMode = mm_constants.ModelMonitoringMode.disabled,
|
|
52
53
|
db_session=None,
|
|
53
54
|
) -> ModelEndpoint:
|
|
54
55
|
"""
|
|
@@ -128,7 +129,7 @@ def record_results(
|
|
|
128
129
|
context: typing.Optional[mlrun.MLClientCtx] = None,
|
|
129
130
|
infer_results_df: typing.Optional[pd.DataFrame] = None,
|
|
130
131
|
sample_set_statistics: typing.Optional[dict[str, typing.Any]] = None,
|
|
131
|
-
monitoring_mode:
|
|
132
|
+
monitoring_mode: mm_constants.ModelMonitoringMode = mm_constants.ModelMonitoringMode.enabled,
|
|
132
133
|
# Deprecated arguments:
|
|
133
134
|
drift_threshold: typing.Optional[float] = None,
|
|
134
135
|
possible_drift_threshold: typing.Optional[float] = None,
|
|
@@ -282,7 +283,7 @@ def _model_endpoint_validations(
|
|
|
282
283
|
# drift and possible drift thresholds
|
|
283
284
|
if drift_threshold:
|
|
284
285
|
current_drift_threshold = model_endpoint.spec.monitor_configuration.get(
|
|
285
|
-
|
|
286
|
+
mm_constants.EventFieldType.DRIFT_DETECTED_THRESHOLD,
|
|
286
287
|
mlrun.mlconf.model_endpoint_monitoring.drift_thresholds.default.drift_detected,
|
|
287
288
|
)
|
|
288
289
|
if current_drift_threshold != drift_threshold:
|
|
@@ -293,7 +294,7 @@ def _model_endpoint_validations(
|
|
|
293
294
|
|
|
294
295
|
if possible_drift_threshold:
|
|
295
296
|
current_possible_drift_threshold = model_endpoint.spec.monitor_configuration.get(
|
|
296
|
-
|
|
297
|
+
mm_constants.EventFieldType.POSSIBLE_DRIFT_THRESHOLD,
|
|
297
298
|
mlrun.mlconf.model_endpoint_monitoring.drift_thresholds.default.possible_drift,
|
|
298
299
|
)
|
|
299
300
|
if current_possible_drift_threshold != possible_drift_threshold:
|
|
@@ -332,14 +333,14 @@ def write_monitoring_df(
|
|
|
332
333
|
)
|
|
333
334
|
|
|
334
335
|
# Modify the DataFrame to the required structure that will be used later by the monitoring batch job
|
|
335
|
-
if
|
|
336
|
+
if mm_constants.EventFieldType.TIMESTAMP not in infer_results_df.columns:
|
|
336
337
|
# Initialize timestamp column with the current time
|
|
337
|
-
infer_results_df[
|
|
338
|
+
infer_results_df[mm_constants.EventFieldType.TIMESTAMP] = infer_datetime
|
|
338
339
|
|
|
339
340
|
# `endpoint_id` is the monitoring feature set entity and therefore it should be defined as the df index before
|
|
340
341
|
# the ingest process
|
|
341
|
-
infer_results_df[
|
|
342
|
-
infer_results_df.set_index(
|
|
342
|
+
infer_results_df[mm_constants.EventFieldType.ENDPOINT_ID] = endpoint_id
|
|
343
|
+
infer_results_df.set_index(mm_constants.EventFieldType.ENDPOINT_ID, inplace=True)
|
|
343
344
|
|
|
344
345
|
monitoring_feature_set.ingest(source=infer_results_df, overwrite=False)
|
|
345
346
|
|
|
@@ -355,7 +356,7 @@ def _generate_model_endpoint(
|
|
|
355
356
|
sample_set_statistics: dict[str, typing.Any],
|
|
356
357
|
drift_threshold: float,
|
|
357
358
|
possible_drift_threshold: float,
|
|
358
|
-
monitoring_mode:
|
|
359
|
+
monitoring_mode: mm_constants.ModelMonitoringMode = mm_constants.ModelMonitoringMode.disabled,
|
|
359
360
|
) -> ModelEndpoint:
|
|
360
361
|
"""
|
|
361
362
|
Write a new model endpoint record.
|
|
@@ -394,11 +395,11 @@ def _generate_model_endpoint(
|
|
|
394
395
|
model_endpoint.spec.model_class = "drift-analysis"
|
|
395
396
|
if drift_threshold:
|
|
396
397
|
model_endpoint.spec.monitor_configuration[
|
|
397
|
-
|
|
398
|
+
mm_constants.EventFieldType.DRIFT_DETECTED_THRESHOLD
|
|
398
399
|
] = drift_threshold
|
|
399
400
|
if possible_drift_threshold:
|
|
400
401
|
model_endpoint.spec.monitor_configuration[
|
|
401
|
-
|
|
402
|
+
mm_constants.EventFieldType.POSSIBLE_DRIFT_THRESHOLD
|
|
402
403
|
] = possible_drift_threshold
|
|
403
404
|
|
|
404
405
|
model_endpoint.spec.monitoring_mode = monitoring_mode
|
|
@@ -589,7 +590,10 @@ def _create_model_monitoring_function_base(
|
|
|
589
590
|
project: str,
|
|
590
591
|
func: typing.Union[str, None] = None,
|
|
591
592
|
application_class: typing.Union[
|
|
592
|
-
str,
|
|
593
|
+
str,
|
|
594
|
+
mlrun.model_monitoring.application.ModelMonitoringApplicationBase,
|
|
595
|
+
mm_app.ModelMonitoringApplicationBaseV2,
|
|
596
|
+
None,
|
|
593
597
|
] = None,
|
|
594
598
|
name: typing.Optional[str] = None,
|
|
595
599
|
image: typing.Optional[str] = None,
|
|
@@ -602,6 +606,20 @@ def _create_model_monitoring_function_base(
|
|
|
602
606
|
Note: this is an internal API only.
|
|
603
607
|
This function does not set the labels or mounts v3io.
|
|
604
608
|
"""
|
|
609
|
+
if isinstance(
|
|
610
|
+
application_class,
|
|
611
|
+
mlrun.model_monitoring.application.ModelMonitoringApplicationBase,
|
|
612
|
+
):
|
|
613
|
+
warnings.warn(
|
|
614
|
+
"The `ModelMonitoringApplicationBase` class is deprecated from version 1.7.0, "
|
|
615
|
+
"please use `ModelMonitoringApplicationBaseV2`. It will be removed in 1.9.0.",
|
|
616
|
+
FutureWarning,
|
|
617
|
+
)
|
|
618
|
+
if name in mm_constants.MonitoringFunctionNames.list():
|
|
619
|
+
raise mlrun.errors.MLRunInvalidArgumentError(
|
|
620
|
+
f"An application cannot have the following names: "
|
|
621
|
+
f"{mm_constants.MonitoringFunctionNames.list()}"
|
|
622
|
+
)
|
|
605
623
|
if func is None:
|
|
606
624
|
func = ""
|
|
607
625
|
func_obj = typing.cast(
|
|
@@ -618,14 +636,19 @@ def _create_model_monitoring_function_base(
|
|
|
618
636
|
),
|
|
619
637
|
)
|
|
620
638
|
graph = func_obj.set_topology(mlrun.serving.states.StepKinds.flow)
|
|
639
|
+
prepare_step = graph.to(
|
|
640
|
+
class_name="mlrun.model_monitoring.applications._application_steps._PrepareMonitoringEvent",
|
|
641
|
+
name="PrepareMonitoringEvent",
|
|
642
|
+
application_name=name,
|
|
643
|
+
)
|
|
621
644
|
if isinstance(application_class, str):
|
|
622
|
-
|
|
645
|
+
app_step = prepare_step.to(class_name=application_class, **application_kwargs)
|
|
623
646
|
else:
|
|
624
|
-
|
|
625
|
-
|
|
626
|
-
class_name="mlrun.model_monitoring.
|
|
647
|
+
app_step = prepare_step.to(class_name=application_class)
|
|
648
|
+
app_step.to(
|
|
649
|
+
class_name="mlrun.model_monitoring.applications._application_steps._PushToMonitoringWriter",
|
|
627
650
|
name="PushToMonitoringWriter",
|
|
628
651
|
project=project,
|
|
629
|
-
writer_application_name=
|
|
652
|
+
writer_application_name=mm_constants.MonitoringFunctionNames.WRITER,
|
|
630
653
|
).respond()
|
|
631
654
|
return func_obj
|