mlrun 1.7.0rc14__py3-none-any.whl → 1.7.0rc15__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of mlrun might be problematic. Click here for more details.
- mlrun/__main__.py +0 -105
- mlrun/artifacts/__init__.py +1 -2
- mlrun/artifacts/base.py +8 -250
- mlrun/artifacts/dataset.py +1 -190
- mlrun/artifacts/manager.py +2 -41
- mlrun/artifacts/model.py +1 -140
- mlrun/artifacts/plots.py +1 -375
- mlrun/common/schemas/model_monitoring/__init__.py +4 -0
- mlrun/common/schemas/model_monitoring/constants.py +24 -3
- mlrun/common/schemas/model_monitoring/model_endpoints.py +13 -1
- mlrun/config.py +3 -3
- mlrun/data_types/to_pandas.py +4 -4
- mlrun/datastore/base.py +41 -9
- mlrun/datastore/datastore_profile.py +50 -3
- mlrun/datastore/inmem.py +2 -2
- mlrun/datastore/sources.py +43 -2
- mlrun/datastore/store_resources.py +2 -6
- mlrun/datastore/targets.py +106 -39
- mlrun/db/httpdb.py +4 -4
- mlrun/feature_store/__init__.py +0 -2
- mlrun/feature_store/api.py +12 -47
- mlrun/feature_store/feature_set.py +9 -0
- mlrun/feature_store/retrieval/base.py +9 -4
- mlrun/feature_store/retrieval/conversion.py +4 -4
- mlrun/feature_store/retrieval/dask_merger.py +2 -0
- mlrun/feature_store/retrieval/job.py +2 -0
- mlrun/feature_store/retrieval/local_merger.py +2 -0
- mlrun/feature_store/retrieval/spark_merger.py +5 -0
- mlrun/frameworks/_dl_common/loggers/tensorboard_logger.py +5 -10
- mlrun/kfpops.py +5 -10
- mlrun/launcher/base.py +1 -1
- mlrun/launcher/client.py +1 -1
- mlrun/lists.py +2 -2
- mlrun/model.py +18 -9
- mlrun/model_monitoring/api.py +41 -18
- mlrun/model_monitoring/application.py +5 -305
- mlrun/model_monitoring/applications/__init__.py +11 -0
- mlrun/model_monitoring/applications/_application_steps.py +158 -0
- mlrun/model_monitoring/applications/base.py +282 -0
- mlrun/model_monitoring/applications/context.py +214 -0
- mlrun/model_monitoring/applications/evidently_base.py +211 -0
- mlrun/model_monitoring/applications/histogram_data_drift.py +92 -77
- mlrun/model_monitoring/applications/results.py +99 -0
- mlrun/model_monitoring/controller.py +3 -1
- mlrun/model_monitoring/db/stores/sqldb/models/base.py +7 -6
- mlrun/model_monitoring/db/stores/sqldb/sql_store.py +1 -1
- mlrun/model_monitoring/db/stores/v3io_kv/kv_store.py +67 -4
- mlrun/model_monitoring/evidently_application.py +6 -118
- mlrun/model_monitoring/helpers.py +1 -1
- mlrun/model_monitoring/model_endpoint.py +3 -2
- mlrun/model_monitoring/stream_processing.py +2 -3
- mlrun/model_monitoring/writer.py +69 -39
- mlrun/platforms/iguazio.py +2 -2
- mlrun/projects/project.py +18 -31
- mlrun/render.py +2 -10
- mlrun/run.py +1 -3
- mlrun/runtimes/__init__.py +3 -3
- mlrun/runtimes/base.py +3 -3
- mlrun/runtimes/funcdoc.py +0 -28
- mlrun/runtimes/local.py +1 -1
- mlrun/runtimes/mpijob/__init__.py +0 -20
- mlrun/runtimes/mpijob/v1.py +1 -1
- mlrun/runtimes/nuclio/function.py +1 -1
- mlrun/runtimes/utils.py +1 -1
- mlrun/utils/helpers.py +27 -40
- mlrun/utils/notifications/notification/slack.py +4 -2
- mlrun/utils/notifications/notification_pusher.py +133 -14
- mlrun/utils/version/version.json +2 -2
- {mlrun-1.7.0rc14.dist-info → mlrun-1.7.0rc15.dist-info}/METADATA +2 -2
- {mlrun-1.7.0rc14.dist-info → mlrun-1.7.0rc15.dist-info}/RECORD +75 -71
- mlrun/runtimes/mpijob/v1alpha1.py +0 -29
- /mlrun/{runtimes → common/runtimes}/constants.py +0 -0
- {mlrun-1.7.0rc14.dist-info → mlrun-1.7.0rc15.dist-info}/LICENSE +0 -0
- {mlrun-1.7.0rc14.dist-info → mlrun-1.7.0rc15.dist-info}/WHEEL +0 -0
- {mlrun-1.7.0rc14.dist-info → mlrun-1.7.0rc15.dist-info}/entry_points.txt +0 -0
- {mlrun-1.7.0rc14.dist-info → mlrun-1.7.0rc15.dist-info}/top_level.txt +0 -0
|
@@ -409,7 +409,7 @@ class SQLStoreBase(mlrun.model_monitoring.db.StoreBase):
|
|
|
409
409
|
+ "_"
|
|
410
410
|
+ event[mlrun.common.schemas.model_monitoring.WriterEvent.APPLICATION_NAME]
|
|
411
411
|
+ "_"
|
|
412
|
-
+ event[mlrun.common.schemas.model_monitoring.
|
|
412
|
+
+ event[mlrun.common.schemas.model_monitoring.ResultData.RESULT_NAME]
|
|
413
413
|
)
|
|
414
414
|
|
|
415
415
|
def get_last_analyzed(self, endpoint_id: str, application_name: str) -> int:
|
|
@@ -11,7 +11,6 @@
|
|
|
11
11
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
12
|
# See the License for the specific language governing permissions and
|
|
13
13
|
# limitations under the License.
|
|
14
|
-
#
|
|
15
14
|
|
|
16
15
|
import json
|
|
17
16
|
import os
|
|
@@ -41,7 +40,7 @@ class KVStoreBase(mlrun.model_monitoring.db.StoreBase):
|
|
|
41
40
|
client and usually the KV table can be found under v3io:///users/pipelines/project-name/model-endpoints/endpoints/.
|
|
42
41
|
"""
|
|
43
42
|
|
|
44
|
-
def __init__(self, project: str, access_key: str):
|
|
43
|
+
def __init__(self, project: str, access_key: typing.Optional[str] = None) -> None:
|
|
45
44
|
super().__init__(project=project)
|
|
46
45
|
# Initialize a V3IO client instance
|
|
47
46
|
self.access_key = access_key or os.environ.get("V3IO_ACCESS_KEY")
|
|
@@ -410,7 +409,7 @@ class KVStoreBase(mlrun.model_monitoring.db.StoreBase):
|
|
|
410
409
|
mlrun.common.schemas.model_monitoring.WriterEvent.APPLICATION_NAME
|
|
411
410
|
)
|
|
412
411
|
metric_name = event.pop(
|
|
413
|
-
mlrun.common.schemas.model_monitoring.
|
|
412
|
+
mlrun.common.schemas.model_monitoring.ResultData.RESULT_NAME
|
|
414
413
|
)
|
|
415
414
|
attributes = {metric_name: json.dumps(event)}
|
|
416
415
|
|
|
@@ -446,7 +445,7 @@ class KVStoreBase(mlrun.model_monitoring.db.StoreBase):
|
|
|
446
445
|
"""Generate V3IO KV schema file which will be used by the model monitoring applications dashboard in Grafana."""
|
|
447
446
|
fields = [
|
|
448
447
|
{
|
|
449
|
-
"name": mlrun.common.schemas.model_monitoring.
|
|
448
|
+
"name": mlrun.common.schemas.model_monitoring.ResultData.RESULT_NAME,
|
|
450
449
|
"type": "string",
|
|
451
450
|
"nullable": False,
|
|
452
451
|
}
|
|
@@ -703,3 +702,67 @@ class KVStoreBase(mlrun.model_monitoring.db.StoreBase):
|
|
|
703
702
|
@staticmethod
|
|
704
703
|
def _get_monitoring_schedules_container(project_name: str) -> str:
|
|
705
704
|
return f"users/pipelines/{project_name}/monitoring-schedules/functions"
|
|
705
|
+
|
|
706
|
+
def _extract_metrics_from_items(
|
|
707
|
+
self, app_items: list[dict[str, str]]
|
|
708
|
+
) -> list[mlrun.common.schemas.model_monitoring.ModelEndpointMonitoringMetric]:
|
|
709
|
+
metrics: list[
|
|
710
|
+
mlrun.common.schemas.model_monitoring.ModelEndpointMonitoringMetric
|
|
711
|
+
] = []
|
|
712
|
+
for app_item in app_items:
|
|
713
|
+
# See https://www.iguazio.com/docs/latest-release/services/data-layer/reference/system-attributes/#sys-attr-__name
|
|
714
|
+
app_name = app_item.pop("__name")
|
|
715
|
+
if app_name == ".#schema":
|
|
716
|
+
continue
|
|
717
|
+
for result_name in app_item:
|
|
718
|
+
metrics.append(
|
|
719
|
+
mlrun.common.schemas.model_monitoring.ModelEndpointMonitoringMetric(
|
|
720
|
+
project=self.project,
|
|
721
|
+
app=app_name,
|
|
722
|
+
type=mlrun.common.schemas.model_monitoring.ModelEndpointMonitoringMetricType.RESULT,
|
|
723
|
+
name=result_name,
|
|
724
|
+
full_name=".".join(
|
|
725
|
+
[
|
|
726
|
+
self.project,
|
|
727
|
+
app_name,
|
|
728
|
+
mlrun.common.schemas.model_monitoring.ModelEndpointMonitoringMetricType.RESULT,
|
|
729
|
+
result_name,
|
|
730
|
+
]
|
|
731
|
+
),
|
|
732
|
+
)
|
|
733
|
+
)
|
|
734
|
+
return metrics
|
|
735
|
+
|
|
736
|
+
def get_model_endpoint_metrics(
|
|
737
|
+
self, endpoint_id: str
|
|
738
|
+
) -> list[mlrun.common.schemas.model_monitoring.ModelEndpointMonitoringMetric]:
|
|
739
|
+
"""Get model monitoring results and metrics on the endpoint"""
|
|
740
|
+
metrics: list[
|
|
741
|
+
mlrun.common.schemas.model_monitoring.ModelEndpointMonitoringMetric
|
|
742
|
+
] = []
|
|
743
|
+
container = self.get_v3io_monitoring_apps_container(self.project)
|
|
744
|
+
try:
|
|
745
|
+
response = self.client.kv.scan(container=container, table_path=endpoint_id)
|
|
746
|
+
except v3io.dataplane.response.HttpResponseError as err:
|
|
747
|
+
if err.status_code == HTTPStatus.NOT_FOUND:
|
|
748
|
+
logger.warning(
|
|
749
|
+
"Attempt getting metrics and results - no data. Check the "
|
|
750
|
+
"project name, endpoint, or wait for the applications to start.",
|
|
751
|
+
container=container,
|
|
752
|
+
table_path=endpoint_id,
|
|
753
|
+
)
|
|
754
|
+
return []
|
|
755
|
+
raise
|
|
756
|
+
|
|
757
|
+
while True:
|
|
758
|
+
metrics.extend(self._extract_metrics_from_items(response.output.items))
|
|
759
|
+
if response.output.last:
|
|
760
|
+
break
|
|
761
|
+
# TODO: Use AIO client: `v3io.aio.dataplane.client.Client`
|
|
762
|
+
response = self.client.kv.scan(
|
|
763
|
+
container=container,
|
|
764
|
+
table_path=endpoint_id,
|
|
765
|
+
marker=response.output.next_marker,
|
|
766
|
+
)
|
|
767
|
+
|
|
768
|
+
return metrics
|
|
@@ -12,121 +12,9 @@
|
|
|
12
12
|
# See the License for the specific language governing permissions and
|
|
13
13
|
# limitations under the License.
|
|
14
14
|
|
|
15
|
-
|
|
16
|
-
import
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
from mlrun.errors import MLRunIncompatibleVersionError
|
|
23
|
-
from mlrun.model_monitoring.application import ModelMonitoringApplicationBase
|
|
24
|
-
|
|
25
|
-
SUPPORTED_EVIDENTLY_VERSION = semver.Version.parse("0.4.11")
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
def _check_evidently_version(*, cur: semver.Version, ref: semver.Version) -> None:
|
|
29
|
-
if ref.is_compatible(cur) or (
|
|
30
|
-
cur.major == ref.major == 0 and cur.minor == ref.minor and cur.patch > ref.patch
|
|
31
|
-
):
|
|
32
|
-
return
|
|
33
|
-
if cur.major == ref.major == 0 and cur.minor > ref.minor:
|
|
34
|
-
warnings.warn(
|
|
35
|
-
f"Evidently version {cur} is not compatible with the tested "
|
|
36
|
-
f"version {ref}, use at your own risk."
|
|
37
|
-
)
|
|
38
|
-
else:
|
|
39
|
-
raise MLRunIncompatibleVersionError(
|
|
40
|
-
f"Evidently version {cur} is not supported, please change to "
|
|
41
|
-
f"{ref} (or another compatible version)."
|
|
42
|
-
)
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
_HAS_EVIDENTLY = False
|
|
46
|
-
try:
|
|
47
|
-
import evidently # noqa: F401
|
|
48
|
-
|
|
49
|
-
_check_evidently_version(
|
|
50
|
-
cur=semver.Version.parse(evidently.__version__),
|
|
51
|
-
ref=SUPPORTED_EVIDENTLY_VERSION,
|
|
52
|
-
)
|
|
53
|
-
_HAS_EVIDENTLY = True
|
|
54
|
-
except ModuleNotFoundError:
|
|
55
|
-
pass
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
if _HAS_EVIDENTLY:
|
|
59
|
-
from evidently.renderers.notebook_utils import determine_template
|
|
60
|
-
from evidently.report.report import Report
|
|
61
|
-
from evidently.suite.base_suite import Suite
|
|
62
|
-
from evidently.ui.type_aliases import STR_UUID
|
|
63
|
-
from evidently.ui.workspace import Workspace
|
|
64
|
-
from evidently.utils.dashboard import TemplateParams
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
class EvidentlyModelMonitoringApplicationBase(ModelMonitoringApplicationBase):
|
|
68
|
-
def __init__(
|
|
69
|
-
self, evidently_workspace_path: str, evidently_project_id: "STR_UUID"
|
|
70
|
-
) -> None:
|
|
71
|
-
"""
|
|
72
|
-
A class for integrating Evidently for mlrun model monitoring within a monitoring application.
|
|
73
|
-
Note: evidently is not installed by default in the mlrun/mlrun image.
|
|
74
|
-
It must be installed separately to use this class.
|
|
75
|
-
|
|
76
|
-
:param evidently_workspace_path: (str) The path to the Evidently workspace.
|
|
77
|
-
:param evidently_project_id: (str) The ID of the Evidently project.
|
|
78
|
-
|
|
79
|
-
"""
|
|
80
|
-
if not _HAS_EVIDENTLY:
|
|
81
|
-
raise ModuleNotFoundError("Evidently is not installed - the app cannot run")
|
|
82
|
-
self.evidently_workspace = Workspace.create(evidently_workspace_path)
|
|
83
|
-
self.evidently_project_id = evidently_project_id
|
|
84
|
-
self.evidently_project = self.evidently_workspace.get_project(
|
|
85
|
-
evidently_project_id
|
|
86
|
-
)
|
|
87
|
-
|
|
88
|
-
def log_evidently_object(
|
|
89
|
-
self, evidently_object: Union["Report", "Suite"], artifact_name: str
|
|
90
|
-
):
|
|
91
|
-
"""
|
|
92
|
-
Logs an Evidently report or suite as an artifact.
|
|
93
|
-
|
|
94
|
-
:param evidently_object: (Union[Report, Suite]) The Evidently report or suite object.
|
|
95
|
-
:param artifact_name: (str) The name for the logged artifact.
|
|
96
|
-
"""
|
|
97
|
-
evidently_object_html = evidently_object.get_html()
|
|
98
|
-
self.context.log_artifact(
|
|
99
|
-
artifact_name, body=evidently_object_html.encode("utf-8"), format="html"
|
|
100
|
-
)
|
|
101
|
-
|
|
102
|
-
def log_project_dashboard(
|
|
103
|
-
self,
|
|
104
|
-
timestamp_start: pd.Timestamp,
|
|
105
|
-
timestamp_end: pd.Timestamp,
|
|
106
|
-
artifact_name: str = "dashboard",
|
|
107
|
-
):
|
|
108
|
-
"""
|
|
109
|
-
Logs an Evidently project dashboard.
|
|
110
|
-
|
|
111
|
-
:param timestamp_start: (pd.Timestamp) The start timestamp for the dashboard data.
|
|
112
|
-
:param timestamp_end: (pd.Timestamp) The end timestamp for the dashboard data.
|
|
113
|
-
:param artifact_name: (str) The name for the logged artifact.
|
|
114
|
-
"""
|
|
115
|
-
|
|
116
|
-
dashboard_info = self.evidently_project.build_dashboard_info(
|
|
117
|
-
timestamp_start, timestamp_end
|
|
118
|
-
)
|
|
119
|
-
template_params = TemplateParams(
|
|
120
|
-
dashboard_id="pd_" + str(uuid.uuid4()).replace("-", ""),
|
|
121
|
-
dashboard_info=dashboard_info,
|
|
122
|
-
additional_graphs={},
|
|
123
|
-
)
|
|
124
|
-
|
|
125
|
-
dashboard_html = self._render(determine_template("inline"), template_params)
|
|
126
|
-
self.context.log_artifact(
|
|
127
|
-
artifact_name, body=dashboard_html.encode("utf-8"), format="html"
|
|
128
|
-
)
|
|
129
|
-
|
|
130
|
-
@staticmethod
|
|
131
|
-
def _render(temple_func, template_params: "TemplateParams"):
|
|
132
|
-
return temple_func(params=template_params)
|
|
15
|
+
# TODO : delete this file in 1.9.0
|
|
16
|
+
from mlrun.model_monitoring.applications import ( # noqa: F401
|
|
17
|
+
_HAS_EVIDENTLY,
|
|
18
|
+
SUPPORTED_EVIDENTLY_VERSION,
|
|
19
|
+
EvidentlyModelMonitoringApplicationBase,
|
|
20
|
+
)
|
|
@@ -215,7 +215,7 @@ def update_model_endpoint_last_request(
|
|
|
215
215
|
|
|
216
216
|
def calculate_inputs_statistics(
|
|
217
217
|
sample_set_statistics: dict, inputs: pd.DataFrame
|
|
218
|
-
) ->
|
|
218
|
+
) -> mlrun.common.model_monitoring.helpers.FeatureStats:
|
|
219
219
|
"""
|
|
220
220
|
Calculate the inputs data statistics for drift monitoring purpose.
|
|
221
221
|
|
|
@@ -17,6 +17,7 @@ from dataclasses import dataclass, field
|
|
|
17
17
|
from typing import Any
|
|
18
18
|
|
|
19
19
|
import mlrun.model
|
|
20
|
+
from mlrun.common.model_monitoring.helpers import FeatureStats
|
|
20
21
|
from mlrun.common.schemas.model_monitoring.constants import (
|
|
21
22
|
EndpointType,
|
|
22
23
|
EventKeyMetrics,
|
|
@@ -42,8 +43,8 @@ class ModelEndpointSpec(mlrun.model.ModelObj):
|
|
|
42
43
|
|
|
43
44
|
@dataclass
|
|
44
45
|
class ModelEndpointStatus(mlrun.model.ModelObj):
|
|
45
|
-
feature_stats:
|
|
46
|
-
current_stats:
|
|
46
|
+
feature_stats: FeatureStats = field(default_factory=dict)
|
|
47
|
+
current_stats: FeatureStats = field(default_factory=dict)
|
|
47
48
|
first_request: str = ""
|
|
48
49
|
last_request: str = ""
|
|
49
50
|
error_count: int = 0
|
|
@@ -40,7 +40,6 @@ from mlrun.common.schemas.model_monitoring.constants import (
|
|
|
40
40
|
ProjectSecretKeys,
|
|
41
41
|
PrometheusEndpoints,
|
|
42
42
|
)
|
|
43
|
-
from mlrun.model_monitoring.helpers import get_endpoint_record
|
|
44
43
|
from mlrun.utils import logger
|
|
45
44
|
|
|
46
45
|
|
|
@@ -807,7 +806,7 @@ class ProcessEndpointEvent(mlrun.feature_store.steps.MapClass):
|
|
|
807
806
|
# left them
|
|
808
807
|
if endpoint_id not in self.endpoints:
|
|
809
808
|
logger.info("Trying to resume state", endpoint_id=endpoint_id)
|
|
810
|
-
endpoint_record = get_endpoint_record(
|
|
809
|
+
endpoint_record = mlrun.model_monitoring.helpers.get_endpoint_record(
|
|
811
810
|
project=self.project,
|
|
812
811
|
endpoint_id=endpoint_id,
|
|
813
812
|
)
|
|
@@ -940,7 +939,7 @@ class MapFeatureNames(mlrun.feature_store.steps.MapClass):
|
|
|
940
939
|
label_values = event[EventFieldType.PREDICTION]
|
|
941
940
|
# Get feature names and label columns
|
|
942
941
|
if endpoint_id not in self.feature_names:
|
|
943
|
-
endpoint_record = get_endpoint_record(
|
|
942
|
+
endpoint_record = mlrun.model_monitoring.helpers.get_endpoint_record(
|
|
944
943
|
project=self.project,
|
|
945
944
|
endpoint_id=endpoint_id,
|
|
946
945
|
)
|
mlrun/model_monitoring/writer.py
CHANGED
|
@@ -23,14 +23,18 @@ from v3io_frames.errors import Error as V3IOFramesError
|
|
|
23
23
|
from v3io_frames.frames_pb2 import IGNORE
|
|
24
24
|
|
|
25
25
|
import mlrun.common.model_monitoring
|
|
26
|
+
import mlrun.common.schemas
|
|
26
27
|
import mlrun.common.schemas.alert as alert_constants
|
|
27
28
|
import mlrun.model_monitoring
|
|
28
29
|
import mlrun.model_monitoring.db.stores
|
|
29
30
|
import mlrun.utils.v3io_clients
|
|
30
31
|
from mlrun.common.schemas.model_monitoring.constants import (
|
|
31
32
|
EventFieldType,
|
|
33
|
+
MetricData,
|
|
34
|
+
ResultData,
|
|
32
35
|
ResultStatusApp,
|
|
33
36
|
WriterEvent,
|
|
37
|
+
WriterEventKind,
|
|
34
38
|
)
|
|
35
39
|
from mlrun.common.schemas.notification import NotificationKind, NotificationSeverity
|
|
36
40
|
from mlrun.model_monitoring.helpers import get_endpoint_record
|
|
@@ -75,20 +79,20 @@ class _Notifier:
|
|
|
75
79
|
self._severity = severity
|
|
76
80
|
|
|
77
81
|
def _should_send_event(self) -> bool:
|
|
78
|
-
return self._event[
|
|
82
|
+
return self._event[ResultData.RESULT_STATUS] >= ResultStatusApp.detected.value
|
|
79
83
|
|
|
80
84
|
def _generate_message(self) -> str:
|
|
81
85
|
return f"""\
|
|
82
86
|
The monitoring app `{self._event[WriterEvent.APPLICATION_NAME]}` \
|
|
83
|
-
of kind `{self._event[
|
|
87
|
+
of kind `{self._event[ResultData.RESULT_KIND]}` \
|
|
84
88
|
detected a problem in model endpoint ID `{self._event[WriterEvent.ENDPOINT_ID]}` \
|
|
85
89
|
at time `{self._event[WriterEvent.START_INFER_TIME]}`.
|
|
86
90
|
|
|
87
91
|
Result data:
|
|
88
|
-
Name: `{self._event[
|
|
89
|
-
Value: `{self._event[
|
|
90
|
-
Status: `{self._event[
|
|
91
|
-
Extra data: `{self._event[
|
|
92
|
+
Name: `{self._event[ResultData.RESULT_NAME]}`
|
|
93
|
+
Value: `{self._event[ResultData.RESULT_VALUE]}`
|
|
94
|
+
Status: `{self._event[ResultData.RESULT_STATUS]}`
|
|
95
|
+
Extra data: `{self._event[ResultData.RESULT_EXTRA_DATA]}`\
|
|
92
96
|
"""
|
|
93
97
|
|
|
94
98
|
def notify(self) -> None:
|
|
@@ -144,19 +148,25 @@ class ModelMonitoringWriter(StepToDict):
|
|
|
144
148
|
rate=_TSDB_RATE,
|
|
145
149
|
)
|
|
146
150
|
|
|
147
|
-
def _update_kv_db(self, event: _AppResultEvent) -> None:
|
|
151
|
+
def _update_kv_db(self, event: _AppResultEvent, kind: str = "result") -> None:
|
|
152
|
+
if kind == "metric":
|
|
153
|
+
# TODO : Implement the logic for writing metrics to KV
|
|
154
|
+
return
|
|
148
155
|
event = _AppResultEvent(event.copy())
|
|
149
156
|
application_result_store = mlrun.model_monitoring.get_store_object(
|
|
150
157
|
project=self.project
|
|
151
158
|
)
|
|
152
159
|
application_result_store.write_application_result(event=event)
|
|
153
160
|
|
|
154
|
-
def _update_tsdb(self, event: _AppResultEvent) -> None:
|
|
161
|
+
def _update_tsdb(self, event: _AppResultEvent, kind: str = "result") -> None:
|
|
162
|
+
if kind == "metric":
|
|
163
|
+
# TODO : Implement the logic for writing metrics to TSDB
|
|
164
|
+
return
|
|
155
165
|
event = _AppResultEvent(event.copy())
|
|
156
166
|
event[WriterEvent.END_INFER_TIME] = datetime.datetime.fromisoformat(
|
|
157
167
|
event[WriterEvent.END_INFER_TIME]
|
|
158
168
|
)
|
|
159
|
-
del event[
|
|
169
|
+
del event[ResultData.RESULT_EXTRA_DATA]
|
|
160
170
|
try:
|
|
161
171
|
self._tsdb_client.write(
|
|
162
172
|
backend=_TSDB_BE,
|
|
@@ -166,7 +176,7 @@ class ModelMonitoringWriter(StepToDict):
|
|
|
166
176
|
WriterEvent.END_INFER_TIME,
|
|
167
177
|
WriterEvent.ENDPOINT_ID,
|
|
168
178
|
WriterEvent.APPLICATION_NAME,
|
|
169
|
-
|
|
179
|
+
ResultData.RESULT_NAME,
|
|
170
180
|
],
|
|
171
181
|
)
|
|
172
182
|
logger.info("Updated V3IO TSDB successfully", table=_TSDB_TABLE)
|
|
@@ -180,20 +190,21 @@ class ModelMonitoringWriter(StepToDict):
|
|
|
180
190
|
|
|
181
191
|
@staticmethod
|
|
182
192
|
def _generate_event_on_drift(
|
|
183
|
-
|
|
184
|
-
):
|
|
193
|
+
model_endpoint: str, drift_status: str, event_value: dict, project_name: str
|
|
194
|
+
) -> None:
|
|
185
195
|
if (
|
|
186
|
-
drift_status == ResultStatusApp.detected
|
|
187
|
-
or drift_status == ResultStatusApp.potential_detection
|
|
196
|
+
drift_status == ResultStatusApp.detected.value
|
|
197
|
+
or drift_status == ResultStatusApp.potential_detection.value
|
|
188
198
|
):
|
|
199
|
+
logger.info("Sending an alert")
|
|
189
200
|
entity = {
|
|
190
201
|
"kind": alert_constants.EventEntityKind.MODEL,
|
|
191
202
|
"project": project_name,
|
|
192
|
-
"
|
|
203
|
+
"model_endpoint": model_endpoint,
|
|
193
204
|
}
|
|
194
205
|
event_kind = (
|
|
195
206
|
alert_constants.EventKind.DRIFT_DETECTED
|
|
196
|
-
if drift_status == ResultStatusApp.detected
|
|
207
|
+
if drift_status == ResultStatusApp.detected.value
|
|
197
208
|
else alert_constants.EventKind.DRIFT_SUSPECTED
|
|
198
209
|
)
|
|
199
210
|
event_data = mlrun.common.schemas.Event(
|
|
@@ -202,37 +213,57 @@ class ModelMonitoringWriter(StepToDict):
|
|
|
202
213
|
mlrun.get_run_db().generate_event(event_kind, event_data)
|
|
203
214
|
|
|
204
215
|
@staticmethod
|
|
205
|
-
def _reconstruct_event(event: _RawEvent) -> _AppResultEvent:
|
|
216
|
+
def _reconstruct_event(event: _RawEvent) -> tuple[_AppResultEvent, str]:
|
|
206
217
|
"""
|
|
207
218
|
Modify the raw event into the expected monitoring application event
|
|
208
219
|
schema as defined in `mlrun.common.schemas.model_monitoring.constants.WriterEvent`
|
|
209
220
|
"""
|
|
210
|
-
|
|
211
|
-
|
|
212
|
-
|
|
221
|
+
if not isinstance(event, dict):
|
|
222
|
+
raise _WriterEventTypeError(
|
|
223
|
+
f"The event is of type: {type(event)}, expected a dictionary"
|
|
213
224
|
)
|
|
214
|
-
|
|
215
|
-
|
|
225
|
+
kind = event.pop(WriterEvent.EVENT_KIND, WriterEventKind.RESULT)
|
|
226
|
+
result_event = _AppResultEvent(json.loads(event.pop(WriterEvent.DATA, "{}")))
|
|
227
|
+
if not result_event: # BC for < 1.7.0, can be removed in 1.9.0
|
|
228
|
+
result_event = _AppResultEvent(event)
|
|
229
|
+
else:
|
|
230
|
+
result_event.update(_AppResultEvent(event))
|
|
231
|
+
|
|
232
|
+
expected_keys = list(
|
|
233
|
+
set(WriterEvent.list()).difference(
|
|
234
|
+
[WriterEvent.EVENT_KIND, WriterEvent.DATA]
|
|
216
235
|
)
|
|
217
|
-
|
|
218
|
-
|
|
236
|
+
)
|
|
237
|
+
if kind == WriterEventKind.METRIC:
|
|
238
|
+
expected_keys.extend(MetricData.list())
|
|
239
|
+
elif kind == WriterEventKind.RESULT:
|
|
240
|
+
expected_keys.extend(ResultData.list())
|
|
241
|
+
else:
|
|
219
242
|
raise _WriterEventValueError(
|
|
220
|
-
"
|
|
221
|
-
|
|
222
|
-
|
|
223
|
-
|
|
224
|
-
raise
|
|
225
|
-
f"The event
|
|
226
|
-
|
|
243
|
+
f"Unknown event kind: {kind}, expected one of: {WriterEventKind.list()}"
|
|
244
|
+
)
|
|
245
|
+
missing_keys = [key for key in expected_keys if key not in result_event]
|
|
246
|
+
if missing_keys:
|
|
247
|
+
raise _WriterEventValueError(
|
|
248
|
+
f"The received event misses some keys compared to the expected "
|
|
249
|
+
f"monitoring application event schema: {missing_keys}"
|
|
250
|
+
)
|
|
251
|
+
|
|
252
|
+
return result_event, kind
|
|
227
253
|
|
|
228
254
|
def do(self, event: _RawEvent) -> None:
|
|
229
|
-
event = self._reconstruct_event(event)
|
|
255
|
+
event, kind = self._reconstruct_event(event)
|
|
230
256
|
logger.info("Starting to write event", event=event)
|
|
231
|
-
|
|
232
|
-
self.
|
|
257
|
+
|
|
258
|
+
self._update_tsdb(event, kind)
|
|
259
|
+
self._update_kv_db(event, kind)
|
|
260
|
+
logger.info("Completed event DB writes")
|
|
233
261
|
_Notifier(event=event, notification_pusher=self._custom_notifier).notify()
|
|
234
262
|
|
|
235
|
-
if
|
|
263
|
+
if (
|
|
264
|
+
mlrun.mlconf.alerts.mode == mlrun.common.schemas.alert.AlertsModes.enabled
|
|
265
|
+
and kind == WriterEventKind.RESULT
|
|
266
|
+
):
|
|
236
267
|
endpoint_id = event[WriterEvent.ENDPOINT_ID]
|
|
237
268
|
endpoint_record = self._endpoints_records.setdefault(
|
|
238
269
|
endpoint_id,
|
|
@@ -242,13 +273,12 @@ class ModelMonitoringWriter(StepToDict):
|
|
|
242
273
|
"app_name": event[WriterEvent.APPLICATION_NAME],
|
|
243
274
|
"model": endpoint_record.get(EventFieldType.MODEL),
|
|
244
275
|
"model_endpoint_id": event[WriterEvent.ENDPOINT_ID],
|
|
245
|
-
"result_name": event[
|
|
246
|
-
"result_value": event[
|
|
276
|
+
"result_name": event[ResultData.RESULT_NAME],
|
|
277
|
+
"result_value": event[ResultData.RESULT_VALUE],
|
|
247
278
|
}
|
|
248
279
|
self._generate_event_on_drift(
|
|
249
280
|
event[WriterEvent.ENDPOINT_ID],
|
|
250
|
-
event[
|
|
281
|
+
event[ResultData.RESULT_STATUS],
|
|
251
282
|
event_value,
|
|
252
283
|
self.project,
|
|
253
284
|
)
|
|
254
|
-
logger.info("Completed event DB writes")
|
mlrun/platforms/iguazio.py
CHANGED
|
@@ -525,8 +525,8 @@ def add_or_refresh_credentials(
|
|
|
525
525
|
# different access keys for the 2 usages
|
|
526
526
|
token = (
|
|
527
527
|
token
|
|
528
|
-
# can't use mlrun.runtimes.constants.FunctionEnvironmentVariables.auth_session cause this is running
|
|
529
|
-
# import execution path (when we're initializing the run db) and therefore we can't import mlrun.runtimes
|
|
528
|
+
# can't use mlrun.common.runtimes.constants.FunctionEnvironmentVariables.auth_session cause this is running
|
|
529
|
+
# in the import execution path (when we're initializing the run db) and therefore we can't import mlrun.runtimes
|
|
530
530
|
or os.environ.get("MLRUN_AUTH_SESSION")
|
|
531
531
|
or os.environ.get("V3IO_ACCESS_KEY")
|
|
532
532
|
)
|
mlrun/projects/project.py
CHANGED
|
@@ -41,6 +41,7 @@ import mlrun.common.schemas.model_monitoring.constants as mm_constants
|
|
|
41
41
|
import mlrun.db
|
|
42
42
|
import mlrun.errors
|
|
43
43
|
import mlrun.k8s_utils
|
|
44
|
+
import mlrun.model_monitoring.applications as mm_app
|
|
44
45
|
import mlrun.runtimes
|
|
45
46
|
import mlrun.runtimes.nuclio.api_gateway
|
|
46
47
|
import mlrun.runtimes.pod
|
|
@@ -56,14 +57,10 @@ from ..artifacts.manager import ArtifactManager, dict_to_artifact, extend_artifa
|
|
|
56
57
|
from ..datastore import store_manager
|
|
57
58
|
from ..features import Feature
|
|
58
59
|
from ..model import EntrypointParam, ImageBuilder, ModelObj
|
|
59
|
-
from ..model_monitoring.application import (
|
|
60
|
-
ModelMonitoringApplicationBase,
|
|
61
|
-
)
|
|
62
60
|
from ..run import code_to_function, get_object, import_function, new_function
|
|
63
61
|
from ..secrets import SecretsStore
|
|
64
62
|
from ..utils import (
|
|
65
63
|
is_ipython,
|
|
66
|
-
is_legacy_artifact,
|
|
67
64
|
is_relative_path,
|
|
68
65
|
is_yaml_path,
|
|
69
66
|
logger,
|
|
@@ -991,13 +988,9 @@ class ProjectSpec(ModelObj):
|
|
|
991
988
|
if not isinstance(artifact, dict) and not hasattr(artifact, "to_dict"):
|
|
992
989
|
raise ValueError("artifacts must be a dict or class")
|
|
993
990
|
if isinstance(artifact, dict):
|
|
994
|
-
|
|
995
|
-
if is_legacy_artifact(artifact) or _is_imported_artifact(artifact):
|
|
996
|
-
key = artifact.get("key")
|
|
997
|
-
else:
|
|
998
|
-
key = artifact.get("metadata").get("key", "")
|
|
991
|
+
key = artifact.get("metadata", {}).get("key", "")
|
|
999
992
|
if not key:
|
|
1000
|
-
raise ValueError('artifacts "key" must be specified')
|
|
993
|
+
raise ValueError('artifacts "metadata.key" must be specified')
|
|
1001
994
|
else:
|
|
1002
995
|
key = artifact.key
|
|
1003
996
|
artifact = artifact.to_dict()
|
|
@@ -1885,7 +1878,11 @@ class MlrunProject(ModelObj):
|
|
|
1885
1878
|
def set_model_monitoring_function(
|
|
1886
1879
|
self,
|
|
1887
1880
|
func: typing.Union[str, mlrun.runtimes.BaseRuntime, None] = None,
|
|
1888
|
-
application_class: typing.Union[
|
|
1881
|
+
application_class: typing.Union[
|
|
1882
|
+
str,
|
|
1883
|
+
mm_app.ModelMonitoringApplicationBase,
|
|
1884
|
+
mm_app.ModelMonitoringApplicationBaseV2,
|
|
1885
|
+
] = None,
|
|
1889
1886
|
name: str = None,
|
|
1890
1887
|
image: str = None,
|
|
1891
1888
|
handler=None,
|
|
@@ -1923,11 +1920,6 @@ class MlrunProject(ModelObj):
|
|
|
1923
1920
|
monitoring application's constructor.
|
|
1924
1921
|
"""
|
|
1925
1922
|
|
|
1926
|
-
if name in mm_constants.MonitoringFunctionNames.list():
|
|
1927
|
-
raise mlrun.errors.MLRunInvalidArgumentError(
|
|
1928
|
-
f"An application cannot have the following names: "
|
|
1929
|
-
f"{mm_constants.MonitoringFunctionNames.list()}"
|
|
1930
|
-
)
|
|
1931
1923
|
function_object: RemoteRuntime = None
|
|
1932
1924
|
(
|
|
1933
1925
|
resolved_function_name,
|
|
@@ -1953,7 +1945,11 @@ class MlrunProject(ModelObj):
|
|
|
1953
1945
|
def create_model_monitoring_function(
|
|
1954
1946
|
self,
|
|
1955
1947
|
func: str = None,
|
|
1956
|
-
application_class: typing.Union[
|
|
1948
|
+
application_class: typing.Union[
|
|
1949
|
+
str,
|
|
1950
|
+
mm_app.ModelMonitoringApplicationBase,
|
|
1951
|
+
mm_app.ModelMonitoringApplicationBaseV2,
|
|
1952
|
+
] = None,
|
|
1957
1953
|
name: str = None,
|
|
1958
1954
|
image: str = None,
|
|
1959
1955
|
handler: str = None,
|
|
@@ -2006,7 +2002,10 @@ class MlrunProject(ModelObj):
|
|
|
2006
2002
|
self,
|
|
2007
2003
|
func: typing.Union[str, mlrun.runtimes.BaseRuntime, None] = None,
|
|
2008
2004
|
application_class: typing.Union[
|
|
2009
|
-
str,
|
|
2005
|
+
str,
|
|
2006
|
+
mm_app.ModelMonitoringApplicationBase,
|
|
2007
|
+
mm_app.ModelMonitoringApplicationBaseV2,
|
|
2008
|
+
None,
|
|
2010
2009
|
] = None,
|
|
2011
2010
|
name: typing.Optional[str] = None,
|
|
2012
2011
|
image: typing.Optional[str] = None,
|
|
@@ -3344,7 +3343,6 @@ class MlrunProject(ModelObj):
|
|
|
3344
3343
|
image: str = None,
|
|
3345
3344
|
set_as_default: bool = True,
|
|
3346
3345
|
with_mlrun: bool = None,
|
|
3347
|
-
skip_deployed: bool = False,
|
|
3348
3346
|
base_image: str = None,
|
|
3349
3347
|
commands: list = None,
|
|
3350
3348
|
secret_name: str = None,
|
|
@@ -3365,7 +3363,6 @@ class MlrunProject(ModelObj):
|
|
|
3365
3363
|
used. If not set, the `mlconf.default_project_image_name` value will be used
|
|
3366
3364
|
:param set_as_default: set `image` to be the project's default image (default False)
|
|
3367
3365
|
:param with_mlrun: add the current mlrun package to the container build
|
|
3368
|
-
:param skip_deployed: *Deprecated* parameter is ignored
|
|
3369
3366
|
:param base_image: base image name/path (commands and source code will be added to it) defaults to
|
|
3370
3367
|
mlrun.mlconf.default_base_image
|
|
3371
3368
|
:param commands: list of docker build (RUN) commands e.g. ['pip install pandas']
|
|
@@ -3390,14 +3387,6 @@ class MlrunProject(ModelObj):
|
|
|
3390
3387
|
base_image=base_image,
|
|
3391
3388
|
)
|
|
3392
3389
|
|
|
3393
|
-
if skip_deployed:
|
|
3394
|
-
warnings.warn(
|
|
3395
|
-
"The 'skip_deployed' parameter is deprecated and will be removed in 1.7.0. "
|
|
3396
|
-
"This parameter is ignored.",
|
|
3397
|
-
# TODO: remove in 1.7.0
|
|
3398
|
-
FutureWarning,
|
|
3399
|
-
)
|
|
3400
|
-
|
|
3401
3390
|
if not overwrite_build_params:
|
|
3402
3391
|
# TODO: change overwrite_build_params default to True in 1.8.0
|
|
3403
3392
|
warnings.warn(
|
|
@@ -3660,9 +3649,7 @@ class MlrunProject(ModelObj):
|
|
|
3660
3649
|
:returns: List of function objects.
|
|
3661
3650
|
"""
|
|
3662
3651
|
|
|
3663
|
-
model_monitoring_labels_list = [
|
|
3664
|
-
f"{mm_constants.ModelMonitoringAppLabel.KEY}={mm_constants.ModelMonitoringAppLabel.VAL}"
|
|
3665
|
-
]
|
|
3652
|
+
model_monitoring_labels_list = [str(mm_constants.ModelMonitoringAppLabel())]
|
|
3666
3653
|
if labels:
|
|
3667
3654
|
model_monitoring_labels_list += labels
|
|
3668
3655
|
return self.list_functions(
|
mlrun/render.py
CHANGED
|
@@ -121,16 +121,8 @@ def artifacts_html(
|
|
|
121
121
|
html = ""
|
|
122
122
|
|
|
123
123
|
for artifact in artifacts:
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
attribute_value = artifact.get(attribute_name)
|
|
127
|
-
else:
|
|
128
|
-
attribute_value = artifact["spec"].get(attribute_name)
|
|
129
|
-
|
|
130
|
-
if mlrun.utils.is_legacy_artifact(artifact):
|
|
131
|
-
key = artifact["key"]
|
|
132
|
-
else:
|
|
133
|
-
key = artifact["metadata"]["key"]
|
|
124
|
+
attribute_value = artifact["spec"].get(attribute_name)
|
|
125
|
+
key = artifact["metadata"]["key"]
|
|
134
126
|
|
|
135
127
|
if not attribute_value:
|
|
136
128
|
mlrun.utils.logger.warning(
|