mlrun 1.7.0rc4__py3-none-any.whl → 1.7.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of mlrun might be problematic. Click here for more details.
- mlrun/__init__.py +11 -1
- mlrun/__main__.py +39 -121
- mlrun/{datastore/helpers.py → alerts/__init__.py} +2 -5
- mlrun/alerts/alert.py +248 -0
- mlrun/api/schemas/__init__.py +4 -3
- mlrun/artifacts/__init__.py +8 -3
- mlrun/artifacts/base.py +39 -254
- mlrun/artifacts/dataset.py +9 -190
- mlrun/artifacts/manager.py +73 -46
- mlrun/artifacts/model.py +30 -158
- mlrun/artifacts/plots.py +23 -380
- mlrun/common/constants.py +73 -1
- mlrun/common/db/sql_session.py +3 -2
- mlrun/common/formatters/__init__.py +21 -0
- mlrun/common/formatters/artifact.py +46 -0
- mlrun/common/formatters/base.py +113 -0
- mlrun/common/formatters/feature_set.py +44 -0
- mlrun/common/formatters/function.py +46 -0
- mlrun/common/formatters/pipeline.py +53 -0
- mlrun/common/formatters/project.py +51 -0
- mlrun/common/formatters/run.py +29 -0
- mlrun/common/helpers.py +11 -1
- mlrun/{runtimes → common/runtimes}/constants.py +32 -4
- mlrun/common/schemas/__init__.py +31 -4
- mlrun/common/schemas/alert.py +202 -0
- mlrun/common/schemas/api_gateway.py +196 -0
- mlrun/common/schemas/artifact.py +28 -1
- mlrun/common/schemas/auth.py +13 -2
- mlrun/common/schemas/client_spec.py +2 -1
- mlrun/common/schemas/common.py +7 -4
- mlrun/common/schemas/constants.py +3 -0
- mlrun/common/schemas/feature_store.py +58 -28
- mlrun/common/schemas/frontend_spec.py +8 -0
- mlrun/common/schemas/function.py +11 -0
- mlrun/common/schemas/hub.py +7 -9
- mlrun/common/schemas/model_monitoring/__init__.py +21 -4
- mlrun/common/schemas/model_monitoring/constants.py +136 -42
- mlrun/common/schemas/model_monitoring/grafana.py +9 -5
- mlrun/common/schemas/model_monitoring/model_endpoints.py +89 -41
- mlrun/common/schemas/notification.py +69 -12
- mlrun/{runtimes/mpijob/v1alpha1.py → common/schemas/pagination.py} +10 -13
- mlrun/common/schemas/pipeline.py +7 -0
- mlrun/common/schemas/project.py +67 -16
- mlrun/common/schemas/runs.py +17 -0
- mlrun/common/schemas/schedule.py +1 -1
- mlrun/common/schemas/workflow.py +10 -2
- mlrun/common/types.py +14 -1
- mlrun/config.py +233 -58
- mlrun/data_types/data_types.py +11 -1
- mlrun/data_types/spark.py +5 -4
- mlrun/data_types/to_pandas.py +75 -34
- mlrun/datastore/__init__.py +8 -10
- mlrun/datastore/alibaba_oss.py +131 -0
- mlrun/datastore/azure_blob.py +131 -43
- mlrun/datastore/base.py +107 -47
- mlrun/datastore/datastore.py +17 -7
- mlrun/datastore/datastore_profile.py +91 -7
- mlrun/datastore/dbfs_store.py +3 -7
- mlrun/datastore/filestore.py +1 -3
- mlrun/datastore/google_cloud_storage.py +92 -32
- mlrun/datastore/hdfs.py +5 -0
- mlrun/datastore/inmem.py +6 -3
- mlrun/datastore/redis.py +3 -2
- mlrun/datastore/s3.py +30 -12
- mlrun/datastore/snowflake_utils.py +45 -0
- mlrun/datastore/sources.py +274 -59
- mlrun/datastore/spark_utils.py +30 -0
- mlrun/datastore/store_resources.py +9 -7
- mlrun/datastore/storeytargets.py +151 -0
- mlrun/datastore/targets.py +387 -119
- mlrun/datastore/utils.py +68 -5
- mlrun/datastore/v3io.py +28 -50
- mlrun/db/auth_utils.py +152 -0
- mlrun/db/base.py +245 -20
- mlrun/db/factory.py +1 -4
- mlrun/db/httpdb.py +909 -231
- mlrun/db/nopdb.py +279 -14
- mlrun/errors.py +35 -5
- mlrun/execution.py +111 -38
- mlrun/feature_store/__init__.py +0 -2
- mlrun/feature_store/api.py +46 -53
- mlrun/feature_store/common.py +6 -11
- mlrun/feature_store/feature_set.py +48 -23
- mlrun/feature_store/feature_vector.py +13 -2
- mlrun/feature_store/ingestion.py +7 -6
- mlrun/feature_store/retrieval/base.py +9 -4
- mlrun/feature_store/retrieval/dask_merger.py +2 -0
- mlrun/feature_store/retrieval/job.py +13 -4
- mlrun/feature_store/retrieval/local_merger.py +2 -0
- mlrun/feature_store/retrieval/spark_merger.py +24 -32
- mlrun/feature_store/steps.py +38 -19
- mlrun/features.py +6 -14
- mlrun/frameworks/_common/plan.py +3 -3
- mlrun/frameworks/_dl_common/loggers/tensorboard_logger.py +7 -12
- mlrun/frameworks/_ml_common/plan.py +1 -1
- mlrun/frameworks/auto_mlrun/auto_mlrun.py +2 -2
- mlrun/frameworks/lgbm/__init__.py +1 -1
- mlrun/frameworks/lgbm/callbacks/callback.py +2 -4
- mlrun/frameworks/lgbm/model_handler.py +1 -1
- mlrun/frameworks/parallel_coordinates.py +4 -4
- mlrun/frameworks/pytorch/__init__.py +2 -2
- mlrun/frameworks/sklearn/__init__.py +1 -1
- mlrun/frameworks/sklearn/mlrun_interface.py +13 -3
- mlrun/frameworks/tf_keras/__init__.py +5 -2
- mlrun/frameworks/tf_keras/callbacks/logging_callback.py +1 -1
- mlrun/frameworks/tf_keras/mlrun_interface.py +2 -2
- mlrun/frameworks/xgboost/__init__.py +1 -1
- mlrun/k8s_utils.py +57 -12
- mlrun/launcher/__init__.py +1 -1
- mlrun/launcher/base.py +6 -5
- mlrun/launcher/client.py +13 -11
- mlrun/launcher/factory.py +1 -1
- mlrun/launcher/local.py +15 -5
- mlrun/launcher/remote.py +10 -3
- mlrun/lists.py +6 -2
- mlrun/model.py +297 -48
- mlrun/model_monitoring/__init__.py +1 -1
- mlrun/model_monitoring/api.py +152 -357
- mlrun/model_monitoring/applications/__init__.py +10 -0
- mlrun/model_monitoring/applications/_application_steps.py +190 -0
- mlrun/model_monitoring/applications/base.py +108 -0
- mlrun/model_monitoring/applications/context.py +341 -0
- mlrun/model_monitoring/{evidently_application.py → applications/evidently_base.py} +27 -22
- mlrun/model_monitoring/applications/histogram_data_drift.py +227 -91
- mlrun/model_monitoring/applications/results.py +99 -0
- mlrun/model_monitoring/controller.py +130 -303
- mlrun/model_monitoring/{stores/models/sqlite.py → db/__init__.py} +5 -10
- mlrun/model_monitoring/db/stores/__init__.py +136 -0
- mlrun/model_monitoring/db/stores/base/__init__.py +15 -0
- mlrun/model_monitoring/db/stores/base/store.py +213 -0
- mlrun/model_monitoring/db/stores/sqldb/__init__.py +13 -0
- mlrun/model_monitoring/db/stores/sqldb/models/__init__.py +71 -0
- mlrun/model_monitoring/db/stores/sqldb/models/base.py +190 -0
- mlrun/model_monitoring/db/stores/sqldb/models/mysql.py +103 -0
- mlrun/model_monitoring/{stores/models/mysql.py → db/stores/sqldb/models/sqlite.py} +19 -13
- mlrun/model_monitoring/db/stores/sqldb/sql_store.py +659 -0
- mlrun/model_monitoring/db/stores/v3io_kv/__init__.py +13 -0
- mlrun/model_monitoring/db/stores/v3io_kv/kv_store.py +726 -0
- mlrun/model_monitoring/db/tsdb/__init__.py +105 -0
- mlrun/model_monitoring/db/tsdb/base.py +448 -0
- mlrun/model_monitoring/db/tsdb/helpers.py +30 -0
- mlrun/model_monitoring/db/tsdb/tdengine/__init__.py +15 -0
- mlrun/model_monitoring/db/tsdb/tdengine/schemas.py +298 -0
- mlrun/model_monitoring/db/tsdb/tdengine/stream_graph_steps.py +42 -0
- mlrun/model_monitoring/db/tsdb/tdengine/tdengine_connector.py +522 -0
- mlrun/model_monitoring/db/tsdb/v3io/__init__.py +15 -0
- mlrun/model_monitoring/db/tsdb/v3io/stream_graph_steps.py +158 -0
- mlrun/model_monitoring/db/tsdb/v3io/v3io_connector.py +849 -0
- mlrun/model_monitoring/features_drift_table.py +34 -22
- mlrun/model_monitoring/helpers.py +177 -39
- mlrun/model_monitoring/model_endpoint.py +3 -2
- mlrun/model_monitoring/stream_processing.py +165 -398
- mlrun/model_monitoring/tracking_policy.py +7 -1
- mlrun/model_monitoring/writer.py +161 -125
- mlrun/package/packagers/default_packager.py +2 -2
- mlrun/package/packagers_manager.py +1 -0
- mlrun/package/utils/_formatter.py +2 -2
- mlrun/platforms/__init__.py +11 -10
- mlrun/platforms/iguazio.py +67 -228
- mlrun/projects/__init__.py +6 -1
- mlrun/projects/operations.py +47 -20
- mlrun/projects/pipelines.py +396 -249
- mlrun/projects/project.py +1176 -406
- mlrun/render.py +28 -22
- mlrun/run.py +208 -181
- mlrun/runtimes/__init__.py +76 -11
- mlrun/runtimes/base.py +54 -24
- mlrun/runtimes/daskjob.py +9 -2
- mlrun/runtimes/databricks_job/databricks_runtime.py +1 -0
- mlrun/runtimes/databricks_job/databricks_wrapper.py +1 -1
- mlrun/runtimes/funcdoc.py +1 -29
- mlrun/runtimes/kubejob.py +34 -128
- mlrun/runtimes/local.py +39 -10
- mlrun/runtimes/mpijob/__init__.py +0 -20
- mlrun/runtimes/mpijob/abstract.py +8 -8
- mlrun/runtimes/mpijob/v1.py +1 -1
- mlrun/runtimes/nuclio/__init__.py +1 -0
- mlrun/runtimes/nuclio/api_gateway.py +769 -0
- mlrun/runtimes/nuclio/application/__init__.py +15 -0
- mlrun/runtimes/nuclio/application/application.py +758 -0
- mlrun/runtimes/nuclio/application/reverse_proxy.go +95 -0
- mlrun/runtimes/nuclio/function.py +188 -68
- mlrun/runtimes/nuclio/serving.py +57 -60
- mlrun/runtimes/pod.py +191 -58
- mlrun/runtimes/remotesparkjob.py +11 -8
- mlrun/runtimes/sparkjob/spark3job.py +17 -18
- mlrun/runtimes/utils.py +40 -73
- mlrun/secrets.py +6 -2
- mlrun/serving/__init__.py +8 -1
- mlrun/serving/remote.py +2 -3
- mlrun/serving/routers.py +89 -64
- mlrun/serving/server.py +54 -26
- mlrun/serving/states.py +187 -56
- mlrun/serving/utils.py +19 -11
- mlrun/serving/v2_serving.py +136 -63
- mlrun/track/tracker.py +2 -1
- mlrun/track/trackers/mlflow_tracker.py +5 -0
- mlrun/utils/async_http.py +26 -6
- mlrun/utils/db.py +18 -0
- mlrun/utils/helpers.py +375 -105
- mlrun/utils/http.py +2 -2
- mlrun/utils/logger.py +75 -9
- mlrun/utils/notifications/notification/__init__.py +14 -10
- mlrun/utils/notifications/notification/base.py +48 -0
- mlrun/utils/notifications/notification/console.py +2 -0
- mlrun/utils/notifications/notification/git.py +24 -1
- mlrun/utils/notifications/notification/ipython.py +2 -0
- mlrun/utils/notifications/notification/slack.py +96 -21
- mlrun/utils/notifications/notification/webhook.py +63 -2
- mlrun/utils/notifications/notification_pusher.py +146 -16
- mlrun/utils/regex.py +9 -0
- mlrun/utils/retryer.py +3 -2
- mlrun/utils/v3io_clients.py +2 -3
- mlrun/utils/version/version.json +2 -2
- mlrun-1.7.2.dist-info/METADATA +390 -0
- mlrun-1.7.2.dist-info/RECORD +351 -0
- {mlrun-1.7.0rc4.dist-info → mlrun-1.7.2.dist-info}/WHEEL +1 -1
- mlrun/feature_store/retrieval/conversion.py +0 -271
- mlrun/kfpops.py +0 -868
- mlrun/model_monitoring/application.py +0 -310
- mlrun/model_monitoring/batch.py +0 -974
- mlrun/model_monitoring/controller_handler.py +0 -37
- mlrun/model_monitoring/prometheus.py +0 -216
- mlrun/model_monitoring/stores/__init__.py +0 -111
- mlrun/model_monitoring/stores/kv_model_endpoint_store.py +0 -574
- mlrun/model_monitoring/stores/model_endpoint_store.py +0 -145
- mlrun/model_monitoring/stores/models/__init__.py +0 -27
- mlrun/model_monitoring/stores/models/base.py +0 -84
- mlrun/model_monitoring/stores/sql_model_endpoint_store.py +0 -382
- mlrun/platforms/other.py +0 -305
- mlrun-1.7.0rc4.dist-info/METADATA +0 -269
- mlrun-1.7.0rc4.dist-info/RECORD +0 -321
- {mlrun-1.7.0rc4.dist-info → mlrun-1.7.2.dist-info}/LICENSE +0 -0
- {mlrun-1.7.0rc4.dist-info → mlrun-1.7.2.dist-info}/entry_points.txt +0 -0
- {mlrun-1.7.0rc4.dist-info → mlrun-1.7.2.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,190 @@
|
|
|
1
|
+
# Copyright 2024 Iguazio
|
|
2
|
+
#
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
#
|
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
#
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
14
|
+
|
|
15
|
+
import json
|
|
16
|
+
import traceback
|
|
17
|
+
from typing import Any, Optional, Union
|
|
18
|
+
|
|
19
|
+
import mlrun.common.schemas.alert as alert_objects
|
|
20
|
+
import mlrun.common.schemas.model_monitoring.constants as mm_constant
|
|
21
|
+
import mlrun.datastore
|
|
22
|
+
import mlrun.model_monitoring
|
|
23
|
+
from mlrun.model_monitoring.helpers import get_stream_path
|
|
24
|
+
from mlrun.serving import GraphContext
|
|
25
|
+
from mlrun.serving.utils import StepToDict
|
|
26
|
+
from mlrun.utils import logger
|
|
27
|
+
|
|
28
|
+
from .context import MonitoringApplicationContext
|
|
29
|
+
from .results import ModelMonitoringApplicationMetric, ModelMonitoringApplicationResult
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
class _PushToMonitoringWriter(StepToDict):
    """Graph step that pushes application results and metrics to the monitoring writer stream."""

    kind = "monitoring_application_stream_pusher"

    def __init__(
        self,
        project: str,
        writer_application_name: str,
        stream_uri: Optional[str] = None,
        name: Optional[str] = None,
    ):
        """
        Class for pushing application results to the monitoring writer stream.

        :param project:                 Project name.
        :param writer_application_name: Writer application name.
        :param stream_uri:              Stream URI for pushing results. When empty, the URI is resolved
                                        with `get_stream_path` from the project and the writer
                                        application name.
        :param name:                    Name of the PushToMonitoringWriter instance,
                                        defaults to "PushToMonitoringWriter".
        """
        self.project = project
        self.application_name_to_push = writer_application_name
        self.stream_uri = stream_uri or get_stream_path(
            project=self.project, function_name=self.application_name_to_push
        )
        # The stream pusher is created on first use, see `_lazy_init`
        self.output_stream = None
        self.name = name or "PushToMonitoringWriter"

    def do(
        self,
        event: tuple[
            list[
                Union[
                    ModelMonitoringApplicationResult, ModelMonitoringApplicationMetric
                ]
            ],
            MonitoringApplicationContext,
        ],
    ) -> None:
        """
        Push application results to the monitoring writer stream.

        Every result or metric is pushed as a separate writer event.

        :param event: Monitoring result(s) to push and the original event from the controller.
        """
        self._lazy_init()
        application_results, application_context = event

        # Fields shared by all the writer events of this application run
        common_fields = {
            mm_constant.WriterEvent.APPLICATION_NAME: application_context.application_name,
            mm_constant.WriterEvent.ENDPOINT_ID: application_context.endpoint_id,
            mm_constant.WriterEvent.START_INFER_TIME: application_context.start_infer_time.isoformat(
                sep=" ", timespec="microseconds"
            ),
            mm_constant.WriterEvent.END_INFER_TIME: application_context.end_infer_time.isoformat(
                sep=" ", timespec="microseconds"
            ),
        }

        for result in application_results:
            data = result.to_dict()
            if isinstance(result, ModelMonitoringApplicationResult):
                event_kind = mm_constant.WriterEventKind.RESULT
                # Only results carry the current sample statistics
                data[mm_constant.ResultData.CURRENT_STATS] = json.dumps(
                    application_context.sample_df_stats
                )
            else:
                event_kind = mm_constant.WriterEventKind.METRIC

            # Build a fresh dictionary per event, so an already pushed event is never
            # mutated by a later iteration (in case the pusher keeps a reference to it)
            writer_event = {
                **common_fields,
                mm_constant.WriterEvent.EVENT_KIND: event_kind,
                mm_constant.WriterEvent.DATA: json.dumps(data),
            }

            logger.info(
                f"Pushing data = {writer_event} \n to stream = {self.stream_uri}"
            )
            self.output_stream.push([writer_event])
            logger.info(f"Pushed data to {self.stream_uri} successfully")

    def _lazy_init(self):
        """Create the output stream pusher on the first call; later calls are no-ops."""
        if self.output_stream is None:
            self.output_stream = mlrun.datastore.get_stream_pusher(
                self.stream_uri,
            )
|
|
119
|
+
|
|
120
|
+
|
|
121
|
+
class _PrepareMonitoringEvent(StepToDict):
    def __init__(self, context: GraphContext, application_name: str) -> None:
        """
        Class for preparing the application event for the application step.

        :param context:          Graph context, passed on to every created application context.
        :param application_name: Application name.
        """
        self.graph_context = context
        self.application_name = application_name
        # Model endpoint objects of the endpoints handled so far, keyed by endpoint ID
        self.model_endpoints: dict[str, mlrun.model_monitoring.ModelEndpoint] = {}

    def do(self, event: dict[str, Any]) -> MonitoringApplicationContext:
        """
        Wrap the application event in a monitoring application context.

        :param event: Application event.
        :return: Application context.
        """
        monitoring_context = MonitoringApplicationContext(
            event=event,
            application_name=self.application_name,
            graph_context=self.graph_context,
            model_endpoint_dict=self.model_endpoints,
        )

        # Keep the first model endpoint object seen for each endpoint ID
        endpoint_id = monitoring_context.endpoint_id
        model_endpoint = monitoring_context.model_endpoint
        self.model_endpoints.setdefault(endpoint_id, model_endpoint)

        return monitoring_context
|
|
151
|
+
|
|
152
|
+
|
|
153
|
+
class _ApplicationErrorHandler(StepToDict):
    def __init__(self, project: str, name: Optional[str] = None):
        """
        Class for reporting a failure of a model monitoring application step.

        :param project: Project name.
        :param name:    Name of the step instance, defaults to "ApplicationErrorHandler".
        """
        self.project = project
        self.name = name or "ApplicationErrorHandler"

    def do(self, event):
        """
        Log the application error and generate a matching `MM_APP_FAILED` event.

        :param event: Application event. Its `body`, `error` and `timestamp` attributes are read.
        """
        failed_context = event.body

        error_data = {
            "Endpoint ID": failed_context.endpoint_id,
            "Application Class": failed_context.application_name,
            "Error": "".join(
                traceback.format_exception(None, event.error, event.error.__traceback__)
            ),
            "Timestamp": event.timestamp,
        }
        logger.error("Error in application step", **error_data)

        # The log gets the formatted traceback, the generated event gets the error object itself
        error_data["Error"] = event.error

        failed_application = alert_objects.EventEntities(
            kind=alert_objects.EventEntityKind.MODEL_MONITORING_APPLICATION,
            project=self.project,
            ids=[f"{self.project}_{failed_context.application_name}"],
        )
        event_data = alert_objects.Event(
            kind=alert_objects.EventKind.MM_APP_FAILED,
            entity=failed_application,
            value_dict=error_data,
        )

        run_db = mlrun.get_run_db()
        run_db.generate_event(
            name=alert_objects.EventKind.MM_APP_FAILED, event_data=event_data
        )
        logger.info("Event generated successfully")
|
|
@@ -0,0 +1,108 @@
|
|
|
1
|
+
# Copyright 2023 Iguazio
|
|
2
|
+
#
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
#
|
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
#
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
14
|
+
|
|
15
|
+
from abc import ABC, abstractmethod
|
|
16
|
+
from typing import Any, Union
|
|
17
|
+
|
|
18
|
+
import mlrun.model_monitoring.applications.context as mm_context
|
|
19
|
+
import mlrun.model_monitoring.applications.results as mm_results
|
|
20
|
+
from mlrun.serving.utils import MonitoringApplicationToDict
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
class ModelMonitoringApplicationBase(MonitoringApplicationToDict, ABC):
    """
    A base class for a model monitoring application.
    Inherit from this class to create a custom model monitoring application.

    example for very simple custom application::

        class MyApp(ModelMonitoringApplicationBase):
            def do_tracking(
                self,
                monitoring_context: mm_context.MonitoringApplicationContext,
            ) -> ModelMonitoringApplicationResult:
                monitoring_context.log_artifact(
                    TableArtifact(
                        "sample_df_stats",
                        df=monitoring_context.dict_to_histogram(
                            monitoring_context.sample_df_stats
                        ),
                    )
                )
                return ModelMonitoringApplicationResult(
                    name="data_drift_test",
                    value=0.5,
                    kind=mm_constant.ResultKindApp.data_drift,
                    status=mm_constant.ResultStatusApp.detected,
                )

    """

    kind = "monitoring_application"

    def do(
        self, monitoring_context: mm_context.MonitoringApplicationContext
    ) -> tuple[
        list[
            Union[
                mm_results.ModelMonitoringApplicationResult,
                mm_results.ModelMonitoringApplicationMetric,
            ]
        ],
        mm_context.MonitoringApplicationContext,
    ]:
        """
        Process the monitoring event and return application results & metrics.

        The output of `do_tracking` is normalized to a list: a dictionary is converted to
        a list of metrics (one per key), and a single object is wrapped in a list.

        :param monitoring_context:   (MonitoringApplicationContext) The monitoring application context.
        :returns:                    A tuple of:
                                        [0] = list of application results that can be either from type
                                        `ModelMonitoringApplicationResult`
                                        or from type `ModelMonitoringApplicationMetric`.
                                        [1] = the original application event, wrapped in
                                        `MonitoringApplicationContext` object
        """
        results = self.do_tracking(monitoring_context=monitoring_context)
        if isinstance(results, dict):
            # A dictionary holds metrics only: metric name -> metric value
            results = [
                mm_results.ModelMonitoringApplicationMetric(name=key, value=value)
                for key, value in results.items()
            ]
        elif not isinstance(results, list):
            results = [results]
        return results, monitoring_context

    @abstractmethod
    def do_tracking(
        self,
        monitoring_context: mm_context.MonitoringApplicationContext,
    ) -> Union[
        mm_results.ModelMonitoringApplicationResult,
        list[
            Union[
                mm_results.ModelMonitoringApplicationResult,
                mm_results.ModelMonitoringApplicationMetric,
            ]
        ],
        dict[str, Any],
    ]:
        """
        Implement this method with your custom monitoring logic.

        :param monitoring_context:      (MonitoringApplicationContext) The monitoring context to process.

        :returns:                       (ModelMonitoringApplicationResult) or
                                        (list[Union[ModelMonitoringApplicationResult,
                                        ModelMonitoringApplicationMetric]])
                                        or dict that contains the application metrics only (in this case
                                        each key is a metric name and its value is the metric value).
        """
        raise NotImplementedError
|
|
@@ -0,0 +1,341 @@
|
|
|
1
|
+
# Copyright 2024 Iguazio
|
|
2
|
+
#
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
#
|
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
#
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
14
|
+
|
|
15
|
+
import json
|
|
16
|
+
import socket
|
|
17
|
+
from typing import Any, Optional, cast
|
|
18
|
+
|
|
19
|
+
import numpy as np
|
|
20
|
+
import pandas as pd
|
|
21
|
+
|
|
22
|
+
import mlrun.common.constants as mlrun_constants
|
|
23
|
+
import mlrun.common.schemas.model_monitoring.constants as mm_constants
|
|
24
|
+
import mlrun.feature_store as fstore
|
|
25
|
+
import mlrun.features
|
|
26
|
+
import mlrun.serving
|
|
27
|
+
import mlrun.utils
|
|
28
|
+
from mlrun.artifacts import Artifact, DatasetArtifact, ModelArtifact, get_model
|
|
29
|
+
from mlrun.common.model_monitoring.helpers import FeatureStats, pad_features_hist
|
|
30
|
+
from mlrun.model_monitoring.helpers import (
|
|
31
|
+
calculate_inputs_statistics,
|
|
32
|
+
get_endpoint_record,
|
|
33
|
+
)
|
|
34
|
+
from mlrun.model_monitoring.model_endpoint import ModelEndpoint
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
class MonitoringApplicationContext:
|
|
38
|
+
"""
|
|
39
|
+
The monitoring context holds all the relevant information for the monitoring application,
|
|
40
|
+
and also it can be used for logging artifacts and results.
|
|
41
|
+
The monitoring context has the following attributes:
|
|
42
|
+
|
|
43
|
+
:param application_name: (str) The model monitoring application name.
|
|
44
|
+
:param project_name: (str) The project name.
|
|
45
|
+
:param project: (MlrunProject) The project object.
|
|
46
|
+
:param logger: (mlrun.utils.Logger) MLRun logger.
|
|
47
|
+
:param nuclio_logger: (nuclio.request.Logger) Nuclio logger.
|
|
48
|
+
:param sample_df_stats: (FeatureStats) The new sample distribution dictionary.
|
|
49
|
+
:param feature_stats: (FeatureStats) The train sample distribution dictionary.
|
|
50
|
+
:param sample_df: (pd.DataFrame) The new sample DataFrame.
|
|
51
|
+
:param start_infer_time: (pd.Timestamp) Start time of the monitoring schedule.
|
|
52
|
+
:param end_infer_time: (pd.Timestamp) End time of the monitoring schedule.
|
|
53
|
+
:param latest_request: (pd.Timestamp) Timestamp of the latest request on this endpoint_id.
|
|
54
|
+
:param endpoint_id: (str) ID of the monitored model endpoint
|
|
55
|
+
:param output_stream_uri: (str) URI of the output stream for results
|
|
56
|
+
:param model_endpoint: (ModelEndpoint) The model endpoint object.
|
|
57
|
+
:param feature_names: (list[str]) List of models feature names.
|
|
58
|
+
:param label_names: (list[str]) List of models label names.
|
|
59
|
+
:param model: (tuple[str, ModelArtifact, dict]) The model file, model spec object,
|
|
60
|
+
and a list of extra data items.
|
|
61
|
+
"""
|
|
62
|
+
|
|
63
|
+
def __init__(
    self,
    *,
    graph_context: mlrun.serving.GraphContext,
    application_name: str,
    event: dict[str, Any],
    model_endpoint_dict: dict[str, ModelEndpoint],
) -> None:
    """
    Initialize a `MonitoringApplicationContext` object.
    Note: this object should not be instantiated manually.

    :param graph_context:       The serving graph context, the source of the project name
                                and of the Nuclio logger.
    :param application_name:    The application name.
    :param event:               The instance data dictionary.
    :param model_endpoint_dict: Dictionary of model endpoints, looked up by the endpoint ID
                                of the event.
    """
    self.application_name = application_name

    self.project_name = graph_context.project
    self.project = mlrun.load_project(url=self.project_name)

    # MLRun Logger
    self.logger = mlrun.utils.create_logger(
        name="monitoring-application",
        level=mlrun.mlconf.log_level,
        formatter_kind=mlrun.mlconf.log_formatter,
    )
    # Nuclio logger - `nuclio.request.Logger`.
    # Note: this logger does not accept keyword arguments.
    self.nuclio_logger = graph_context.logger

    # event data
    event_keys = mm_constants.ApplicationEvent
    self.start_infer_time = pd.Timestamp(
        cast(str, event.get(event_keys.START_INFER_TIME))
    )
    self.end_infer_time = pd.Timestamp(
        cast(str, event.get(event_keys.END_INFER_TIME))
    )
    self.endpoint_id = cast(str, event.get(event_keys.ENDPOINT_ID))
    self.output_stream_uri = cast(str, event.get(event_keys.OUTPUT_STREAM_URI))

    # Statistics - calculated on first access
    self._feature_stats: Optional[FeatureStats] = None
    self._sample_df_stats: Optional[FeatureStats] = None

    # Default labels for the artifacts (built from the application name and the endpoint ID)
    self._default_labels = self._get_default_labels()

    # Persistent data - fetched when needed
    self._sample_df: Optional[pd.DataFrame] = None
    known_endpoint = model_endpoint_dict.get(self.endpoint_id)
    self._model_endpoint: Optional[ModelEndpoint] = known_endpoint
|
|
119
|
+
|
|
120
|
+
def _get_default_labels(self) -> dict[str, str]:
    """Build the labels identifying this host, application and endpoint as the producer."""
    internal_labels = mlrun_constants.MLRunInternalLabels
    default_labels = {
        internal_labels.runner_pod: socket.gethostname(),
        internal_labels.producer_type: "model-monitoring-app",
        internal_labels.app_name: self.application_name,
        internal_labels.endpoint_id: self.endpoint_id,
    }
    return default_labels
|
|
127
|
+
|
|
128
|
+
def _add_default_labels(self, labels: Optional[dict[str, str]]) -> dict[str, str]:
    """
    Merge the given labels with the default labels into a new dictionary.

    The default labels take precedence when a key appears in both.
    """
    given_labels = labels or {}
    return {**given_labels, **self._default_labels}
|
|
131
|
+
|
|
132
|
+
@property
def sample_df(self) -> pd.DataFrame:
    """
    The sample DataFrame of the monitoring window.

    Read once from the monitoring feature set of the model endpoint, filtered to
    `start_infer_time` - `end_infer_time`, and kept for later accesses.
    """
    if self._sample_df is not None:
        return self._sample_df

    feature_set = fstore.get_feature_set(
        self.model_endpoint.status.monitoring_feature_set_uri
    )
    feature_set_name = feature_set.metadata.name

    # A vector over all the features of the monitoring feature set
    vector = fstore.FeatureVector(
        name=f"{self.endpoint_id}_vector",
        features=[f"{feature_set_name}.*"],
        with_indexes=True,
    )
    vector.metadata.tag = self.application_name
    vector.feature_set_objects = {feature_set_name: feature_set}

    offline_response = vector.get_offline_features(
        start_time=self.start_infer_time,
        end_time=self.end_infer_time,
        timestamp_for_filtering=mm_constants.FeatureSetFeatures.time_stamp(),
    )
    sample = offline_response.to_dataframe()
    self._sample_df = sample.reset_index(drop=True)
    return self._sample_df
|
|
154
|
+
|
|
155
|
+
@property
def model_endpoint(self) -> ModelEndpoint:
    """The model endpoint object, built from the stored endpoint record on first access if needed."""
    if self._model_endpoint:
        return self._model_endpoint

    endpoint_record = get_endpoint_record(self.project_name, self.endpoint_id)
    self._model_endpoint = ModelEndpoint.from_flat_dict(endpoint_record)
    return self._model_endpoint
|
|
162
|
+
|
|
163
|
+
@property
def feature_stats(self) -> FeatureStats:
    """
    The feature statistics stored on the model endpoint.

    Parsed from the JSON in the endpoint status on first access, passed through
    `pad_features_hist` (which is expected to update the dictionary in place - its
    return value is not used), and cached for later accesses.
    """
    # Compare with `None` rather than by truthiness, so an empty statistics
    # dictionary is cached as well instead of being parsed again on every access
    if self._feature_stats is None:
        self._feature_stats = json.loads(self.model_endpoint.status.feature_stats)
        pad_features_hist(self._feature_stats)
    return self._feature_stats
|
|
169
|
+
|
|
170
|
+
@property
def sample_df_stats(self) -> FeatureStats:
    """
    Statistics of the sample dataframe.

    Calculated once with `calculate_inputs_statistics` from `feature_stats` and
    `sample_df`, and cached for later accesses.
    """
    # Compare with `None` rather than by truthiness, so an empty result is
    # cached as well instead of being recalculated on every access
    if self._sample_df_stats is None:
        self._sample_df_stats = calculate_inputs_statistics(
            self.feature_stats, self.sample_df
        )
    return self._sample_df_stats
|
|
178
|
+
|
|
179
|
+
@property
def feature_names(self) -> list[str]:
    """The feature names of the model"""
    names = self.model_endpoint.spec.feature_names
    if isinstance(names, list):
        return names
    # Anything that is not a list is treated as a JSON document
    return json.loads(names)
|
|
188
|
+
|
|
189
|
+
@property
def label_names(self) -> list[str]:
    """
    The label names of the model.

    Read from ``model_endpoint.spec.label_names``; a value that is not already
    a list is decoded with ``json.loads``.
    """
    names = self.model_endpoint.spec.label_names
    if not isinstance(names, list):
        # Not a list - decode the stored value as JSON
        decoded = json.loads(names)
    else:
        decoded = names
    return decoded
|
|
194
|
+
|
|
195
|
+
@property
def model(self) -> tuple[str, ModelArtifact, dict]:
    """
    The model referenced by the endpoint, as returned by ``get_model``.

    Per the return annotation this is a tuple of the model file, the model
    artifact object, and a dictionary of extra data items.
    """
    endpoint_model_uri = self.model_endpoint.spec.model_uri
    return get_model(endpoint_model_uri)
|
|
199
|
+
|
|
200
|
+
@staticmethod
def dict_to_histogram(histogram_dict: FeatureStats) -> pd.DataFrame:
    """
    Build a pandas DataFrame whose columns are the per-feature histograms.

    Only features whose statistics contain a ``"hist"`` entry are included. The
    first element of each ``"hist"`` entry is divided by the feature's ``"count"``,
    normalizing it to a probability distribution.

    :param histogram_dict: Histogram dictionary, keyed by feature name

    :returns: Histogram dataframe
    """
    # One normalized histogram per feature that has a "hist" entry
    normalized_by_feature = {
        name: np.array(feature_stats["hist"][0]) / feature_stats["count"]
        for name, feature_stats in histogram_dict.items()
        if "hist" in feature_stats
    }
    return pd.DataFrame(normalized_by_feature)
|
|
221
|
+
|
|
222
|
+
def log_artifact(
    self,
    item,
    body=None,
    tag: str = "",
    local_path: str = "",
    artifact_path: Optional[str] = None,
    format: Optional[str] = None,
    upload: Optional[bool] = None,
    labels: Optional[dict[str, str]] = None,
    target_path: Optional[str] = None,
    **kwargs,
) -> Artifact:
    """
    Log an artifact through the project, after passing the labels through
    ``_add_default_labels``.

    See :func:`~mlrun.projects.MlrunProject.log_artifact` for the documentation.
    """
    # Labels are resolved first, then every argument is forwarded unchanged
    enriched_labels = self._add_default_labels(labels)
    log_to_project = self.project.log_artifact
    return log_to_project(
        item,
        body=body,
        tag=tag,
        local_path=local_path,
        artifact_path=artifact_path,
        format=format,
        upload=upload,
        labels=enriched_labels,
        target_path=target_path,
        **kwargs,
    )
|
|
252
|
+
|
|
253
|
+
def log_dataset(
    self,
    key,
    df,
    tag="",
    local_path=None,
    artifact_path=None,
    upload=None,
    labels=None,
    format="",
    preview=None,
    stats=None,
    target_path="",
    extra_data=None,
    label_column: Optional[str] = None,
    **kwargs,
) -> DatasetArtifact:
    """
    Log a dataset artifact through the project, after passing the labels through
    ``_add_default_labels``.

    See :func:`~mlrun.projects.MlrunProject.log_dataset` for the documentation.
    """
    # Labels are resolved first, then every argument is forwarded unchanged
    enriched_labels = self._add_default_labels(labels)
    log_to_project = self.project.log_dataset
    return log_to_project(
        key,
        df,
        tag=tag,
        local_path=local_path,
        artifact_path=artifact_path,
        upload=upload,
        labels=enriched_labels,
        format=format,
        preview=preview,
        stats=stats,
        target_path=target_path,
        extra_data=extra_data,
        label_column=label_column,
        **kwargs,
    )
|
|
291
|
+
|
|
292
|
+
def log_model(
    self,
    key,
    body=None,
    framework="",
    tag="",
    model_dir=None,
    model_file=None,
    algorithm=None,
    metrics=None,
    parameters=None,
    artifact_path=None,
    upload=None,
    labels=None,
    inputs: Optional[list[mlrun.features.Feature]] = None,
    outputs: Optional[list[mlrun.features.Feature]] = None,
    feature_vector: Optional[str] = None,
    feature_weights: Optional[list] = None,
    training_set=None,
    label_column=None,
    extra_data=None,
    **kwargs,
) -> ModelArtifact:
    """
    Log a model artifact through the project, after passing the labels through
    ``_add_default_labels``.

    See :func:`~mlrun.projects.MlrunProject.log_model` for the documentation.
    """
    # Labels are resolved first, then every argument is forwarded unchanged
    enriched_labels = self._add_default_labels(labels)
    log_to_project = self.project.log_model
    return log_to_project(
        key,
        body=body,
        framework=framework,
        tag=tag,
        model_dir=model_dir,
        model_file=model_file,
        algorithm=algorithm,
        metrics=metrics,
        parameters=parameters,
        artifact_path=artifact_path,
        upload=upload,
        labels=enriched_labels,
        inputs=inputs,
        outputs=outputs,
        feature_vector=feature_vector,
        feature_weights=feature_weights,
        training_set=training_set,
        label_column=label_column,
        extra_data=extra_data,
        **kwargs,
    )
|