mlrun 1.7.0rc4__py3-none-any.whl → 1.7.0rc20__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of mlrun might be problematic. Click here for more details.
- mlrun/__init__.py +11 -1
- mlrun/__main__.py +25 -111
- mlrun/{datastore/helpers.py → alerts/__init__.py} +2 -5
- mlrun/alerts/alert.py +144 -0
- mlrun/api/schemas/__init__.py +4 -3
- mlrun/artifacts/__init__.py +8 -3
- mlrun/artifacts/base.py +38 -254
- mlrun/artifacts/dataset.py +9 -190
- mlrun/artifacts/manager.py +41 -47
- mlrun/artifacts/model.py +30 -158
- mlrun/artifacts/plots.py +23 -380
- mlrun/common/constants.py +68 -0
- mlrun/common/formatters/__init__.py +19 -0
- mlrun/{model_monitoring/stores/models/sqlite.py → common/formatters/artifact.py} +6 -8
- mlrun/common/formatters/base.py +78 -0
- mlrun/common/formatters/function.py +41 -0
- mlrun/common/formatters/pipeline.py +53 -0
- mlrun/common/formatters/project.py +51 -0
- mlrun/{runtimes → common/runtimes}/constants.py +32 -4
- mlrun/common/schemas/__init__.py +25 -4
- mlrun/common/schemas/alert.py +203 -0
- mlrun/common/schemas/api_gateway.py +148 -0
- mlrun/common/schemas/artifact.py +15 -5
- mlrun/common/schemas/auth.py +8 -2
- mlrun/common/schemas/client_spec.py +2 -0
- mlrun/common/schemas/frontend_spec.py +1 -0
- mlrun/common/schemas/function.py +4 -0
- mlrun/common/schemas/hub.py +7 -9
- mlrun/common/schemas/model_monitoring/__init__.py +19 -3
- mlrun/common/schemas/model_monitoring/constants.py +96 -26
- mlrun/common/schemas/model_monitoring/grafana.py +9 -5
- mlrun/common/schemas/model_monitoring/model_endpoints.py +86 -2
- mlrun/{runtimes/mpijob/v1alpha1.py → common/schemas/pagination.py} +10 -13
- mlrun/common/schemas/pipeline.py +0 -9
- mlrun/common/schemas/project.py +22 -21
- mlrun/common/types.py +7 -1
- mlrun/config.py +87 -19
- mlrun/data_types/data_types.py +4 -0
- mlrun/data_types/to_pandas.py +9 -9
- mlrun/datastore/__init__.py +5 -8
- mlrun/datastore/alibaba_oss.py +130 -0
- mlrun/datastore/azure_blob.py +4 -5
- mlrun/datastore/base.py +69 -30
- mlrun/datastore/datastore.py +10 -2
- mlrun/datastore/datastore_profile.py +90 -6
- mlrun/datastore/google_cloud_storage.py +1 -1
- mlrun/datastore/hdfs.py +5 -0
- mlrun/datastore/inmem.py +2 -2
- mlrun/datastore/redis.py +2 -2
- mlrun/datastore/s3.py +5 -0
- mlrun/datastore/snowflake_utils.py +43 -0
- mlrun/datastore/sources.py +172 -44
- mlrun/datastore/store_resources.py +7 -7
- mlrun/datastore/targets.py +285 -41
- mlrun/datastore/utils.py +68 -5
- mlrun/datastore/v3io.py +27 -50
- mlrun/db/auth_utils.py +152 -0
- mlrun/db/base.py +149 -14
- mlrun/db/factory.py +1 -1
- mlrun/db/httpdb.py +608 -178
- mlrun/db/nopdb.py +191 -7
- mlrun/errors.py +11 -0
- mlrun/execution.py +37 -20
- mlrun/feature_store/__init__.py +0 -2
- mlrun/feature_store/api.py +21 -52
- mlrun/feature_store/feature_set.py +48 -23
- mlrun/feature_store/feature_vector.py +2 -1
- mlrun/feature_store/ingestion.py +7 -6
- mlrun/feature_store/retrieval/base.py +9 -4
- mlrun/feature_store/retrieval/conversion.py +9 -9
- mlrun/feature_store/retrieval/dask_merger.py +2 -0
- mlrun/feature_store/retrieval/job.py +9 -3
- mlrun/feature_store/retrieval/local_merger.py +2 -0
- mlrun/feature_store/retrieval/spark_merger.py +34 -24
- mlrun/feature_store/steps.py +30 -19
- mlrun/features.py +4 -13
- mlrun/frameworks/_dl_common/loggers/tensorboard_logger.py +7 -12
- mlrun/frameworks/auto_mlrun/auto_mlrun.py +2 -2
- mlrun/frameworks/lgbm/__init__.py +1 -1
- mlrun/frameworks/lgbm/callbacks/callback.py +2 -4
- mlrun/frameworks/lgbm/model_handler.py +1 -1
- mlrun/frameworks/parallel_coordinates.py +2 -1
- mlrun/frameworks/pytorch/__init__.py +2 -2
- mlrun/frameworks/sklearn/__init__.py +1 -1
- mlrun/frameworks/tf_keras/__init__.py +5 -2
- mlrun/frameworks/tf_keras/callbacks/logging_callback.py +1 -1
- mlrun/frameworks/tf_keras/mlrun_interface.py +2 -2
- mlrun/frameworks/xgboost/__init__.py +1 -1
- mlrun/k8s_utils.py +10 -11
- mlrun/launcher/__init__.py +1 -1
- mlrun/launcher/base.py +6 -5
- mlrun/launcher/client.py +8 -6
- mlrun/launcher/factory.py +1 -1
- mlrun/launcher/local.py +9 -3
- mlrun/launcher/remote.py +9 -3
- mlrun/lists.py +6 -2
- mlrun/model.py +58 -19
- mlrun/model_monitoring/__init__.py +1 -1
- mlrun/model_monitoring/api.py +127 -301
- mlrun/model_monitoring/application.py +5 -296
- mlrun/model_monitoring/applications/__init__.py +11 -0
- mlrun/model_monitoring/applications/_application_steps.py +157 -0
- mlrun/model_monitoring/applications/base.py +282 -0
- mlrun/model_monitoring/applications/context.py +214 -0
- mlrun/model_monitoring/applications/evidently_base.py +211 -0
- mlrun/model_monitoring/applications/histogram_data_drift.py +224 -93
- mlrun/model_monitoring/applications/results.py +99 -0
- mlrun/model_monitoring/controller.py +30 -36
- mlrun/model_monitoring/db/__init__.py +18 -0
- mlrun/model_monitoring/{stores → db/stores}/__init__.py +43 -36
- mlrun/model_monitoring/db/stores/base/__init__.py +15 -0
- mlrun/model_monitoring/{stores/model_endpoint_store.py → db/stores/base/store.py} +58 -32
- mlrun/model_monitoring/db/stores/sqldb/__init__.py +13 -0
- mlrun/model_monitoring/db/stores/sqldb/models/__init__.py +71 -0
- mlrun/model_monitoring/{stores → db/stores/sqldb}/models/base.py +109 -5
- mlrun/model_monitoring/db/stores/sqldb/models/mysql.py +88 -0
- mlrun/model_monitoring/{stores/models/mysql.py → db/stores/sqldb/models/sqlite.py} +19 -13
- mlrun/model_monitoring/db/stores/sqldb/sql_store.py +684 -0
- mlrun/model_monitoring/db/stores/v3io_kv/__init__.py +13 -0
- mlrun/model_monitoring/{stores/kv_model_endpoint_store.py → db/stores/v3io_kv/kv_store.py} +302 -155
- mlrun/model_monitoring/db/tsdb/__init__.py +100 -0
- mlrun/model_monitoring/db/tsdb/base.py +329 -0
- mlrun/model_monitoring/db/tsdb/helpers.py +30 -0
- mlrun/model_monitoring/db/tsdb/tdengine/__init__.py +15 -0
- mlrun/model_monitoring/db/tsdb/tdengine/schemas.py +240 -0
- mlrun/model_monitoring/db/tsdb/tdengine/stream_graph_steps.py +45 -0
- mlrun/model_monitoring/db/tsdb/tdengine/tdengine_connector.py +397 -0
- mlrun/model_monitoring/db/tsdb/v3io/__init__.py +15 -0
- mlrun/model_monitoring/db/tsdb/v3io/stream_graph_steps.py +117 -0
- mlrun/model_monitoring/db/tsdb/v3io/v3io_connector.py +630 -0
- mlrun/model_monitoring/evidently_application.py +6 -118
- mlrun/model_monitoring/features_drift_table.py +34 -22
- mlrun/model_monitoring/helpers.py +100 -7
- mlrun/model_monitoring/model_endpoint.py +3 -2
- mlrun/model_monitoring/stream_processing.py +93 -228
- mlrun/model_monitoring/tracking_policy.py +7 -1
- mlrun/model_monitoring/writer.py +152 -124
- mlrun/package/packagers_manager.py +1 -0
- mlrun/package/utils/_formatter.py +2 -2
- mlrun/platforms/__init__.py +11 -10
- mlrun/platforms/iguazio.py +21 -202
- mlrun/projects/operations.py +30 -16
- mlrun/projects/pipelines.py +92 -99
- mlrun/projects/project.py +757 -268
- mlrun/render.py +15 -14
- mlrun/run.py +160 -162
- mlrun/runtimes/__init__.py +55 -3
- mlrun/runtimes/base.py +33 -19
- mlrun/runtimes/databricks_job/databricks_wrapper.py +1 -1
- mlrun/runtimes/funcdoc.py +0 -28
- mlrun/runtimes/kubejob.py +28 -122
- mlrun/runtimes/local.py +5 -2
- mlrun/runtimes/mpijob/__init__.py +0 -20
- mlrun/runtimes/mpijob/abstract.py +8 -8
- mlrun/runtimes/mpijob/v1.py +1 -1
- mlrun/runtimes/nuclio/__init__.py +1 -0
- mlrun/runtimes/nuclio/api_gateway.py +709 -0
- mlrun/runtimes/nuclio/application/__init__.py +15 -0
- mlrun/runtimes/nuclio/application/application.py +523 -0
- mlrun/runtimes/nuclio/application/reverse_proxy.go +95 -0
- mlrun/runtimes/nuclio/function.py +98 -58
- mlrun/runtimes/nuclio/serving.py +36 -42
- mlrun/runtimes/pod.py +196 -45
- mlrun/runtimes/remotesparkjob.py +1 -1
- mlrun/runtimes/sparkjob/spark3job.py +1 -1
- mlrun/runtimes/utils.py +6 -73
- mlrun/secrets.py +6 -2
- mlrun/serving/remote.py +2 -3
- mlrun/serving/routers.py +7 -4
- mlrun/serving/server.py +7 -8
- mlrun/serving/states.py +73 -43
- mlrun/serving/v2_serving.py +8 -7
- mlrun/track/tracker.py +2 -1
- mlrun/utils/async_http.py +25 -5
- mlrun/utils/helpers.py +141 -75
- mlrun/utils/http.py +1 -1
- mlrun/utils/logger.py +39 -7
- mlrun/utils/notifications/notification/__init__.py +14 -9
- mlrun/utils/notifications/notification/base.py +12 -0
- mlrun/utils/notifications/notification/console.py +2 -0
- mlrun/utils/notifications/notification/git.py +3 -1
- mlrun/utils/notifications/notification/ipython.py +2 -0
- mlrun/utils/notifications/notification/slack.py +101 -21
- mlrun/utils/notifications/notification/webhook.py +11 -1
- mlrun/utils/notifications/notification_pusher.py +147 -16
- mlrun/utils/retryer.py +3 -2
- mlrun/utils/v3io_clients.py +0 -1
- mlrun/utils/version/version.json +2 -2
- {mlrun-1.7.0rc4.dist-info → mlrun-1.7.0rc20.dist-info}/METADATA +33 -18
- mlrun-1.7.0rc20.dist-info/RECORD +353 -0
- {mlrun-1.7.0rc4.dist-info → mlrun-1.7.0rc20.dist-info}/WHEEL +1 -1
- mlrun/kfpops.py +0 -868
- mlrun/model_monitoring/batch.py +0 -974
- mlrun/model_monitoring/stores/models/__init__.py +0 -27
- mlrun/model_monitoring/stores/sql_model_endpoint_store.py +0 -382
- mlrun/platforms/other.py +0 -305
- mlrun-1.7.0rc4.dist-info/RECORD +0 -321
- {mlrun-1.7.0rc4.dist-info → mlrun-1.7.0rc20.dist-info}/LICENSE +0 -0
- {mlrun-1.7.0rc4.dist-info → mlrun-1.7.0rc20.dist-info}/entry_points.txt +0 -0
- {mlrun-1.7.0rc4.dist-info → mlrun-1.7.0rc20.dist-info}/top_level.txt +0 -0
mlrun/model_monitoring/writer.py
CHANGED
|
@@ -12,29 +12,29 @@
|
|
|
12
12
|
# See the License for the specific language governing permissions and
|
|
13
13
|
# limitations under the License.
|
|
14
14
|
|
|
15
|
-
import datetime
|
|
16
15
|
import json
|
|
17
|
-
from http import HTTPStatus
|
|
18
16
|
from typing import Any, NewType
|
|
19
17
|
|
|
20
|
-
import pandas as pd
|
|
21
|
-
from v3io.dataplane import Client as V3IOClient
|
|
22
|
-
from v3io_frames.client import ClientBase as V3IOFramesClient
|
|
23
|
-
from v3io_frames.errors import Error as V3IOFramesError
|
|
24
|
-
from v3io_frames.frames_pb2 import IGNORE
|
|
25
|
-
|
|
26
18
|
import mlrun.common.model_monitoring
|
|
19
|
+
import mlrun.common.schemas
|
|
20
|
+
import mlrun.common.schemas.alert as alert_objects
|
|
27
21
|
import mlrun.model_monitoring
|
|
28
|
-
|
|
29
|
-
|
|
22
|
+
from mlrun.common.schemas.model_monitoring.constants import (
|
|
23
|
+
EventFieldType,
|
|
24
|
+
HistogramDataDriftApplicationConstants,
|
|
25
|
+
MetricData,
|
|
26
|
+
ResultData,
|
|
27
|
+
ResultKindApp,
|
|
28
|
+
ResultStatusApp,
|
|
29
|
+
WriterEvent,
|
|
30
|
+
WriterEventKind,
|
|
31
|
+
)
|
|
30
32
|
from mlrun.common.schemas.notification import NotificationKind, NotificationSeverity
|
|
33
|
+
from mlrun.model_monitoring.helpers import get_endpoint_record, get_result_instance_fqn
|
|
31
34
|
from mlrun.serving.utils import StepToDict
|
|
32
35
|
from mlrun.utils import logger
|
|
33
36
|
from mlrun.utils.notifications.notification_pusher import CustomNotificationPusher
|
|
34
37
|
|
|
35
|
-
_TSDB_BE = "tsdb"
|
|
36
|
-
_TSDB_RATE = "1/s"
|
|
37
|
-
_TSDB_TABLE = "app-results"
|
|
38
38
|
_RawEvent = dict[str, Any]
|
|
39
39
|
_AppResultEvent = NewType("_AppResultEvent", _RawEvent)
|
|
40
40
|
|
|
@@ -69,20 +69,20 @@ class _Notifier:
|
|
|
69
69
|
self._severity = severity
|
|
70
70
|
|
|
71
71
|
def _should_send_event(self) -> bool:
|
|
72
|
-
return self._event[
|
|
72
|
+
return self._event[ResultData.RESULT_STATUS] >= ResultStatusApp.detected.value
|
|
73
73
|
|
|
74
74
|
def _generate_message(self) -> str:
|
|
75
75
|
return f"""\
|
|
76
76
|
The monitoring app `{self._event[WriterEvent.APPLICATION_NAME]}` \
|
|
77
|
-
of kind `{self._event[
|
|
77
|
+
of kind `{self._event[ResultData.RESULT_KIND]}` \
|
|
78
78
|
detected a problem in model endpoint ID `{self._event[WriterEvent.ENDPOINT_ID]}` \
|
|
79
79
|
at time `{self._event[WriterEvent.START_INFER_TIME]}`.
|
|
80
80
|
|
|
81
81
|
Result data:
|
|
82
|
-
Name: `{self._event[
|
|
83
|
-
Value: `{self._event[
|
|
84
|
-
Status: `{self._event[
|
|
85
|
-
Extra data: `{self._event[
|
|
82
|
+
Name: `{self._event[ResultData.RESULT_NAME]}`
|
|
83
|
+
Value: `{self._event[ResultData.RESULT_VALUE]}`
|
|
84
|
+
Status: `{self._event[ResultData.RESULT_STATUS]}`
|
|
85
|
+
Extra data: `{self._event[ResultData.RESULT_EXTRA_DATA]}`\
|
|
86
86
|
"""
|
|
87
87
|
|
|
88
88
|
def notify(self) -> None:
|
|
@@ -97,140 +97,168 @@ Extra data: `{self._event[WriterEvent.RESULT_EXTRA_DATA]}`\
|
|
|
97
97
|
|
|
98
98
|
class ModelMonitoringWriter(StepToDict):
|
|
99
99
|
"""
|
|
100
|
-
Write monitoring
|
|
100
|
+
Write monitoring application results to the target databases
|
|
101
101
|
"""
|
|
102
102
|
|
|
103
103
|
kind = "monitoring_application_stream_pusher"
|
|
104
104
|
|
|
105
|
-
def __init__(self, project: str) -> None:
|
|
105
|
+
def __init__(self, project: str, tsdb_secret_provider=None) -> None:
|
|
106
106
|
self.project = project
|
|
107
107
|
self.name = project # required for the deployment process
|
|
108
|
-
|
|
109
|
-
self._kv_client = self._get_v3io_client().kv
|
|
110
|
-
self._tsdb_client = self._get_v3io_frames_client(self._v3io_container)
|
|
108
|
+
|
|
111
109
|
self._custom_notifier = CustomNotificationPusher(
|
|
112
110
|
notification_types=[NotificationKind.slack]
|
|
113
111
|
)
|
|
114
|
-
self._create_tsdb_table()
|
|
115
|
-
self._kv_schemas = []
|
|
116
|
-
|
|
117
|
-
@staticmethod
|
|
118
|
-
def get_v3io_container(project_name: str) -> str:
|
|
119
|
-
return f"users/pipelines/{project_name}/monitoring-apps"
|
|
120
112
|
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
return mlrun.utils.v3io_clients.get_v3io_client(
|
|
124
|
-
endpoint=mlrun.mlconf.v3io_api,
|
|
113
|
+
self._app_result_store = mlrun.model_monitoring.get_store_object(
|
|
114
|
+
project=self.project
|
|
125
115
|
)
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
def _get_v3io_frames_client(v3io_container: str) -> V3IOFramesClient:
|
|
129
|
-
return mlrun.utils.v3io_clients.get_frames_client(
|
|
130
|
-
address=mlrun.mlconf.v3io_framesd,
|
|
131
|
-
container=v3io_container,
|
|
116
|
+
self._tsdb_connector = mlrun.model_monitoring.get_tsdb_connector(
|
|
117
|
+
project=self.project, secret_provider=tsdb_secret_provider
|
|
132
118
|
)
|
|
119
|
+
self._endpoints_records = {}
|
|
133
120
|
|
|
134
|
-
def
|
|
135
|
-
self
|
|
136
|
-
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
|
|
121
|
+
def _generate_event_on_drift(
|
|
122
|
+
self,
|
|
123
|
+
entity_id: str,
|
|
124
|
+
result_status: int,
|
|
125
|
+
event_value: dict,
|
|
126
|
+
project_name: str,
|
|
127
|
+
result_kind: int,
|
|
128
|
+
) -> None:
|
|
129
|
+
logger.info("Sending an event")
|
|
130
|
+
entity = mlrun.common.schemas.alert.EventEntities(
|
|
131
|
+
kind=alert_objects.EventEntityKind.MODEL_ENDPOINT_RESULT,
|
|
132
|
+
project=project_name,
|
|
133
|
+
ids=[entity_id],
|
|
140
134
|
)
|
|
141
135
|
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
endpoint_id = event.pop(WriterEvent.ENDPOINT_ID)
|
|
145
|
-
app_name = event.pop(WriterEvent.APPLICATION_NAME)
|
|
146
|
-
metric_name = event.pop(WriterEvent.RESULT_NAME)
|
|
147
|
-
attributes = {metric_name: json.dumps(event)}
|
|
148
|
-
self._kv_client.update(
|
|
149
|
-
container=self._v3io_container,
|
|
150
|
-
table_path=endpoint_id,
|
|
151
|
-
key=app_name,
|
|
152
|
-
attributes=attributes,
|
|
136
|
+
event_kind = self._generate_alert_event_kind(
|
|
137
|
+
result_status=result_status, result_kind=result_kind
|
|
153
138
|
)
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
|
|
158
|
-
|
|
159
|
-
"""Generate V3IO KV schema file which will be used by the model monitoring applications dashboard in Grafana."""
|
|
160
|
-
fields = [
|
|
161
|
-
{"name": WriterEvent.RESULT_NAME, "type": "string", "nullable": False}
|
|
162
|
-
]
|
|
163
|
-
res = self._kv_client.create_schema(
|
|
164
|
-
container=self._v3io_container,
|
|
165
|
-
table_path=endpoint_id,
|
|
166
|
-
key=WriterEvent.APPLICATION_NAME,
|
|
167
|
-
fields=fields,
|
|
139
|
+
|
|
140
|
+
event_data = mlrun.common.schemas.Event(
|
|
141
|
+
kind=alert_objects.EventKind(value=event_kind),
|
|
142
|
+
entity=entity,
|
|
143
|
+
value_dict=event_value,
|
|
168
144
|
)
|
|
169
|
-
|
|
170
|
-
|
|
171
|
-
|
|
172
|
-
|
|
145
|
+
mlrun.get_run_db().generate_event(event_kind, event_data)
|
|
146
|
+
|
|
147
|
+
@staticmethod
|
|
148
|
+
def _generate_alert_event_kind(
|
|
149
|
+
result_kind: int, result_status: int
|
|
150
|
+
) -> alert_objects.EventKind:
|
|
151
|
+
"""Generate the required Event Kind format for the alerting system"""
|
|
152
|
+
if result_kind == ResultKindApp.custom.value:
|
|
153
|
+
# Custom kind is represented as an anomaly detection
|
|
154
|
+
event_kind = "mm_app_anomaly"
|
|
173
155
|
else:
|
|
174
|
-
|
|
175
|
-
"Generated V3IO KV schema successfully", endpoint_id=endpoint_id
|
|
176
|
-
)
|
|
177
|
-
self._kv_schemas.append(endpoint_id)
|
|
156
|
+
event_kind = ResultKindApp(value=result_kind).name
|
|
178
157
|
|
|
179
|
-
|
|
180
|
-
|
|
181
|
-
|
|
182
|
-
|
|
183
|
-
)
|
|
184
|
-
del event[WriterEvent.RESULT_EXTRA_DATA]
|
|
185
|
-
try:
|
|
186
|
-
self._tsdb_client.write(
|
|
187
|
-
backend=_TSDB_BE,
|
|
188
|
-
table=_TSDB_TABLE,
|
|
189
|
-
dfs=pd.DataFrame.from_records([event]),
|
|
190
|
-
index_cols=[
|
|
191
|
-
WriterEvent.END_INFER_TIME,
|
|
192
|
-
WriterEvent.ENDPOINT_ID,
|
|
193
|
-
WriterEvent.APPLICATION_NAME,
|
|
194
|
-
WriterEvent.RESULT_NAME,
|
|
195
|
-
],
|
|
196
|
-
)
|
|
197
|
-
logger.info("Updated V3IO TSDB successfully", table=_TSDB_TABLE)
|
|
198
|
-
except V3IOFramesError as err:
|
|
199
|
-
logger.warn(
|
|
200
|
-
"Could not write drift measures to TSDB",
|
|
201
|
-
err=err,
|
|
202
|
-
table=_TSDB_TABLE,
|
|
203
|
-
event=event,
|
|
204
|
-
)
|
|
158
|
+
if result_status == ResultStatusApp.detected.value:
|
|
159
|
+
event_kind = f"{event_kind}_detected"
|
|
160
|
+
else:
|
|
161
|
+
event_kind = f"{event_kind}_suspected"
|
|
162
|
+
return alert_objects.EventKind(value=event_kind)
|
|
205
163
|
|
|
206
164
|
@staticmethod
|
|
207
|
-
def _reconstruct_event(event: _RawEvent) -> _AppResultEvent:
|
|
165
|
+
def _reconstruct_event(event: _RawEvent) -> tuple[_AppResultEvent, WriterEventKind]:
|
|
208
166
|
"""
|
|
209
167
|
Modify the raw event into the expected monitoring application event
|
|
210
168
|
schema as defined in `mlrun.common.schemas.model_monitoring.constants.WriterEvent`
|
|
211
169
|
"""
|
|
212
|
-
|
|
213
|
-
|
|
214
|
-
|
|
170
|
+
if not isinstance(event, dict):
|
|
171
|
+
raise _WriterEventTypeError(
|
|
172
|
+
f"The event is of type: {type(event)}, expected a dictionary"
|
|
215
173
|
)
|
|
216
|
-
|
|
217
|
-
|
|
174
|
+
kind = event.pop(WriterEvent.EVENT_KIND, WriterEventKind.RESULT)
|
|
175
|
+
result_event = _AppResultEvent(json.loads(event.pop(WriterEvent.DATA, "{}")))
|
|
176
|
+
if not result_event: # BC for < 1.7.0, can be removed in 1.9.0
|
|
177
|
+
result_event = _AppResultEvent(event)
|
|
178
|
+
else:
|
|
179
|
+
result_event.update(_AppResultEvent(event))
|
|
180
|
+
|
|
181
|
+
expected_keys = list(
|
|
182
|
+
set(WriterEvent.list()).difference(
|
|
183
|
+
[WriterEvent.EVENT_KIND, WriterEvent.DATA]
|
|
218
184
|
)
|
|
219
|
-
|
|
220
|
-
|
|
185
|
+
)
|
|
186
|
+
if kind == WriterEventKind.METRIC:
|
|
187
|
+
expected_keys.extend(MetricData.list())
|
|
188
|
+
elif kind == WriterEventKind.RESULT:
|
|
189
|
+
expected_keys.extend(ResultData.list())
|
|
190
|
+
else:
|
|
221
191
|
raise _WriterEventValueError(
|
|
222
|
-
"
|
|
223
|
-
|
|
224
|
-
|
|
225
|
-
|
|
226
|
-
raise
|
|
227
|
-
f"The event
|
|
228
|
-
|
|
192
|
+
f"Unknown event kind: {kind}, expected one of: {WriterEventKind.list()}"
|
|
193
|
+
)
|
|
194
|
+
missing_keys = [key for key in expected_keys if key not in result_event]
|
|
195
|
+
if missing_keys:
|
|
196
|
+
raise _WriterEventValueError(
|
|
197
|
+
f"The received event misses some keys compared to the expected "
|
|
198
|
+
f"monitoring application event schema: {missing_keys}"
|
|
199
|
+
)
|
|
200
|
+
|
|
201
|
+
return result_event, kind
|
|
229
202
|
|
|
230
203
|
def do(self, event: _RawEvent) -> None:
|
|
231
|
-
event = self._reconstruct_event(event)
|
|
204
|
+
event, kind = self._reconstruct_event(event)
|
|
232
205
|
logger.info("Starting to write event", event=event)
|
|
233
|
-
self.
|
|
234
|
-
self.
|
|
235
|
-
|
|
206
|
+
self._tsdb_connector.write_application_event(event=event.copy(), kind=kind)
|
|
207
|
+
self._app_result_store.write_application_event(event=event.copy(), kind=kind)
|
|
208
|
+
|
|
236
209
|
logger.info("Completed event DB writes")
|
|
210
|
+
|
|
211
|
+
if kind == WriterEventKind.RESULT:
|
|
212
|
+
_Notifier(event=event, notification_pusher=self._custom_notifier).notify()
|
|
213
|
+
|
|
214
|
+
if (
|
|
215
|
+
mlrun.mlconf.alerts.mode == mlrun.common.schemas.alert.AlertsModes.enabled
|
|
216
|
+
and kind == WriterEventKind.RESULT
|
|
217
|
+
and (
|
|
218
|
+
event[ResultData.RESULT_STATUS] == ResultStatusApp.detected.value
|
|
219
|
+
or event[ResultData.RESULT_STATUS]
|
|
220
|
+
== ResultStatusApp.potential_detection.value
|
|
221
|
+
)
|
|
222
|
+
):
|
|
223
|
+
endpoint_id = event[WriterEvent.ENDPOINT_ID]
|
|
224
|
+
endpoint_record = self._endpoints_records.setdefault(
|
|
225
|
+
endpoint_id,
|
|
226
|
+
get_endpoint_record(project=self.project, endpoint_id=endpoint_id),
|
|
227
|
+
)
|
|
228
|
+
event_value = {
|
|
229
|
+
"app_name": event[WriterEvent.APPLICATION_NAME],
|
|
230
|
+
"model": endpoint_record.get(EventFieldType.MODEL),
|
|
231
|
+
"model_endpoint_id": event[WriterEvent.ENDPOINT_ID],
|
|
232
|
+
"result_name": event[ResultData.RESULT_NAME],
|
|
233
|
+
"result_value": event[ResultData.RESULT_VALUE],
|
|
234
|
+
}
|
|
235
|
+
self._generate_event_on_drift(
|
|
236
|
+
entity_id=get_result_instance_fqn(
|
|
237
|
+
event[WriterEvent.ENDPOINT_ID],
|
|
238
|
+
event[WriterEvent.APPLICATION_NAME],
|
|
239
|
+
event[ResultData.RESULT_NAME],
|
|
240
|
+
),
|
|
241
|
+
result_status=event[ResultData.RESULT_STATUS],
|
|
242
|
+
event_value=event_value,
|
|
243
|
+
project_name=self.project,
|
|
244
|
+
result_kind=event[ResultData.RESULT_KIND],
|
|
245
|
+
)
|
|
246
|
+
|
|
247
|
+
if (
|
|
248
|
+
kind == WriterEventKind.RESULT
|
|
249
|
+
and event[WriterEvent.APPLICATION_NAME]
|
|
250
|
+
== HistogramDataDriftApplicationConstants.NAME
|
|
251
|
+
and event[ResultData.RESULT_NAME]
|
|
252
|
+
== HistogramDataDriftApplicationConstants.GENERAL_RESULT_NAME
|
|
253
|
+
):
|
|
254
|
+
endpoint_id = event[WriterEvent.ENDPOINT_ID]
|
|
255
|
+
logger.info(
|
|
256
|
+
"Updating the model endpoint with metadata specific to the histogram "
|
|
257
|
+
"data drift app",
|
|
258
|
+
endpoint_id=endpoint_id,
|
|
259
|
+
)
|
|
260
|
+
store = mlrun.model_monitoring.get_store_object(project=self.project)
|
|
261
|
+
store.update_model_endpoint(
|
|
262
|
+
endpoint_id=endpoint_id,
|
|
263
|
+
attributes=json.loads(event[ResultData.RESULT_EXTRA_DATA]),
|
|
264
|
+
)
|
|
@@ -142,11 +142,11 @@ class _YAMLFormatter(_Formatter):
|
|
|
142
142
|
|
|
143
143
|
:param obj: The object to write.
|
|
144
144
|
:param file_path: The file path to write to.
|
|
145
|
-
:param dump_kwargs: Additional keyword arguments to pass to the `yaml.
|
|
145
|
+
:param dump_kwargs: Additional keyword arguments to pass to the `yaml.safe_dump` method of the formatter in use.
|
|
146
146
|
"""
|
|
147
147
|
dump_kwargs = dump_kwargs or cls.DEFAULT_DUMP_KWARGS
|
|
148
148
|
with open(file_path, "w") as file:
|
|
149
|
-
yaml.
|
|
149
|
+
yaml.safe_dump(obj, file, **dump_kwargs)
|
|
150
150
|
|
|
151
151
|
@classmethod
|
|
152
152
|
def read(cls, file_path: str) -> Union[list, dict]:
|
mlrun/platforms/__init__.py
CHANGED
|
@@ -17,22 +17,23 @@ import json
|
|
|
17
17
|
from pprint import pprint
|
|
18
18
|
from time import sleep
|
|
19
19
|
|
|
20
|
-
from .
|
|
21
|
-
|
|
22
|
-
VolumeMount,
|
|
23
|
-
add_or_refresh_credentials,
|
|
24
|
-
is_iguazio_session_cookie,
|
|
25
|
-
mount_v3io,
|
|
26
|
-
v3io_cred,
|
|
27
|
-
)
|
|
28
|
-
from .other import (
|
|
20
|
+
from mlrun_pipelines.common.mounts import VolumeMount
|
|
21
|
+
from mlrun_pipelines.mounts import (
|
|
29
22
|
auto_mount,
|
|
30
23
|
mount_configmap,
|
|
31
24
|
mount_hostpath,
|
|
32
25
|
mount_pvc,
|
|
33
26
|
mount_s3,
|
|
34
27
|
mount_secret,
|
|
28
|
+
mount_v3io,
|
|
35
29
|
set_env_variables,
|
|
30
|
+
v3io_cred,
|
|
31
|
+
)
|
|
32
|
+
|
|
33
|
+
from .iguazio import (
|
|
34
|
+
V3ioStreamClient,
|
|
35
|
+
add_or_refresh_credentials,
|
|
36
|
+
is_iguazio_session_cookie,
|
|
36
37
|
)
|
|
37
38
|
|
|
38
39
|
|
|
@@ -48,7 +49,7 @@ def watch_stream(
|
|
|
48
49
|
|
|
49
50
|
example::
|
|
50
51
|
|
|
51
|
-
watch_stream(
|
|
52
|
+
watch_stream("v3io:///users/admin/mystream")
|
|
52
53
|
|
|
53
54
|
:param url: stream url
|
|
54
55
|
:param shard_ids: range or list of shard IDs
|
mlrun/platforms/iguazio.py
CHANGED
|
@@ -15,12 +15,9 @@
|
|
|
15
15
|
import json
|
|
16
16
|
import os
|
|
17
17
|
import urllib
|
|
18
|
-
from collections import namedtuple
|
|
19
18
|
from urllib.parse import urlparse
|
|
20
19
|
|
|
21
|
-
import kfp.dsl
|
|
22
20
|
import requests
|
|
23
|
-
import semver
|
|
24
21
|
import v3io
|
|
25
22
|
|
|
26
23
|
import mlrun.errors
|
|
@@ -29,203 +26,6 @@ from mlrun.utils import dict_to_json
|
|
|
29
26
|
|
|
30
27
|
_cached_control_session = None
|
|
31
28
|
|
|
32
|
-
VolumeMount = namedtuple("Mount", ["path", "sub_path"])
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
def mount_v3io(
|
|
36
|
-
name="v3io",
|
|
37
|
-
remote="",
|
|
38
|
-
access_key="",
|
|
39
|
-
user="",
|
|
40
|
-
secret=None,
|
|
41
|
-
volume_mounts=None,
|
|
42
|
-
):
|
|
43
|
-
"""Modifier function to apply to a Container Op to volume mount a v3io path
|
|
44
|
-
|
|
45
|
-
:param name: the volume name
|
|
46
|
-
:param remote: the v3io path to use for the volume. ~/ prefix will be replaced with /users/<username>/
|
|
47
|
-
:param access_key: the access key used to auth against v3io. if not given V3IO_ACCESS_KEY env var will be used
|
|
48
|
-
:param user: the username used to auth against v3io. if not given V3IO_USERNAME env var will be used
|
|
49
|
-
:param secret: k8s secret name which would be used to get the username and access key to auth against v3io.
|
|
50
|
-
:param volume_mounts: list of VolumeMount. empty volume mounts & remote will default to mount /v3io & /User.
|
|
51
|
-
"""
|
|
52
|
-
volume_mounts, user = _enrich_and_validate_v3io_mounts(
|
|
53
|
-
remote=remote,
|
|
54
|
-
volume_mounts=volume_mounts,
|
|
55
|
-
user=user,
|
|
56
|
-
)
|
|
57
|
-
|
|
58
|
-
def _attach_volume_mounts_and_creds(container_op: kfp.dsl.ContainerOp):
|
|
59
|
-
from kubernetes import client as k8s_client
|
|
60
|
-
|
|
61
|
-
vol = v3io_to_vol(name, remote, access_key, user, secret=secret)
|
|
62
|
-
container_op.add_volume(vol)
|
|
63
|
-
for volume_mount in volume_mounts:
|
|
64
|
-
container_op.container.add_volume_mount(
|
|
65
|
-
k8s_client.V1VolumeMount(
|
|
66
|
-
mount_path=volume_mount.path,
|
|
67
|
-
sub_path=volume_mount.sub_path,
|
|
68
|
-
name=name,
|
|
69
|
-
)
|
|
70
|
-
)
|
|
71
|
-
|
|
72
|
-
if not secret:
|
|
73
|
-
container_op = v3io_cred(access_key=access_key, user=user)(container_op)
|
|
74
|
-
return container_op
|
|
75
|
-
|
|
76
|
-
return _attach_volume_mounts_and_creds
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
def _enrich_and_validate_v3io_mounts(remote="", volume_mounts=None, user=""):
|
|
80
|
-
if remote and not volume_mounts:
|
|
81
|
-
raise mlrun.errors.MLRunInvalidArgumentError(
|
|
82
|
-
"volume_mounts must be specified when remote is given"
|
|
83
|
-
)
|
|
84
|
-
|
|
85
|
-
# Empty remote & volume_mounts defaults are volume mounts of /v3io and /User
|
|
86
|
-
if not remote and not volume_mounts:
|
|
87
|
-
user = _resolve_mount_user(user)
|
|
88
|
-
if not user:
|
|
89
|
-
raise mlrun.errors.MLRunInvalidArgumentError(
|
|
90
|
-
"user name/env must be specified when using empty remote and volume_mounts"
|
|
91
|
-
)
|
|
92
|
-
volume_mounts = [
|
|
93
|
-
VolumeMount(path="/v3io", sub_path=""),
|
|
94
|
-
VolumeMount(path="/User", sub_path="users/" + user),
|
|
95
|
-
]
|
|
96
|
-
|
|
97
|
-
if not isinstance(volume_mounts, list) and any(
|
|
98
|
-
[not isinstance(x, VolumeMount) for x in volume_mounts]
|
|
99
|
-
):
|
|
100
|
-
raise TypeError("mounts should be a list of Mount")
|
|
101
|
-
|
|
102
|
-
return volume_mounts, user
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
def _resolve_mount_user(user=None):
|
|
106
|
-
return user or os.environ.get("V3IO_USERNAME")
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
def mount_spark_conf():
|
|
110
|
-
def _mount_spark(container_op: kfp.dsl.ContainerOp):
|
|
111
|
-
from kubernetes import client as k8s_client
|
|
112
|
-
|
|
113
|
-
container_op.container.add_volume_mount(
|
|
114
|
-
k8s_client.V1VolumeMount(
|
|
115
|
-
name="spark-master-config", mount_path="/etc/config/spark"
|
|
116
|
-
)
|
|
117
|
-
)
|
|
118
|
-
return container_op
|
|
119
|
-
|
|
120
|
-
return _mount_spark
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
def mount_v3iod(namespace, v3io_config_configmap):
|
|
124
|
-
def _mount_v3iod(container_op: kfp.dsl.ContainerOp):
|
|
125
|
-
from kubernetes import client as k8s_client
|
|
126
|
-
|
|
127
|
-
def add_vol(name, mount_path, host_path):
|
|
128
|
-
vol = k8s_client.V1Volume(
|
|
129
|
-
name=name,
|
|
130
|
-
host_path=k8s_client.V1HostPathVolumeSource(path=host_path, type=""),
|
|
131
|
-
)
|
|
132
|
-
container_op.add_volume(vol)
|
|
133
|
-
container_op.container.add_volume_mount(
|
|
134
|
-
k8s_client.V1VolumeMount(mount_path=mount_path, name=name)
|
|
135
|
-
)
|
|
136
|
-
|
|
137
|
-
# this is a legacy path for the daemon shared memory
|
|
138
|
-
host_path = "/dev/shm/"
|
|
139
|
-
|
|
140
|
-
# path to shared memory for daemon was changed in Iguazio 3.2.3-b1
|
|
141
|
-
igz_version = mlrun.mlconf.get_parsed_igz_version()
|
|
142
|
-
if igz_version and igz_version >= semver.VersionInfo.parse("3.2.3-b1"):
|
|
143
|
-
host_path = "/var/run/iguazio/dayman-shm/"
|
|
144
|
-
add_vol(name="shm", mount_path="/dev/shm", host_path=host_path + namespace)
|
|
145
|
-
|
|
146
|
-
add_vol(
|
|
147
|
-
name="v3iod-comm",
|
|
148
|
-
mount_path="/var/run/iguazio/dayman",
|
|
149
|
-
host_path="/var/run/iguazio/dayman/" + namespace,
|
|
150
|
-
)
|
|
151
|
-
|
|
152
|
-
vol = k8s_client.V1Volume(
|
|
153
|
-
name="daemon-health", empty_dir=k8s_client.V1EmptyDirVolumeSource()
|
|
154
|
-
)
|
|
155
|
-
container_op.add_volume(vol)
|
|
156
|
-
container_op.container.add_volume_mount(
|
|
157
|
-
k8s_client.V1VolumeMount(
|
|
158
|
-
mount_path="/var/run/iguazio/daemon_health", name="daemon-health"
|
|
159
|
-
)
|
|
160
|
-
)
|
|
161
|
-
|
|
162
|
-
vol = k8s_client.V1Volume(
|
|
163
|
-
name="v3io-config",
|
|
164
|
-
config_map=k8s_client.V1ConfigMapVolumeSource(
|
|
165
|
-
name=v3io_config_configmap, default_mode=420
|
|
166
|
-
),
|
|
167
|
-
)
|
|
168
|
-
container_op.add_volume(vol)
|
|
169
|
-
container_op.container.add_volume_mount(
|
|
170
|
-
k8s_client.V1VolumeMount(mount_path="/etc/config/v3io", name="v3io-config")
|
|
171
|
-
)
|
|
172
|
-
|
|
173
|
-
container_op.container.add_env_variable(
|
|
174
|
-
k8s_client.V1EnvVar(
|
|
175
|
-
name="CURRENT_NODE_IP",
|
|
176
|
-
value_from=k8s_client.V1EnvVarSource(
|
|
177
|
-
field_ref=k8s_client.V1ObjectFieldSelector(
|
|
178
|
-
api_version="v1", field_path="status.hostIP"
|
|
179
|
-
)
|
|
180
|
-
),
|
|
181
|
-
)
|
|
182
|
-
)
|
|
183
|
-
container_op.container.add_env_variable(
|
|
184
|
-
k8s_client.V1EnvVar(
|
|
185
|
-
name="IGZ_DATA_CONFIG_FILE", value="/igz/java/conf/v3io.conf"
|
|
186
|
-
)
|
|
187
|
-
)
|
|
188
|
-
|
|
189
|
-
return container_op
|
|
190
|
-
|
|
191
|
-
return _mount_v3iod
|
|
192
|
-
|
|
193
|
-
|
|
194
|
-
def v3io_cred(api="", user="", access_key=""):
|
|
195
|
-
"""
|
|
196
|
-
Modifier function to copy local v3io env vars to container
|
|
197
|
-
|
|
198
|
-
Usage::
|
|
199
|
-
|
|
200
|
-
train = train_op(...)
|
|
201
|
-
train.apply(use_v3io_cred())
|
|
202
|
-
"""
|
|
203
|
-
|
|
204
|
-
def _use_v3io_cred(container_op: kfp.dsl.ContainerOp):
|
|
205
|
-
from os import environ
|
|
206
|
-
|
|
207
|
-
from kubernetes import client as k8s_client
|
|
208
|
-
|
|
209
|
-
web_api = api or environ.get("V3IO_API") or mlconf.v3io_api
|
|
210
|
-
_user = user or environ.get("V3IO_USERNAME")
|
|
211
|
-
_access_key = access_key or environ.get("V3IO_ACCESS_KEY")
|
|
212
|
-
v3io_framesd = mlconf.v3io_framesd or environ.get("V3IO_FRAMESD")
|
|
213
|
-
|
|
214
|
-
return (
|
|
215
|
-
container_op.container.add_env_variable(
|
|
216
|
-
k8s_client.V1EnvVar(name="V3IO_API", value=web_api)
|
|
217
|
-
)
|
|
218
|
-
.add_env_variable(k8s_client.V1EnvVar(name="V3IO_USERNAME", value=_user))
|
|
219
|
-
.add_env_variable(
|
|
220
|
-
k8s_client.V1EnvVar(name="V3IO_ACCESS_KEY", value=_access_key)
|
|
221
|
-
)
|
|
222
|
-
.add_env_variable(
|
|
223
|
-
k8s_client.V1EnvVar(name="V3IO_FRAMESD", value=v3io_framesd)
|
|
224
|
-
)
|
|
225
|
-
)
|
|
226
|
-
|
|
227
|
-
return _use_v3io_cred
|
|
228
|
-
|
|
229
29
|
|
|
230
30
|
def split_path(mntpath=""):
|
|
231
31
|
if mntpath[0] == "/":
|
|
@@ -525,8 +325,8 @@ def add_or_refresh_credentials(
|
|
|
525
325
|
# different access keys for the 2 usages
|
|
526
326
|
token = (
|
|
527
327
|
token
|
|
528
|
-
# can't use mlrun.runtimes.constants.FunctionEnvironmentVariables.auth_session cause this is running
|
|
529
|
-
# import execution path (when we're initializing the run db) and therefore we can't import mlrun.runtimes
|
|
328
|
+
# can't use mlrun.common.runtimes.constants.FunctionEnvironmentVariables.auth_session cause this is running
|
|
329
|
+
# in the import execution path (when we're initializing the run db) and therefore we can't import mlrun.runtimes
|
|
530
330
|
or os.environ.get("MLRUN_AUTH_SESSION")
|
|
531
331
|
or os.environ.get("V3IO_ACCESS_KEY")
|
|
532
332
|
)
|
|
@@ -582,3 +382,22 @@ def sanitize_username(username: str):
|
|
|
582
382
|
So simply replace it with dash
|
|
583
383
|
"""
|
|
584
384
|
return username.replace("_", "-")
|
|
385
|
+
|
|
386
|
+
|
|
387
|
+
def min_iguazio_versions(*versions):
|
|
388
|
+
def decorator(function):
|
|
389
|
+
def wrapper(*args, **kwargs):
|
|
390
|
+
if mlrun.utils.helpers.validate_component_version_compatibility(
|
|
391
|
+
"iguazio", *versions
|
|
392
|
+
):
|
|
393
|
+
return function(*args, **kwargs)
|
|
394
|
+
|
|
395
|
+
message = (
|
|
396
|
+
f"{function.__name__} is supported since Iguazio {' or '.join(versions)}, currently using "
|
|
397
|
+
f"Iguazio {mlconf.igz_version}."
|
|
398
|
+
)
|
|
399
|
+
raise mlrun.errors.MLRunIncompatibleVersionError(message)
|
|
400
|
+
|
|
401
|
+
return wrapper
|
|
402
|
+
|
|
403
|
+
return decorator
|