mlrun 1.7.0rc22__py3-none-any.whl → 1.7.0rc24__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of mlrun might be problematic. Click here for more details.
- mlrun/common/helpers.py +11 -0
- mlrun/common/schemas/__init__.py +2 -0
- mlrun/common/schemas/api_gateway.py +57 -16
- mlrun/common/schemas/feature_store.py +78 -28
- mlrun/db/base.py +1 -0
- mlrun/db/httpdb.py +9 -6
- mlrun/db/nopdb.py +1 -0
- mlrun/errors.py +1 -3
- mlrun/frameworks/__init__.py +0 -6
- mlrun/model_monitoring/db/stores/__init__.py +27 -21
- mlrun/model_monitoring/db/stores/base/store.py +1 -0
- mlrun/model_monitoring/db/stores/sqldb/sql_store.py +8 -8
- mlrun/model_monitoring/db/stores/v3io_kv/kv_store.py +8 -8
- mlrun/model_monitoring/db/tsdb/__init__.py +1 -1
- mlrun/model_monitoring/db/tsdb/base.py +1 -1
- mlrun/model_monitoring/db/tsdb/v3io/v3io_connector.py +2 -3
- mlrun/model_monitoring/helpers.py +8 -4
- mlrun/model_monitoring/stream_processing.py +9 -11
- mlrun/model_monitoring/writer.py +10 -6
- mlrun/package/__init__.py +1 -13
- mlrun/package/packagers/__init__.py +1 -6
- mlrun/projects/project.py +5 -1
- mlrun/runtimes/nuclio/application/application.py +0 -2
- mlrun/runtimes/nuclio/serving.py +9 -6
- mlrun/serving/server.py +4 -0
- mlrun/serving/v2_serving.py +54 -38
- mlrun/utils/notifications/notification/base.py +39 -7
- mlrun/utils/notifications/notification/slack.py +1 -14
- mlrun/utils/version/version.json +2 -2
- {mlrun-1.7.0rc22.dist-info → mlrun-1.7.0rc24.dist-info}/METADATA +1 -1
- {mlrun-1.7.0rc22.dist-info → mlrun-1.7.0rc24.dist-info}/RECORD +35 -35
- {mlrun-1.7.0rc22.dist-info → mlrun-1.7.0rc24.dist-info}/LICENSE +0 -0
- {mlrun-1.7.0rc22.dist-info → mlrun-1.7.0rc24.dist-info}/WHEEL +0 -0
- {mlrun-1.7.0rc22.dist-info → mlrun-1.7.0rc24.dist-info}/entry_points.txt +0 -0
- {mlrun-1.7.0rc22.dist-info → mlrun-1.7.0rc24.dist-info}/top_level.txt +0 -0
|
@@ -97,7 +97,7 @@ def get_monitoring_parquet_path(
|
|
|
97
97
|
return parquet_path
|
|
98
98
|
|
|
99
99
|
|
|
100
|
-
def get_connection_string(secret_provider: typing.Callable = None) -> str:
|
|
100
|
+
def get_connection_string(secret_provider: typing.Callable[[str], str] = None) -> str:
|
|
101
101
|
"""Get endpoint store connection string from the project secret. If wasn't set, take it from the system
|
|
102
102
|
configurations.
|
|
103
103
|
|
|
@@ -117,7 +117,7 @@ def get_connection_string(secret_provider: typing.Callable = None) -> str:
|
|
|
117
117
|
|
|
118
118
|
|
|
119
119
|
def get_tsdb_connection_string(
|
|
120
|
-
secret_provider: typing.Optional[typing.Callable] = None,
|
|
120
|
+
secret_provider: typing.Optional[typing.Callable[[str], str]] = None,
|
|
121
121
|
) -> str:
|
|
122
122
|
"""Get TSDB connection string from the project secret. If wasn't set, take it from the system
|
|
123
123
|
configurations.
|
|
@@ -278,9 +278,13 @@ def calculate_inputs_statistics(
|
|
|
278
278
|
return inputs_statistics
|
|
279
279
|
|
|
280
280
|
|
|
281
|
-
def get_endpoint_record(
|
|
281
|
+
def get_endpoint_record(
|
|
282
|
+
project: str,
|
|
283
|
+
endpoint_id: str,
|
|
284
|
+
secret_provider: typing.Optional[typing.Callable[[str], str]] = None,
|
|
285
|
+
) -> dict[str, typing.Any]:
|
|
282
286
|
model_endpoint_store = mlrun.model_monitoring.get_store_object(
|
|
283
|
-
project=project,
|
|
287
|
+
project=project, secret_provider=secret_provider
|
|
284
288
|
)
|
|
285
289
|
return model_endpoint_store.get_model_endpoint(endpoint_id=endpoint_id)
|
|
286
290
|
|
|
@@ -66,10 +66,6 @@ class EventStreamProcessor:
|
|
|
66
66
|
self.parquet_batching_max_events = parquet_batching_max_events
|
|
67
67
|
self.parquet_batching_timeout_secs = parquet_batching_timeout_secs
|
|
68
68
|
|
|
69
|
-
self.model_endpoint_store_target = (
|
|
70
|
-
mlrun.mlconf.model_endpoint_monitoring.store_type
|
|
71
|
-
)
|
|
72
|
-
|
|
73
69
|
logger.info(
|
|
74
70
|
"Initializing model monitoring event stream processor",
|
|
75
71
|
parquet_path=self.parquet_path,
|
|
@@ -139,7 +135,7 @@ class EventStreamProcessor:
|
|
|
139
135
|
def apply_monitoring_serving_graph(
|
|
140
136
|
self,
|
|
141
137
|
fn: mlrun.runtimes.ServingRuntime,
|
|
142
|
-
|
|
138
|
+
secret_provider: typing.Optional[typing.Callable[[str], str]] = None,
|
|
143
139
|
) -> None:
|
|
144
140
|
"""
|
|
145
141
|
Apply monitoring serving graph to a given serving function. The following serving graph includes about 4 main
|
|
@@ -167,7 +163,8 @@ class EventStreamProcessor:
|
|
|
167
163
|
using CE, the parquet target path is based on the defined MLRun artifact path.
|
|
168
164
|
|
|
169
165
|
:param fn: A serving function.
|
|
170
|
-
:param
|
|
166
|
+
:param secret_provider: An optional callable function that provides the connection string from the project
|
|
167
|
+
secret.
|
|
171
168
|
"""
|
|
172
169
|
|
|
173
170
|
graph = typing.cast(
|
|
@@ -293,7 +290,6 @@ class EventStreamProcessor:
|
|
|
293
290
|
name="UpdateEndpoint",
|
|
294
291
|
after="ProcessBeforeEndpointUpdate",
|
|
295
292
|
project=self.project,
|
|
296
|
-
model_endpoint_store_target=self.model_endpoint_store_target,
|
|
297
293
|
)
|
|
298
294
|
|
|
299
295
|
apply_update_endpoint()
|
|
@@ -310,7 +306,10 @@ class EventStreamProcessor:
|
|
|
310
306
|
table=self.kv_path,
|
|
311
307
|
)
|
|
312
308
|
|
|
313
|
-
|
|
309
|
+
store_object = mlrun.model_monitoring.get_store_object(
|
|
310
|
+
project=self.project, secret_provider=secret_provider
|
|
311
|
+
)
|
|
312
|
+
if store_object.type == ModelEndpointTarget.V3IO_NOSQL:
|
|
314
313
|
apply_infer_schema()
|
|
315
314
|
|
|
316
315
|
# Emits the event in window size of events based on sample_window size (10 by default)
|
|
@@ -328,7 +327,7 @@ class EventStreamProcessor:
|
|
|
328
327
|
# TSDB branch (skip to Prometheus if in CE env)
|
|
329
328
|
if not mlrun.mlconf.is_ce_mode():
|
|
330
329
|
tsdb_connector = mlrun.model_monitoring.get_tsdb_connector(
|
|
331
|
-
project=self.project, secret_provider=
|
|
330
|
+
project=self.project, secret_provider=secret_provider
|
|
332
331
|
)
|
|
333
332
|
tsdb_connector.apply_monitoring_stream_steps(graph=graph)
|
|
334
333
|
|
|
@@ -904,7 +903,7 @@ class MapFeatureNames(mlrun.feature_store.steps.MapClass):
|
|
|
904
903
|
|
|
905
904
|
|
|
906
905
|
class UpdateEndpoint(mlrun.feature_store.steps.MapClass):
|
|
907
|
-
def __init__(self, project: str,
|
|
906
|
+
def __init__(self, project: str, **kwargs):
|
|
908
907
|
"""
|
|
909
908
|
Update the model endpoint record in the DB. Note that the event at this point includes metadata and stats about
|
|
910
909
|
the average latency and the amount of predictions over time. This data will be used in the monitoring dashboards
|
|
@@ -914,7 +913,6 @@ class UpdateEndpoint(mlrun.feature_store.steps.MapClass):
|
|
|
914
913
|
"""
|
|
915
914
|
super().__init__(**kwargs)
|
|
916
915
|
self.project = project
|
|
917
|
-
self.model_endpoint_store_target = model_endpoint_store_target
|
|
918
916
|
|
|
919
917
|
def do(self, event: dict):
|
|
920
918
|
# Remove labels from the event
|
mlrun/model_monitoring/writer.py
CHANGED
|
@@ -13,7 +13,7 @@
|
|
|
13
13
|
# limitations under the License.
|
|
14
14
|
|
|
15
15
|
import json
|
|
16
|
-
from typing import Any, NewType
|
|
16
|
+
from typing import Any, Callable, NewType
|
|
17
17
|
|
|
18
18
|
import mlrun.common.model_monitoring
|
|
19
19
|
import mlrun.common.schemas
|
|
@@ -30,7 +30,7 @@ from mlrun.common.schemas.model_monitoring.constants import (
|
|
|
30
30
|
WriterEventKind,
|
|
31
31
|
)
|
|
32
32
|
from mlrun.common.schemas.notification import NotificationKind, NotificationSeverity
|
|
33
|
-
from mlrun.model_monitoring.helpers import
|
|
33
|
+
from mlrun.model_monitoring.helpers import get_result_instance_fqn
|
|
34
34
|
from mlrun.serving.utils import StepToDict
|
|
35
35
|
from mlrun.utils import logger
|
|
36
36
|
from mlrun.utils.notifications.notification_pusher import CustomNotificationPusher
|
|
@@ -102,7 +102,11 @@ class ModelMonitoringWriter(StepToDict):
|
|
|
102
102
|
|
|
103
103
|
kind = "monitoring_application_stream_pusher"
|
|
104
104
|
|
|
105
|
-
def __init__(
|
|
105
|
+
def __init__(
|
|
106
|
+
self,
|
|
107
|
+
project: str,
|
|
108
|
+
secret_provider: Callable = None,
|
|
109
|
+
) -> None:
|
|
106
110
|
self.project = project
|
|
107
111
|
self.name = project # required for the deployment process
|
|
108
112
|
|
|
@@ -111,10 +115,10 @@ class ModelMonitoringWriter(StepToDict):
|
|
|
111
115
|
)
|
|
112
116
|
|
|
113
117
|
self._app_result_store = mlrun.model_monitoring.get_store_object(
|
|
114
|
-
project=self.project
|
|
118
|
+
project=self.project, secret_provider=secret_provider
|
|
115
119
|
)
|
|
116
120
|
self._tsdb_connector = mlrun.model_monitoring.get_tsdb_connector(
|
|
117
|
-
project=self.project, secret_provider=
|
|
121
|
+
project=self.project, secret_provider=secret_provider
|
|
118
122
|
)
|
|
119
123
|
self._endpoints_records = {}
|
|
120
124
|
|
|
@@ -223,7 +227,7 @@ class ModelMonitoringWriter(StepToDict):
|
|
|
223
227
|
endpoint_id = event[WriterEvent.ENDPOINT_ID]
|
|
224
228
|
endpoint_record = self._endpoints_records.setdefault(
|
|
225
229
|
endpoint_id,
|
|
226
|
-
|
|
230
|
+
self._app_result_store.get_model_endpoint(endpoint_id=endpoint_id),
|
|
227
231
|
)
|
|
228
232
|
event_value = {
|
|
229
233
|
"app_name": event[WriterEvent.APPLICATION_NAME],
|
mlrun/package/__init__.py
CHANGED
|
@@ -12,19 +12,7 @@
|
|
|
12
12
|
# See the License for the specific language governing permissions and
|
|
13
13
|
# limitations under the License.
|
|
14
14
|
#
|
|
15
|
-
|
|
16
|
-
MLRun package enables fully-automated experiment and pipeline tracking and reproducibility, and easy passing of
|
|
17
|
-
python objects between remote jobs, while not requiring any form of editing to the actual function original code.
|
|
18
|
-
Simply set the function code in a project and run it, MLRun takes care of the rest.
|
|
19
|
-
|
|
20
|
-
MLRun uses packagers: classes that perform 2 tasks:
|
|
21
|
-
|
|
22
|
-
#. **Parsing inputs** - automatically cast the runtime's inputs (user's input passed to the function via
|
|
23
|
-
the ``inputs`` parameter of the ``run`` method) to the relevant hinted type. (Does not require handling of data items.)
|
|
24
|
-
#. **Logging outputs** - automatically save, log, and upload the function's returned objects by the provided
|
|
25
|
-
log hints (user's input passed to the function via the ``returns`` parameter of the ``run`` method).
|
|
26
|
-
(Does not require handling of files and artifacts.)
|
|
27
|
-
"""
|
|
15
|
+
|
|
28
16
|
# flake8: noqa - this is until we take care of the F401 violations with respect to __all__ & sphinx
|
|
29
17
|
|
|
30
18
|
import functools
|
|
@@ -12,12 +12,7 @@
|
|
|
12
12
|
# See the License for the specific language governing permissions and
|
|
13
13
|
# limitations under the License.
|
|
14
14
|
#
|
|
15
|
-
r"""
|
|
16
|
-
MLRun comes with the following list of modules, out of the box. All of the packagers listed here
|
|
17
|
-
use the implementation of :ref:`DefaultPackager <mlrun.package.packagers.default\_packager.DefaultPackager>` and are
|
|
18
|
-
available by default at the start of each run.
|
|
19
|
-
"""
|
|
20
|
-
# flake8: noqa - this is until we take care of the F401 violations with respect to __all__ & sphinx
|
|
21
15
|
|
|
16
|
+
# flake8: noqa - this is until we take care of the F401 violations with respect to __all__ & sphinx
|
|
22
17
|
from .default_packager import DefaultPackager
|
|
23
18
|
from .numpy_packagers import NumPySupportedFormat
|
mlrun/projects/project.py
CHANGED
|
@@ -2116,6 +2116,7 @@ class MlrunProject(ModelObj):
|
|
|
2116
2116
|
*,
|
|
2117
2117
|
deploy_histogram_data_drift_app: bool = True,
|
|
2118
2118
|
wait_for_deployment: bool = False,
|
|
2119
|
+
rebuild_images: bool = False,
|
|
2119
2120
|
) -> None:
|
|
2120
2121
|
"""
|
|
2121
2122
|
Deploy model monitoring application controller, writer and stream functions.
|
|
@@ -2135,6 +2136,7 @@ class MlrunProject(ModelObj):
|
|
|
2135
2136
|
:param wait_for_deployment: If true, return only after the deployment is done on the backend.
|
|
2136
2137
|
Otherwise, deploy the model monitoring infrastructure on the
|
|
2137
2138
|
background, including the histogram data drift app if selected.
|
|
2139
|
+
:param rebuild_images: If true, force rebuild of model monitoring infrastructure images.
|
|
2138
2140
|
"""
|
|
2139
2141
|
if default_controller_image != "mlrun/mlrun":
|
|
2140
2142
|
# TODO: Remove this in 1.9.0
|
|
@@ -2150,6 +2152,7 @@ class MlrunProject(ModelObj):
|
|
|
2150
2152
|
image=image,
|
|
2151
2153
|
base_period=base_period,
|
|
2152
2154
|
deploy_histogram_data_drift_app=deploy_histogram_data_drift_app,
|
|
2155
|
+
rebuild_images=rebuild_images,
|
|
2153
2156
|
)
|
|
2154
2157
|
|
|
2155
2158
|
if wait_for_deployment:
|
|
@@ -3192,7 +3195,8 @@ class MlrunProject(ModelObj):
|
|
|
3192
3195
|
tsdb_connection: Optional[str] = None,
|
|
3193
3196
|
):
|
|
3194
3197
|
"""Set the credentials that will be used by the project's model monitoring
|
|
3195
|
-
infrastructure functions.
|
|
3198
|
+
infrastructure functions. Note that you must set the credentials before deploying any
|
|
3199
|
+
model monitoring or serving function.
|
|
3196
3200
|
|
|
3197
3201
|
:param access_key: Model Monitoring access key for managing user permissions
|
|
3198
3202
|
:param endpoint_store_connection: Endpoint store connection string
|
|
@@ -263,7 +263,6 @@ class ApplicationRuntime(RemoteRuntime):
|
|
|
263
263
|
is_kfp=False,
|
|
264
264
|
mlrun_version_specifier=None,
|
|
265
265
|
show_on_failure: bool = False,
|
|
266
|
-
skip_access_key_auth: bool = False,
|
|
267
266
|
direct_port_access: bool = False,
|
|
268
267
|
authentication_mode: schemas.APIGatewayAuthenticationMode = None,
|
|
269
268
|
authentication_creds: tuple[str] = None,
|
|
@@ -283,7 +282,6 @@ class ApplicationRuntime(RemoteRuntime):
|
|
|
283
282
|
:param is_kfp: Deploy as part of a kfp pipeline
|
|
284
283
|
:param mlrun_version_specifier: Which mlrun package version to include (if not current)
|
|
285
284
|
:param show_on_failure: Show logs only in case of build failure
|
|
286
|
-
:param skip_access_key_auth: Skip adding access key auth to the API Gateway
|
|
287
285
|
:param direct_port_access: Set True to allow direct port access to the application sidecar
|
|
288
286
|
:param authentication_mode: API Gateway authentication mode
|
|
289
287
|
:param authentication_creds: API Gateway authentication credentials as a tuple (username, password)
|
mlrun/runtimes/nuclio/serving.py
CHANGED
|
@@ -312,15 +312,18 @@ class ServingRuntime(RemoteRuntime):
|
|
|
312
312
|
sample: Optional[int] = None,
|
|
313
313
|
stream_args: Optional[dict] = None,
|
|
314
314
|
tracking_policy: Optional[Union["TrackingPolicy", dict]] = None,
|
|
315
|
+
enable_tracking: bool = True,
|
|
315
316
|
) -> None:
|
|
316
317
|
"""apply on your serving function to monitor a deployed model, including real-time dashboards to detect drift
|
|
317
318
|
and analyze performance.
|
|
318
319
|
|
|
319
|
-
:param stream_path:
|
|
320
|
-
|
|
321
|
-
:param batch:
|
|
322
|
-
:param sample:
|
|
323
|
-
:param stream_args:
|
|
320
|
+
:param stream_path: Path/URL of the tracking stream, e.g. v3io:///users/mike/mystream
|
|
321
|
+
you can use the "dummy://" path for test/simulation.
|
|
322
|
+
:param batch: Micro batch size (send micro batches of N records at a time).
|
|
323
|
+
:param sample: Sample size (send only one of N records).
|
|
324
|
+
:param stream_args: Stream initialization parameters, e.g. shards, retention_in_hours, etc.
|
|
325
|
+
:param enable_tracking: Enable/disable model-monitoring tracking.
|
|
326
|
+
Default True (tracking enabled).
|
|
324
327
|
|
|
325
328
|
example::
|
|
326
329
|
|
|
@@ -331,7 +334,7 @@ class ServingRuntime(RemoteRuntime):
|
|
|
331
334
|
|
|
332
335
|
"""
|
|
333
336
|
# Applying model monitoring configurations
|
|
334
|
-
self.spec.track_models =
|
|
337
|
+
self.spec.track_models = enable_tracking
|
|
335
338
|
|
|
336
339
|
if stream_path:
|
|
337
340
|
self.spec.parameters["log_stream"] = stream_path
|
mlrun/serving/server.py
CHANGED
|
@@ -383,6 +383,10 @@ def v2_serving_handler(context, event, get_body=False):
|
|
|
383
383
|
if event.body == b"":
|
|
384
384
|
event.body = None
|
|
385
385
|
|
|
386
|
+
# ML-6065 – workaround for NUC-178
|
|
387
|
+
if hasattr(event, "trigger") and event.trigger.kind in ("kafka", "kafka-cluster"):
|
|
388
|
+
event.path = "/"
|
|
389
|
+
|
|
386
390
|
return context._server.run(event, context, get_body)
|
|
387
391
|
|
|
388
392
|
|
mlrun/serving/v2_serving.py
CHANGED
|
@@ -542,48 +542,64 @@ def _init_endpoint_record(
|
|
|
542
542
|
function_uri=graph_server.function_uri, versioned_model=versioned_model_name
|
|
543
543
|
).uid
|
|
544
544
|
|
|
545
|
-
# If model endpoint object was found in DB, skip the creation process.
|
|
546
545
|
try:
|
|
547
|
-
mlrun.get_run_db().get_model_endpoint(
|
|
548
|
-
|
|
546
|
+
model_ep = mlrun.get_run_db().get_model_endpoint(
|
|
547
|
+
project=project, endpoint_id=uid
|
|
548
|
+
)
|
|
549
549
|
except mlrun.errors.MLRunNotFoundError:
|
|
550
|
-
|
|
551
|
-
|
|
552
|
-
|
|
553
|
-
|
|
554
|
-
|
|
555
|
-
|
|
556
|
-
|
|
557
|
-
|
|
558
|
-
|
|
559
|
-
|
|
560
|
-
|
|
561
|
-
|
|
562
|
-
|
|
563
|
-
|
|
564
|
-
|
|
565
|
-
active=True,
|
|
566
|
-
monitoring_mode=mlrun.common.schemas.model_monitoring.ModelMonitoringMode.enabled
|
|
567
|
-
if model.context.server.track_models
|
|
568
|
-
else mlrun.common.schemas.model_monitoring.ModelMonitoringMode.disabled,
|
|
569
|
-
),
|
|
570
|
-
status=mlrun.common.schemas.ModelEndpointStatus(
|
|
571
|
-
endpoint_type=mlrun.common.schemas.model_monitoring.EndpointType.NODE_EP
|
|
550
|
+
model_ep = None
|
|
551
|
+
|
|
552
|
+
if model.context.server.track_models and not model_ep:
|
|
553
|
+
logger.debug("Creating a new model endpoint record", endpoint_id=uid)
|
|
554
|
+
model_endpoint = mlrun.common.schemas.ModelEndpoint(
|
|
555
|
+
metadata=mlrun.common.schemas.ModelEndpointMetadata(
|
|
556
|
+
project=project, labels=model.labels, uid=uid
|
|
557
|
+
),
|
|
558
|
+
spec=mlrun.common.schemas.ModelEndpointSpec(
|
|
559
|
+
function_uri=graph_server.function_uri,
|
|
560
|
+
model=versioned_model_name,
|
|
561
|
+
model_class=model.__class__.__name__,
|
|
562
|
+
model_uri=model.model_path,
|
|
563
|
+
stream_path=config.model_endpoint_monitoring.store_prefixes.default.format(
|
|
564
|
+
project=project, kind="stream"
|
|
572
565
|
),
|
|
573
|
-
|
|
574
|
-
|
|
575
|
-
|
|
576
|
-
|
|
577
|
-
|
|
578
|
-
|
|
579
|
-
|
|
580
|
-
model_endpoint=model_endpoint.dict(),
|
|
581
|
-
)
|
|
566
|
+
active=True,
|
|
567
|
+
monitoring_mode=mlrun.common.schemas.model_monitoring.ModelMonitoringMode.enabled,
|
|
568
|
+
),
|
|
569
|
+
status=mlrun.common.schemas.ModelEndpointStatus(
|
|
570
|
+
endpoint_type=mlrun.common.schemas.model_monitoring.EndpointType.NODE_EP
|
|
571
|
+
),
|
|
572
|
+
)
|
|
582
573
|
|
|
583
|
-
|
|
584
|
-
|
|
574
|
+
db = mlrun.get_run_db()
|
|
575
|
+
db.create_model_endpoint(
|
|
576
|
+
project=project,
|
|
577
|
+
endpoint_id=uid,
|
|
578
|
+
model_endpoint=model_endpoint.dict(),
|
|
579
|
+
)
|
|
585
580
|
|
|
586
|
-
|
|
587
|
-
|
|
581
|
+
elif (
|
|
582
|
+
model_ep
|
|
583
|
+
and (
|
|
584
|
+
model_ep.spec.monitoring_mode
|
|
585
|
+
== mlrun.common.schemas.model_monitoring.ModelMonitoringMode.enabled
|
|
586
|
+
)
|
|
587
|
+
!= model.context.server.track_models
|
|
588
|
+
):
|
|
589
|
+
monitoring_mode = (
|
|
590
|
+
mlrun.common.schemas.model_monitoring.ModelMonitoringMode.enabled
|
|
591
|
+
if model.context.server.track_models
|
|
592
|
+
else mlrun.common.schemas.model_monitoring.ModelMonitoringMode.disabled
|
|
593
|
+
)
|
|
594
|
+
db = mlrun.get_run_db()
|
|
595
|
+
db.patch_model_endpoint(
|
|
596
|
+
project=project,
|
|
597
|
+
endpoint_id=uid,
|
|
598
|
+
attributes={"monitoring_mode": monitoring_mode},
|
|
599
|
+
)
|
|
600
|
+
logger.debug(
|
|
601
|
+
f"Updating model endpoint monitoring_mode to {monitoring_mode}",
|
|
602
|
+
endpoint_id=uid,
|
|
603
|
+
)
|
|
588
604
|
|
|
589
605
|
return uid
|
|
@@ -69,16 +69,27 @@ class NotificationBase:
|
|
|
69
69
|
if custom_html:
|
|
70
70
|
return custom_html
|
|
71
71
|
|
|
72
|
-
if self.name:
|
|
73
|
-
message = f"{self.name}: {message}"
|
|
74
|
-
|
|
75
72
|
if alert:
|
|
76
73
|
if not event_data:
|
|
77
74
|
return f"[{severity}] {message}"
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
75
|
+
|
|
76
|
+
html = f"<h3>[{severity}] {message}</h3>"
|
|
77
|
+
html += f"<br>{alert.name} alert has occurred<br>"
|
|
78
|
+
html += f"<br><h4>Project:</h4>{alert.project}<br>"
|
|
79
|
+
html += f"<br><h4>ID:</h4>{event_data.entity.ids[0]}<br>"
|
|
80
|
+
html += f"<br><h4>Summary:</h4>{mlrun.utils.helpers.format_alert_summary(alert, event_data)}<br>"
|
|
81
|
+
|
|
82
|
+
if event_data.value_dict:
|
|
83
|
+
html += "<br><h4>Event data:</h4>"
|
|
84
|
+
for key, value in event_data.value_dict.items():
|
|
85
|
+
html += f"{key}: {value}<br>"
|
|
86
|
+
|
|
87
|
+
overview_type, url = self._get_overview_type_and_url(alert, event_data)
|
|
88
|
+
html += f"<br><h4>Overview:</h4><a href={url}>{overview_type}</a>"
|
|
89
|
+
return html
|
|
90
|
+
|
|
91
|
+
if self.name:
|
|
92
|
+
message = f"{self.name}: {message}"
|
|
82
93
|
|
|
83
94
|
if not runs:
|
|
84
95
|
return f"[{severity}] {message}"
|
|
@@ -90,3 +101,24 @@ class NotificationBase:
|
|
|
90
101
|
html += "<br>click the hyper links below to see detailed results<br>"
|
|
91
102
|
html += runs.show(display=False, short=True)
|
|
92
103
|
return html
|
|
104
|
+
|
|
105
|
+
def _get_overview_type_and_url(
|
|
106
|
+
self,
|
|
107
|
+
alert: mlrun.common.schemas.AlertConfig,
|
|
108
|
+
event_data: mlrun.common.schemas.Event,
|
|
109
|
+
) -> (str, str):
|
|
110
|
+
if (
|
|
111
|
+
event_data.entity.kind == mlrun.common.schemas.alert.EventEntityKind.JOB
|
|
112
|
+
): # JOB entity
|
|
113
|
+
uid = event_data.value_dict.get("uid")
|
|
114
|
+
url = mlrun.utils.helpers.get_ui_url(alert.project, uid)
|
|
115
|
+
overview_type = "Job overview"
|
|
116
|
+
else: # MODEL entity
|
|
117
|
+
model_name = event_data.value_dict.get("model")
|
|
118
|
+
model_endpoint_id = event_data.value_dict.get("model_endpoint_id")
|
|
119
|
+
url = mlrun.utils.helpers.get_model_endpoint_url(
|
|
120
|
+
alert.project, model_name, model_endpoint_id
|
|
121
|
+
)
|
|
122
|
+
overview_type = "Model endpoint"
|
|
123
|
+
|
|
124
|
+
return overview_type, url
|
|
@@ -153,20 +153,7 @@ class SlackNotification(NotificationBase):
|
|
|
153
153
|
data_text = "\n".join(data_lines)
|
|
154
154
|
line.append(self._get_slack_row(f"*Event data:*\n{data_text}"))
|
|
155
155
|
|
|
156
|
-
|
|
157
|
-
event_data.entity.kind == mlrun.common.schemas.alert.EventEntityKind.JOB
|
|
158
|
-
): # JOB entity
|
|
159
|
-
uid = event_data.value_dict.get("uid")
|
|
160
|
-
url = mlrun.utils.helpers.get_ui_url(alert.project, uid)
|
|
161
|
-
overview_type = "Job overview"
|
|
162
|
-
else: # MODEL entity
|
|
163
|
-
model_name = event_data.value_dict.get("model")
|
|
164
|
-
model_endpoint_id = event_data.value_dict.get("model_endpoint_id")
|
|
165
|
-
url = mlrun.utils.helpers.get_model_endpoint_url(
|
|
166
|
-
alert.project, model_name, model_endpoint_id
|
|
167
|
-
)
|
|
168
|
-
overview_type = "Model endpoint"
|
|
169
|
-
|
|
156
|
+
overview_type, url = self._get_overview_type_and_url(alert, event_data)
|
|
170
157
|
line.append(self._get_slack_row(f"*Overview:*\n<{url}|*{overview_type}*>"))
|
|
171
158
|
|
|
172
159
|
return line
|
mlrun/utils/version/version.json
CHANGED