mlrun 1.7.0rc21__py3-none-any.whl → 1.7.0rc23__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of mlrun might be problematic. Click here for more details.
- mlrun/alerts/alert.py +42 -17
- mlrun/common/schemas/__init__.py +2 -0
- mlrun/common/schemas/feature_store.py +78 -28
- mlrun/config.py +3 -0
- mlrun/db/base.py +1 -0
- mlrun/db/httpdb.py +9 -6
- mlrun/db/nopdb.py +1 -0
- mlrun/errors.py +1 -3
- mlrun/execution.py +2 -0
- mlrun/launcher/local.py +4 -0
- mlrun/launcher/remote.py +1 -0
- mlrun/model.py +2 -0
- mlrun/model_monitoring/api.py +1 -0
- mlrun/model_monitoring/applications/base.py +3 -3
- mlrun/model_monitoring/db/stores/__init__.py +27 -21
- mlrun/model_monitoring/db/stores/base/store.py +1 -0
- mlrun/model_monitoring/db/stores/sqldb/sql_store.py +8 -8
- mlrun/model_monitoring/db/stores/v3io_kv/kv_store.py +8 -8
- mlrun/model_monitoring/db/tsdb/__init__.py +1 -1
- mlrun/model_monitoring/db/tsdb/base.py +1 -14
- mlrun/model_monitoring/db/tsdb/tdengine/tdengine_connector.py +22 -18
- mlrun/model_monitoring/db/tsdb/v3io/v3io_connector.py +67 -46
- mlrun/model_monitoring/helpers.py +25 -4
- mlrun/model_monitoring/stream_processing.py +9 -11
- mlrun/model_monitoring/writer.py +10 -6
- mlrun/projects/operations.py +5 -0
- mlrun/projects/project.py +11 -1
- mlrun/runtimes/base.py +6 -0
- mlrun/runtimes/daskjob.py +1 -0
- mlrun/runtimes/databricks_job/databricks_runtime.py +1 -0
- mlrun/runtimes/local.py +7 -1
- mlrun/runtimes/nuclio/application/application.py +0 -2
- mlrun/runtimes/nuclio/serving.py +9 -6
- mlrun/serving/__init__.py +8 -1
- mlrun/serving/states.py +51 -8
- mlrun/serving/utils.py +19 -11
- mlrun/serving/v2_serving.py +54 -38
- mlrun/utils/helpers.py +51 -9
- mlrun/utils/notifications/notification/base.py +39 -7
- mlrun/utils/notifications/notification/slack.py +1 -14
- mlrun/utils/version/version.json +2 -2
- {mlrun-1.7.0rc21.dist-info → mlrun-1.7.0rc23.dist-info}/METADATA +1 -1
- {mlrun-1.7.0rc21.dist-info → mlrun-1.7.0rc23.dist-info}/RECORD +47 -47
- {mlrun-1.7.0rc21.dist-info → mlrun-1.7.0rc23.dist-info}/LICENSE +0 -0
- {mlrun-1.7.0rc21.dist-info → mlrun-1.7.0rc23.dist-info}/WHEEL +0 -0
- {mlrun-1.7.0rc21.dist-info → mlrun-1.7.0rc23.dist-info}/entry_points.txt +0 -0
- {mlrun-1.7.0rc21.dist-info → mlrun-1.7.0rc23.dist-info}/top_level.txt +0 -0
mlrun/projects/project.py
CHANGED
|
@@ -2116,6 +2116,7 @@ class MlrunProject(ModelObj):
|
|
|
2116
2116
|
*,
|
|
2117
2117
|
deploy_histogram_data_drift_app: bool = True,
|
|
2118
2118
|
wait_for_deployment: bool = False,
|
|
2119
|
+
rebuild_images: bool = False,
|
|
2119
2120
|
) -> None:
|
|
2120
2121
|
"""
|
|
2121
2122
|
Deploy model monitoring application controller, writer and stream functions.
|
|
@@ -2135,6 +2136,7 @@ class MlrunProject(ModelObj):
|
|
|
2135
2136
|
:param wait_for_deployment: If true, return only after the deployment is done on the backend.
|
|
2136
2137
|
Otherwise, deploy the model monitoring infrastructure on the
|
|
2137
2138
|
background, including the histogram data drift app if selected.
|
|
2139
|
+
:param rebuild_images: If true, force rebuild of model monitoring infrastructure images.
|
|
2138
2140
|
"""
|
|
2139
2141
|
if default_controller_image != "mlrun/mlrun":
|
|
2140
2142
|
# TODO: Remove this in 1.9.0
|
|
@@ -2150,6 +2152,7 @@ class MlrunProject(ModelObj):
|
|
|
2150
2152
|
image=image,
|
|
2151
2153
|
base_period=base_period,
|
|
2152
2154
|
deploy_histogram_data_drift_app=deploy_histogram_data_drift_app,
|
|
2155
|
+
rebuild_images=rebuild_images,
|
|
2153
2156
|
)
|
|
2154
2157
|
|
|
2155
2158
|
if wait_for_deployment:
|
|
@@ -3192,7 +3195,8 @@ class MlrunProject(ModelObj):
|
|
|
3192
3195
|
tsdb_connection: Optional[str] = None,
|
|
3193
3196
|
):
|
|
3194
3197
|
"""Set the credentials that will be used by the project's model monitoring
|
|
3195
|
-
infrastructure functions.
|
|
3198
|
+
infrastructure functions. Important to note that you have to set the credentials before deploying any
|
|
3199
|
+
model monitoring or serving function.
|
|
3196
3200
|
|
|
3197
3201
|
:param access_key: Model Monitoring access key for managing user permissions
|
|
3198
3202
|
:param endpoint_store_connection: Endpoint store connection string
|
|
@@ -3258,6 +3262,7 @@ class MlrunProject(ModelObj):
|
|
|
3258
3262
|
notifications: list[mlrun.model.Notification] = None,
|
|
3259
3263
|
returns: Optional[list[Union[str, dict[str, str]]]] = None,
|
|
3260
3264
|
builder_env: Optional[dict] = None,
|
|
3265
|
+
reset_on_run: bool = None,
|
|
3261
3266
|
) -> typing.Union[mlrun.model.RunObject, PipelineNodeWrapper]:
|
|
3262
3267
|
"""Run a local or remote task as part of a local/kubeflow pipeline
|
|
3263
3268
|
|
|
@@ -3314,6 +3319,10 @@ class MlrunProject(ModelObj):
|
|
|
3314
3319
|
artifact type can be given there. The artifact key must appear in the dictionary as
|
|
3315
3320
|
"key": "the_key".
|
|
3316
3321
|
:param builder_env: env vars dict for source archive config/credentials e.g. builder_env={"GIT_TOKEN": token}
|
|
3322
|
+
:param reset_on_run: When True, function python modules would reload prior to code execution.
|
|
3323
|
+
This ensures latest code changes are executed. This argument must be used in
|
|
3324
|
+
conjunction with the local=True argument.
|
|
3325
|
+
|
|
3317
3326
|
:return: MLRun RunObject or PipelineNodeWrapper
|
|
3318
3327
|
"""
|
|
3319
3328
|
return run_function(
|
|
@@ -3339,6 +3348,7 @@ class MlrunProject(ModelObj):
|
|
|
3339
3348
|
notifications=notifications,
|
|
3340
3349
|
returns=returns,
|
|
3341
3350
|
builder_env=builder_env,
|
|
3351
|
+
reset_on_run=reset_on_run,
|
|
3342
3352
|
)
|
|
3343
3353
|
|
|
3344
3354
|
def build_function(
|
mlrun/runtimes/base.py
CHANGED
|
@@ -68,6 +68,7 @@ spec_fields = [
|
|
|
68
68
|
"disable_auto_mount",
|
|
69
69
|
"allow_empty_resources",
|
|
70
70
|
"clone_target_dir",
|
|
71
|
+
"reset_on_run",
|
|
71
72
|
]
|
|
72
73
|
|
|
73
74
|
|
|
@@ -336,6 +337,7 @@ class BaseRuntime(ModelObj):
|
|
|
336
337
|
notifications: Optional[list[mlrun.model.Notification]] = None,
|
|
337
338
|
returns: Optional[list[Union[str, dict[str, str]]]] = None,
|
|
338
339
|
state_thresholds: Optional[dict[str, int]] = None,
|
|
340
|
+
reset_on_run: Optional[bool] = None,
|
|
339
341
|
**launcher_kwargs,
|
|
340
342
|
) -> RunObject:
|
|
341
343
|
"""
|
|
@@ -390,6 +392,9 @@ class BaseRuntime(ModelObj):
|
|
|
390
392
|
standards and is at least 1 minute (-1 for infinite).
|
|
391
393
|
If the phase is active for longer than the threshold, the run will be aborted.
|
|
392
394
|
See mlconf.function.spec.state_thresholds for the state options and default values.
|
|
395
|
+
:param reset_on_run: When True, function python modules would reload prior to code execution.
|
|
396
|
+
This ensures latest code changes are executed. This argument must be used in
|
|
397
|
+
conjunction with the local=True argument.
|
|
393
398
|
:return: Run context object (RunObject) with run metadata, results and status
|
|
394
399
|
"""
|
|
395
400
|
launcher = mlrun.launcher.factory.LauncherFactory().create_launcher(
|
|
@@ -418,6 +423,7 @@ class BaseRuntime(ModelObj):
|
|
|
418
423
|
notifications=notifications,
|
|
419
424
|
returns=returns,
|
|
420
425
|
state_thresholds=state_thresholds,
|
|
426
|
+
reset_on_run=reset_on_run,
|
|
421
427
|
)
|
|
422
428
|
|
|
423
429
|
def _get_db_run(self, task: RunObject = None):
|
mlrun/runtimes/daskjob.py
CHANGED
|
@@ -494,6 +494,7 @@ class DaskCluster(KubejobRuntime):
|
|
|
494
494
|
notifications: Optional[list[mlrun.model.Notification]] = None,
|
|
495
495
|
returns: Optional[list[Union[str, dict[str, str]]]] = None,
|
|
496
496
|
state_thresholds: Optional[dict[str, int]] = None,
|
|
497
|
+
reset_on_run: Optional[bool] = None,
|
|
497
498
|
**launcher_kwargs,
|
|
498
499
|
) -> RunObject:
|
|
499
500
|
if state_thresholds:
|
|
@@ -232,6 +232,7 @@ def run_mlrun_databricks_job(context,task_parameters: dict, **kwargs):
|
|
|
232
232
|
notifications: Optional[list[mlrun.model.Notification]] = None,
|
|
233
233
|
returns: Optional[list[Union[str, dict[str, str]]]] = None,
|
|
234
234
|
state_thresholds: Optional[dict[str, int]] = None,
|
|
235
|
+
reset_on_run: Optional[bool] = None,
|
|
235
236
|
**launcher_kwargs,
|
|
236
237
|
) -> RunObject:
|
|
237
238
|
if local:
|
mlrun/runtimes/local.py
CHANGED
|
@@ -391,7 +391,13 @@ def load_module(file_name, handler, context):
|
|
|
391
391
|
if context:
|
|
392
392
|
class_args = copy(context._parameters.get("_init_args", {}))
|
|
393
393
|
|
|
394
|
-
return get_handler_extended(
|
|
394
|
+
return get_handler_extended(
|
|
395
|
+
handler,
|
|
396
|
+
context,
|
|
397
|
+
class_args,
|
|
398
|
+
namespaces=module,
|
|
399
|
+
reload_modules=context._reset_on_run,
|
|
400
|
+
)
|
|
395
401
|
|
|
396
402
|
|
|
397
403
|
def run_exec(cmd, args, env=None, cwd=None):
|
|
@@ -263,7 +263,6 @@ class ApplicationRuntime(RemoteRuntime):
|
|
|
263
263
|
is_kfp=False,
|
|
264
264
|
mlrun_version_specifier=None,
|
|
265
265
|
show_on_failure: bool = False,
|
|
266
|
-
skip_access_key_auth: bool = False,
|
|
267
266
|
direct_port_access: bool = False,
|
|
268
267
|
authentication_mode: schemas.APIGatewayAuthenticationMode = None,
|
|
269
268
|
authentication_creds: tuple[str] = None,
|
|
@@ -283,7 +282,6 @@ class ApplicationRuntime(RemoteRuntime):
|
|
|
283
282
|
:param is_kfp: Deploy as part of a kfp pipeline
|
|
284
283
|
:param mlrun_version_specifier: Which mlrun package version to include (if not current)
|
|
285
284
|
:param show_on_failure: Show logs only in case of build failure
|
|
286
|
-
:param skip_access_key_auth: Skip adding access key auth to the API Gateway
|
|
287
285
|
:param direct_port_access: Set True to allow direct port access to the application sidecar
|
|
288
286
|
:param authentication_mode: API Gateway authentication mode
|
|
289
287
|
:param authentication_creds: API Gateway authentication credentials as a tuple (username, password)
|
mlrun/runtimes/nuclio/serving.py
CHANGED
|
@@ -312,15 +312,18 @@ class ServingRuntime(RemoteRuntime):
|
|
|
312
312
|
sample: Optional[int] = None,
|
|
313
313
|
stream_args: Optional[dict] = None,
|
|
314
314
|
tracking_policy: Optional[Union["TrackingPolicy", dict]] = None,
|
|
315
|
+
enable_tracking: bool = True,
|
|
315
316
|
) -> None:
|
|
316
317
|
"""apply on your serving function to monitor a deployed model, including real-time dashboards to detect drift
|
|
317
318
|
and analyze performance.
|
|
318
319
|
|
|
319
|
-
:param stream_path:
|
|
320
|
-
|
|
321
|
-
:param batch:
|
|
322
|
-
:param sample:
|
|
323
|
-
:param stream_args:
|
|
320
|
+
:param stream_path: Path/url of the tracking stream e.g. v3io:///users/mike/mystream
|
|
321
|
+
you can use the "dummy://" path for test/simulation.
|
|
322
|
+
:param batch: Micro batch size (send micro batches of N records at a time).
|
|
323
|
+
:param sample: Sample size (send only one of N records).
|
|
324
|
+
:param stream_args: Stream initialization parameters, e.g. shards, retention_in_hours, ..
|
|
325
|
+
:param enable_tracking: Enabled/Disable model-monitoring tracking.
|
|
326
|
+
Default True (tracking enabled).
|
|
324
327
|
|
|
325
328
|
example::
|
|
326
329
|
|
|
@@ -331,7 +334,7 @@ class ServingRuntime(RemoteRuntime):
|
|
|
331
334
|
|
|
332
335
|
"""
|
|
333
336
|
# Applying model monitoring configurations
|
|
334
|
-
self.spec.track_models =
|
|
337
|
+
self.spec.track_models = enable_tracking
|
|
335
338
|
|
|
336
339
|
if stream_path:
|
|
337
340
|
self.spec.parameters["log_stream"] = stream_path
|
mlrun/serving/__init__.py
CHANGED
|
@@ -22,10 +22,17 @@ __all__ = [
|
|
|
22
22
|
"RouterStep",
|
|
23
23
|
"QueueStep",
|
|
24
24
|
"ErrorStep",
|
|
25
|
+
"MonitoringApplicationStep",
|
|
25
26
|
]
|
|
26
27
|
|
|
27
28
|
from .routers import ModelRouter, VotingEnsemble # noqa
|
|
28
29
|
from .server import GraphContext, GraphServer, create_graph_server # noqa
|
|
29
|
-
from .states import
|
|
30
|
+
from .states import (
|
|
31
|
+
ErrorStep,
|
|
32
|
+
QueueStep,
|
|
33
|
+
RouterStep,
|
|
34
|
+
TaskStep,
|
|
35
|
+
MonitoringApplicationStep,
|
|
36
|
+
) # noqa
|
|
30
37
|
from .v1_serving import MLModelServer, new_v1_model_server # noqa
|
|
31
38
|
from .v2_serving import V2ModelServer # noqa
|
mlrun/serving/states.py
CHANGED
|
@@ -12,7 +12,13 @@
|
|
|
12
12
|
# See the License for the specific language governing permissions and
|
|
13
13
|
# limitations under the License.
|
|
14
14
|
|
|
15
|
-
__all__ = [
|
|
15
|
+
__all__ = [
|
|
16
|
+
"TaskStep",
|
|
17
|
+
"RouterStep",
|
|
18
|
+
"RootFlowStep",
|
|
19
|
+
"ErrorStep",
|
|
20
|
+
"MonitoringApplicationStep",
|
|
21
|
+
]
|
|
16
22
|
|
|
17
23
|
import os
|
|
18
24
|
import pathlib
|
|
@@ -55,6 +61,7 @@ class StepKinds:
|
|
|
55
61
|
choice = "choice"
|
|
56
62
|
root = "root"
|
|
57
63
|
error_step = "error_step"
|
|
64
|
+
monitoring_application = "monitoring_application"
|
|
58
65
|
|
|
59
66
|
|
|
60
67
|
_task_step_fields = [
|
|
@@ -485,13 +492,15 @@ class TaskStep(BaseStep):
|
|
|
485
492
|
class_args[key] = arg
|
|
486
493
|
class_args.update(extra_kwargs)
|
|
487
494
|
|
|
488
|
-
|
|
489
|
-
|
|
490
|
-
|
|
491
|
-
|
|
492
|
-
|
|
493
|
-
|
|
494
|
-
|
|
495
|
+
if not isinstance(self, MonitoringApplicationStep):
|
|
496
|
+
# add common args (name, context, ..) only if target class can accept them
|
|
497
|
+
argspec = getfullargspec(class_object)
|
|
498
|
+
|
|
499
|
+
for key in ["name", "context", "input_path", "result_path", "full_event"]:
|
|
500
|
+
if argspec.varkw or key in argspec.args:
|
|
501
|
+
class_args[key] = getattr(self, key)
|
|
502
|
+
if argspec.varkw or "graph_step" in argspec.args:
|
|
503
|
+
class_args["graph_step"] = self
|
|
495
504
|
return class_args
|
|
496
505
|
|
|
497
506
|
def get_step_class_object(self, namespace):
|
|
@@ -582,6 +591,39 @@ class TaskStep(BaseStep):
|
|
|
582
591
|
return event
|
|
583
592
|
|
|
584
593
|
|
|
594
|
+
class MonitoringApplicationStep(TaskStep):
|
|
595
|
+
"""monitoring application execution step, runs users class code"""
|
|
596
|
+
|
|
597
|
+
kind = "monitoring_application"
|
|
598
|
+
_default_class = ""
|
|
599
|
+
|
|
600
|
+
def __init__(
|
|
601
|
+
self,
|
|
602
|
+
class_name: Union[str, type] = None,
|
|
603
|
+
class_args: dict = None,
|
|
604
|
+
handler: str = None,
|
|
605
|
+
name: str = None,
|
|
606
|
+
after: list = None,
|
|
607
|
+
full_event: bool = None,
|
|
608
|
+
function: str = None,
|
|
609
|
+
responder: bool = None,
|
|
610
|
+
input_path: str = None,
|
|
611
|
+
result_path: str = None,
|
|
612
|
+
):
|
|
613
|
+
super().__init__(
|
|
614
|
+
class_name=class_name,
|
|
615
|
+
class_args=class_args,
|
|
616
|
+
handler=handler,
|
|
617
|
+
name=name,
|
|
618
|
+
after=after,
|
|
619
|
+
full_event=full_event,
|
|
620
|
+
function=function,
|
|
621
|
+
responder=responder,
|
|
622
|
+
input_path=input_path,
|
|
623
|
+
result_path=result_path,
|
|
624
|
+
)
|
|
625
|
+
|
|
626
|
+
|
|
585
627
|
class ErrorStep(TaskStep):
|
|
586
628
|
"""error execution step, runs a class or handler"""
|
|
587
629
|
|
|
@@ -1323,6 +1365,7 @@ classes_map = {
|
|
|
1323
1365
|
"flow": FlowStep,
|
|
1324
1366
|
"queue": QueueStep,
|
|
1325
1367
|
"error_step": ErrorStep,
|
|
1368
|
+
"monitoring_application": MonitoringApplicationStep,
|
|
1326
1369
|
}
|
|
1327
1370
|
|
|
1328
1371
|
|
mlrun/serving/utils.py
CHANGED
|
@@ -46,6 +46,15 @@ def _update_result_body(result_path, event_body, result):
|
|
|
46
46
|
class StepToDict:
|
|
47
47
|
"""auto serialization of graph steps to a python dictionary"""
|
|
48
48
|
|
|
49
|
+
meta_keys = [
|
|
50
|
+
"context",
|
|
51
|
+
"name",
|
|
52
|
+
"input_path",
|
|
53
|
+
"result_path",
|
|
54
|
+
"full_event",
|
|
55
|
+
"kwargs",
|
|
56
|
+
]
|
|
57
|
+
|
|
49
58
|
def to_dict(self, fields: list = None, exclude: list = None, strip: bool = False):
|
|
50
59
|
"""convert the step object to a python dictionary"""
|
|
51
60
|
fields = fields or getattr(self, "_dict_fields", None)
|
|
@@ -54,24 +63,16 @@ class StepToDict:
|
|
|
54
63
|
if exclude:
|
|
55
64
|
fields = [field for field in fields if field not in exclude]
|
|
56
65
|
|
|
57
|
-
meta_keys = [
|
|
58
|
-
"context",
|
|
59
|
-
"name",
|
|
60
|
-
"input_path",
|
|
61
|
-
"result_path",
|
|
62
|
-
"full_event",
|
|
63
|
-
"kwargs",
|
|
64
|
-
]
|
|
65
66
|
args = {
|
|
66
67
|
key: getattr(self, key)
|
|
67
68
|
for key in fields
|
|
68
|
-
if getattr(self, key, None) is not None and key not in meta_keys
|
|
69
|
+
if getattr(self, key, None) is not None and key not in self.meta_keys
|
|
69
70
|
}
|
|
70
71
|
# add storey kwargs or extra kwargs
|
|
71
72
|
if "kwargs" in fields and (hasattr(self, "kwargs") or hasattr(self, "_kwargs")):
|
|
72
73
|
kwargs = getattr(self, "kwargs", {}) or getattr(self, "_kwargs", {})
|
|
73
74
|
for key, value in kwargs.items():
|
|
74
|
-
if key not in meta_keys:
|
|
75
|
+
if key not in self.meta_keys:
|
|
75
76
|
args[key] = value
|
|
76
77
|
|
|
77
78
|
mod_name = self.__class__.__module__
|
|
@@ -80,7 +81,9 @@ class StepToDict:
|
|
|
80
81
|
class_path = f"{mod_name}.{class_path}"
|
|
81
82
|
struct = {
|
|
82
83
|
"class_name": class_path,
|
|
83
|
-
"name": self.name
|
|
84
|
+
"name": self.name
|
|
85
|
+
if hasattr(self, "name") and self.name
|
|
86
|
+
else self.__class__.__name__,
|
|
84
87
|
"class_args": args,
|
|
85
88
|
}
|
|
86
89
|
if hasattr(self, "_STEP_KIND"):
|
|
@@ -94,6 +97,11 @@ class StepToDict:
|
|
|
94
97
|
return struct
|
|
95
98
|
|
|
96
99
|
|
|
100
|
+
class MonitoringApplicationToDict(StepToDict):
|
|
101
|
+
_STEP_KIND = "monitoring_application"
|
|
102
|
+
meta_keys = []
|
|
103
|
+
|
|
104
|
+
|
|
97
105
|
class RouterToDict(StepToDict):
|
|
98
106
|
_STEP_KIND = "router"
|
|
99
107
|
|
mlrun/serving/v2_serving.py
CHANGED
|
@@ -542,48 +542,64 @@ def _init_endpoint_record(
|
|
|
542
542
|
function_uri=graph_server.function_uri, versioned_model=versioned_model_name
|
|
543
543
|
).uid
|
|
544
544
|
|
|
545
|
-
# If model endpoint object was found in DB, skip the creation process.
|
|
546
545
|
try:
|
|
547
|
-
mlrun.get_run_db().get_model_endpoint(
|
|
548
|
-
|
|
546
|
+
model_ep = mlrun.get_run_db().get_model_endpoint(
|
|
547
|
+
project=project, endpoint_id=uid
|
|
548
|
+
)
|
|
549
549
|
except mlrun.errors.MLRunNotFoundError:
|
|
550
|
-
|
|
551
|
-
|
|
552
|
-
|
|
553
|
-
|
|
554
|
-
|
|
555
|
-
|
|
556
|
-
|
|
557
|
-
|
|
558
|
-
|
|
559
|
-
|
|
560
|
-
|
|
561
|
-
|
|
562
|
-
|
|
563
|
-
|
|
564
|
-
|
|
565
|
-
active=True,
|
|
566
|
-
monitoring_mode=mlrun.common.schemas.model_monitoring.ModelMonitoringMode.enabled
|
|
567
|
-
if model.context.server.track_models
|
|
568
|
-
else mlrun.common.schemas.model_monitoring.ModelMonitoringMode.disabled,
|
|
569
|
-
),
|
|
570
|
-
status=mlrun.common.schemas.ModelEndpointStatus(
|
|
571
|
-
endpoint_type=mlrun.common.schemas.model_monitoring.EndpointType.NODE_EP
|
|
550
|
+
model_ep = None
|
|
551
|
+
|
|
552
|
+
if model.context.server.track_models and not model_ep:
|
|
553
|
+
logger.debug("Creating a new model endpoint record", endpoint_id=uid)
|
|
554
|
+
model_endpoint = mlrun.common.schemas.ModelEndpoint(
|
|
555
|
+
metadata=mlrun.common.schemas.ModelEndpointMetadata(
|
|
556
|
+
project=project, labels=model.labels, uid=uid
|
|
557
|
+
),
|
|
558
|
+
spec=mlrun.common.schemas.ModelEndpointSpec(
|
|
559
|
+
function_uri=graph_server.function_uri,
|
|
560
|
+
model=versioned_model_name,
|
|
561
|
+
model_class=model.__class__.__name__,
|
|
562
|
+
model_uri=model.model_path,
|
|
563
|
+
stream_path=config.model_endpoint_monitoring.store_prefixes.default.format(
|
|
564
|
+
project=project, kind="stream"
|
|
572
565
|
),
|
|
573
|
-
|
|
574
|
-
|
|
575
|
-
|
|
576
|
-
|
|
577
|
-
|
|
578
|
-
|
|
579
|
-
|
|
580
|
-
model_endpoint=model_endpoint.dict(),
|
|
581
|
-
)
|
|
566
|
+
active=True,
|
|
567
|
+
monitoring_mode=mlrun.common.schemas.model_monitoring.ModelMonitoringMode.enabled,
|
|
568
|
+
),
|
|
569
|
+
status=mlrun.common.schemas.ModelEndpointStatus(
|
|
570
|
+
endpoint_type=mlrun.common.schemas.model_monitoring.EndpointType.NODE_EP
|
|
571
|
+
),
|
|
572
|
+
)
|
|
582
573
|
|
|
583
|
-
|
|
584
|
-
|
|
574
|
+
db = mlrun.get_run_db()
|
|
575
|
+
db.create_model_endpoint(
|
|
576
|
+
project=project,
|
|
577
|
+
endpoint_id=uid,
|
|
578
|
+
model_endpoint=model_endpoint.dict(),
|
|
579
|
+
)
|
|
585
580
|
|
|
586
|
-
|
|
587
|
-
|
|
581
|
+
elif (
|
|
582
|
+
model_ep
|
|
583
|
+
and (
|
|
584
|
+
model_ep.spec.monitoring_mode
|
|
585
|
+
== mlrun.common.schemas.model_monitoring.ModelMonitoringMode.enabled
|
|
586
|
+
)
|
|
587
|
+
!= model.context.server.track_models
|
|
588
|
+
):
|
|
589
|
+
monitoring_mode = (
|
|
590
|
+
mlrun.common.schemas.model_monitoring.ModelMonitoringMode.enabled
|
|
591
|
+
if model.context.server.track_models
|
|
592
|
+
else mlrun.common.schemas.model_monitoring.ModelMonitoringMode.disabled
|
|
593
|
+
)
|
|
594
|
+
db = mlrun.get_run_db()
|
|
595
|
+
db.patch_model_endpoint(
|
|
596
|
+
project=project,
|
|
597
|
+
endpoint_id=uid,
|
|
598
|
+
attributes={"monitoring_mode": monitoring_mode},
|
|
599
|
+
)
|
|
600
|
+
logger.debug(
|
|
601
|
+
f"Updating model endpoint monitoring_mode to {monitoring_mode}",
|
|
602
|
+
endpoint_id=uid,
|
|
603
|
+
)
|
|
588
604
|
|
|
589
605
|
return uid
|
mlrun/utils/helpers.py
CHANGED
|
@@ -26,7 +26,7 @@ import sys
|
|
|
26
26
|
import typing
|
|
27
27
|
import warnings
|
|
28
28
|
from datetime import datetime, timezone
|
|
29
|
-
from importlib import import_module
|
|
29
|
+
from importlib import import_module, reload
|
|
30
30
|
from os import path
|
|
31
31
|
from types import ModuleType
|
|
32
32
|
from typing import Any, Optional
|
|
@@ -1019,16 +1019,35 @@ def create_class(pkg_class: str):
|
|
|
1019
1019
|
return class_
|
|
1020
1020
|
|
|
1021
1021
|
|
|
1022
|
-
def create_function(pkg_func: str):
|
|
1022
|
+
def create_function(pkg_func: str, reload_modules: bool = False):
|
|
1023
1023
|
"""Create a function from a package.module.function string
|
|
1024
1024
|
|
|
1025
1025
|
:param pkg_func: full function location,
|
|
1026
1026
|
e.g. "sklearn.feature_selection.f_classif"
|
|
1027
|
+
:param reload_modules: reload the function again.
|
|
1027
1028
|
"""
|
|
1028
1029
|
splits = pkg_func.split(".")
|
|
1029
1030
|
pkg_module = ".".join(splits[:-1])
|
|
1030
1031
|
cb_fname = splits[-1]
|
|
1031
1032
|
pkg_module = __import__(pkg_module, fromlist=[cb_fname])
|
|
1033
|
+
|
|
1034
|
+
if reload_modules:
|
|
1035
|
+
# Even though the function appears in the modules list, we need to reload
|
|
1036
|
+
# the code again because it may have changed
|
|
1037
|
+
try:
|
|
1038
|
+
logger.debug("Reloading module", module=pkg_func)
|
|
1039
|
+
_reload(
|
|
1040
|
+
pkg_module,
|
|
1041
|
+
max_recursion_depth=mlrun.mlconf.function.spec.reload_max_recursion_depth,
|
|
1042
|
+
)
|
|
1043
|
+
except Exception as exc:
|
|
1044
|
+
logger.warning(
|
|
1045
|
+
"Failed to reload module. Not all associated modules can be reloaded, import them manually."
|
|
1046
|
+
"Or, with Jupyter, restart the Python kernel.",
|
|
1047
|
+
module=pkg_func,
|
|
1048
|
+
err=mlrun.errors.err_to_str(exc),
|
|
1049
|
+
)
|
|
1050
|
+
|
|
1032
1051
|
function_ = getattr(pkg_module, cb_fname)
|
|
1033
1052
|
return function_
|
|
1034
1053
|
|
|
@@ -1086,8 +1105,14 @@ def get_class(class_name, namespace=None):
|
|
|
1086
1105
|
return class_object
|
|
1087
1106
|
|
|
1088
1107
|
|
|
1089
|
-
def get_function(function,
|
|
1090
|
-
"""
|
|
1108
|
+
def get_function(function, namespaces, reload_modules: bool = False):
|
|
1109
|
+
"""Return function callable object from function name string
|
|
1110
|
+
|
|
1111
|
+
:param function: path to the function ([class_name::]function)
|
|
1112
|
+
:param namespaces: one or list of namespaces/modules to search the function in
|
|
1113
|
+
:param reload_modules: reload the function again
|
|
1114
|
+
:return: function handler (callable)
|
|
1115
|
+
"""
|
|
1091
1116
|
if callable(function):
|
|
1092
1117
|
return function
|
|
1093
1118
|
|
|
@@ -1096,12 +1121,12 @@ def get_function(function, namespace):
|
|
|
1096
1121
|
if not function.endswith(")"):
|
|
1097
1122
|
raise ValueError('function expression must start with "(" and end with ")"')
|
|
1098
1123
|
return eval("lambda event: " + function[1:-1], {}, {})
|
|
1099
|
-
function_object = _search_in_namespaces(function,
|
|
1124
|
+
function_object = _search_in_namespaces(function, namespaces)
|
|
1100
1125
|
if function_object is not None:
|
|
1101
1126
|
return function_object
|
|
1102
1127
|
|
|
1103
1128
|
try:
|
|
1104
|
-
function_object = create_function(function)
|
|
1129
|
+
function_object = create_function(function, reload_modules)
|
|
1105
1130
|
except (ImportError, ValueError) as exc:
|
|
1106
1131
|
raise ImportError(
|
|
1107
1132
|
f"state/function init failed, handler '{function}' not found"
|
|
@@ -1110,19 +1135,24 @@ def get_function(function, namespace):
|
|
|
1110
1135
|
|
|
1111
1136
|
|
|
1112
1137
|
def get_handler_extended(
|
|
1113
|
-
handler_path: str,
|
|
1138
|
+
handler_path: str,
|
|
1139
|
+
context=None,
|
|
1140
|
+
class_args: dict = None,
|
|
1141
|
+
namespaces=None,
|
|
1142
|
+
reload_modules: bool = False,
|
|
1114
1143
|
):
|
|
1115
|
-
"""
|
|
1144
|
+
"""Get function handler from [class_name::]handler string
|
|
1116
1145
|
|
|
1117
1146
|
:param handler_path: path to the function ([class_name::]handler)
|
|
1118
1147
|
:param context: MLRun function/job client context
|
|
1119
1148
|
:param class_args: optional dict of class init kwargs
|
|
1120
1149
|
:param namespaces: one or list of namespaces/modules to search the handler in
|
|
1150
|
+
:param reload_modules: reload the function again
|
|
1121
1151
|
:return: function handler (callable)
|
|
1122
1152
|
"""
|
|
1123
1153
|
class_args = class_args or {}
|
|
1124
1154
|
if "::" not in handler_path:
|
|
1125
|
-
return get_function(handler_path, namespaces)
|
|
1155
|
+
return get_function(handler_path, namespaces, reload_modules)
|
|
1126
1156
|
|
|
1127
1157
|
splitted = handler_path.split("::")
|
|
1128
1158
|
class_path = splitted[0].strip()
|
|
@@ -1628,3 +1658,15 @@ def format_alert_summary(
|
|
|
1628
1658
|
result = result.replace("{{name}}", alert.name)
|
|
1629
1659
|
result = result.replace("{{entity}}", event_data.entity.ids[0])
|
|
1630
1660
|
return result
|
|
1661
|
+
|
|
1662
|
+
|
|
1663
|
+
def _reload(module, max_recursion_depth):
|
|
1664
|
+
"""Recursively reload modules."""
|
|
1665
|
+
if max_recursion_depth <= 0:
|
|
1666
|
+
return
|
|
1667
|
+
|
|
1668
|
+
reload(module)
|
|
1669
|
+
for attribute_name in dir(module):
|
|
1670
|
+
attribute = getattr(module, attribute_name)
|
|
1671
|
+
if type(attribute) is ModuleType:
|
|
1672
|
+
_reload(attribute, max_recursion_depth - 1)
|
|
@@ -69,16 +69,27 @@ class NotificationBase:
|
|
|
69
69
|
if custom_html:
|
|
70
70
|
return custom_html
|
|
71
71
|
|
|
72
|
-
if self.name:
|
|
73
|
-
message = f"{self.name}: {message}"
|
|
74
|
-
|
|
75
72
|
if alert:
|
|
76
73
|
if not event_data:
|
|
77
74
|
return f"[{severity}] {message}"
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
75
|
+
|
|
76
|
+
html = f"<h3>[{severity}] {message}</h3>"
|
|
77
|
+
html += f"<br>{alert.name} alert has occurred<br>"
|
|
78
|
+
html += f"<br><h4>Project:</h4>{alert.project}<br>"
|
|
79
|
+
html += f"<br><h4>ID:</h4>{event_data.entity.ids[0]}<br>"
|
|
80
|
+
html += f"<br><h4>Summary:</h4>{mlrun.utils.helpers.format_alert_summary(alert, event_data)}<br>"
|
|
81
|
+
|
|
82
|
+
if event_data.value_dict:
|
|
83
|
+
html += "<br><h4>Event data:</h4>"
|
|
84
|
+
for key, value in event_data.value_dict.items():
|
|
85
|
+
html += f"{key}: {value}<br>"
|
|
86
|
+
|
|
87
|
+
overview_type, url = self._get_overview_type_and_url(alert, event_data)
|
|
88
|
+
html += f"<br><h4>Overview:</h4><a href={url}>{overview_type}</a>"
|
|
89
|
+
return html
|
|
90
|
+
|
|
91
|
+
if self.name:
|
|
92
|
+
message = f"{self.name}: {message}"
|
|
82
93
|
|
|
83
94
|
if not runs:
|
|
84
95
|
return f"[{severity}] {message}"
|
|
@@ -90,3 +101,24 @@ class NotificationBase:
|
|
|
90
101
|
html += "<br>click the hyper links below to see detailed results<br>"
|
|
91
102
|
html += runs.show(display=False, short=True)
|
|
92
103
|
return html
|
|
104
|
+
|
|
105
|
+
def _get_overview_type_and_url(
|
|
106
|
+
self,
|
|
107
|
+
alert: mlrun.common.schemas.AlertConfig,
|
|
108
|
+
event_data: mlrun.common.schemas.Event,
|
|
109
|
+
) -> (str, str):
|
|
110
|
+
if (
|
|
111
|
+
event_data.entity.kind == mlrun.common.schemas.alert.EventEntityKind.JOB
|
|
112
|
+
): # JOB entity
|
|
113
|
+
uid = event_data.value_dict.get("uid")
|
|
114
|
+
url = mlrun.utils.helpers.get_ui_url(alert.project, uid)
|
|
115
|
+
overview_type = "Job overview"
|
|
116
|
+
else: # MODEL entity
|
|
117
|
+
model_name = event_data.value_dict.get("model")
|
|
118
|
+
model_endpoint_id = event_data.value_dict.get("model_endpoint_id")
|
|
119
|
+
url = mlrun.utils.helpers.get_model_endpoint_url(
|
|
120
|
+
alert.project, model_name, model_endpoint_id
|
|
121
|
+
)
|
|
122
|
+
overview_type = "Model endpoint"
|
|
123
|
+
|
|
124
|
+
return overview_type, url
|
|
@@ -153,20 +153,7 @@ class SlackNotification(NotificationBase):
|
|
|
153
153
|
data_text = "\n".join(data_lines)
|
|
154
154
|
line.append(self._get_slack_row(f"*Event data:*\n{data_text}"))
|
|
155
155
|
|
|
156
|
-
|
|
157
|
-
event_data.entity.kind == mlrun.common.schemas.alert.EventEntityKind.JOB
|
|
158
|
-
): # JOB entity
|
|
159
|
-
uid = event_data.value_dict.get("uid")
|
|
160
|
-
url = mlrun.utils.helpers.get_ui_url(alert.project, uid)
|
|
161
|
-
overview_type = "Job overview"
|
|
162
|
-
else: # MODEL entity
|
|
163
|
-
model_name = event_data.value_dict.get("model")
|
|
164
|
-
model_endpoint_id = event_data.value_dict.get("model_endpoint_id")
|
|
165
|
-
url = mlrun.utils.helpers.get_model_endpoint_url(
|
|
166
|
-
alert.project, model_name, model_endpoint_id
|
|
167
|
-
)
|
|
168
|
-
overview_type = "Model endpoint"
|
|
169
|
-
|
|
156
|
+
overview_type, url = self._get_overview_type_and_url(alert, event_data)
|
|
170
157
|
line.append(self._get_slack_row(f"*Overview:*\n<{url}|*{overview_type}*>"))
|
|
171
158
|
|
|
172
159
|
return line
|