mlrun 1.7.0rc16__py3-none-any.whl → 1.7.0rc17__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of mlrun might be problematic. Click here for more details.
- mlrun/alerts/alert.py +26 -23
- mlrun/artifacts/model.py +1 -1
- mlrun/common/schemas/__init__.py +7 -1
- mlrun/common/schemas/alert.py +18 -1
- mlrun/common/schemas/model_monitoring/constants.py +1 -0
- mlrun/common/schemas/project.py +3 -1
- mlrun/config.py +7 -3
- mlrun/db/base.py +1 -1
- mlrun/db/nopdb.py +5 -2
- mlrun/lists.py +2 -0
- mlrun/model.py +8 -6
- mlrun/model_monitoring/db/tsdb/v3io/v3io_connector.py +38 -0
- mlrun/model_monitoring/writer.py +4 -4
- mlrun/projects/project.py +7 -3
- mlrun/runtimes/__init__.py +1 -0
- mlrun/runtimes/nuclio/api_gateway.py +97 -77
- mlrun/runtimes/nuclio/application/application.py +160 -7
- mlrun/runtimes/nuclio/function.py +18 -12
- mlrun/track/tracker.py +2 -1
- mlrun/utils/helpers.py +8 -2
- mlrun/utils/logger.py +11 -6
- mlrun/utils/notifications/notification_pusher.py +7 -1
- mlrun/utils/version/version.json +2 -2
- {mlrun-1.7.0rc16.dist-info → mlrun-1.7.0rc17.dist-info}/METADATA +2 -2
- {mlrun-1.7.0rc16.dist-info → mlrun-1.7.0rc17.dist-info}/RECORD +29 -29
- {mlrun-1.7.0rc16.dist-info → mlrun-1.7.0rc17.dist-info}/LICENSE +0 -0
- {mlrun-1.7.0rc16.dist-info → mlrun-1.7.0rc17.dist-info}/WHEEL +0 -0
- {mlrun-1.7.0rc16.dist-info → mlrun-1.7.0rc17.dist-info}/entry_points.txt +0 -0
- {mlrun-1.7.0rc16.dist-info → mlrun-1.7.0rc17.dist-info}/top_level.txt +0 -0
mlrun/alerts/alert.py
CHANGED
|
@@ -15,8 +15,7 @@
|
|
|
15
15
|
from typing import Union
|
|
16
16
|
|
|
17
17
|
import mlrun
|
|
18
|
-
import mlrun.common.schemas.alert as
|
|
19
|
-
from mlrun.common.schemas.notification import Notification
|
|
18
|
+
import mlrun.common.schemas.alert as alert_objects
|
|
20
19
|
from mlrun.model import ModelObj
|
|
21
20
|
|
|
22
21
|
|
|
@@ -36,17 +35,17 @@ class AlertConfig(ModelObj):
|
|
|
36
35
|
self,
|
|
37
36
|
project: str = None,
|
|
38
37
|
name: str = None,
|
|
39
|
-
template: Union[
|
|
38
|
+
template: Union[alert_objects.AlertTemplate, str] = None,
|
|
40
39
|
description: str = None,
|
|
41
40
|
summary: str = None,
|
|
42
|
-
severity:
|
|
43
|
-
trigger:
|
|
44
|
-
criteria:
|
|
45
|
-
reset_policy:
|
|
46
|
-
notifications: list[
|
|
47
|
-
entities:
|
|
41
|
+
severity: alert_objects.AlertSeverity = None,
|
|
42
|
+
trigger: alert_objects.AlertTrigger = None,
|
|
43
|
+
criteria: alert_objects.AlertCriteria = None,
|
|
44
|
+
reset_policy: alert_objects.ResetPolicy = None,
|
|
45
|
+
notifications: list[alert_objects.AlertNotification] = None,
|
|
46
|
+
entities: alert_objects.EventEntities = None,
|
|
48
47
|
id: int = None,
|
|
49
|
-
state:
|
|
48
|
+
state: alert_objects.AlertActiveState = None,
|
|
50
49
|
created: str = None,
|
|
51
50
|
count: int = None,
|
|
52
51
|
):
|
|
@@ -81,8 +80,10 @@ class AlertConfig(ModelObj):
|
|
|
81
80
|
else self.entities
|
|
82
81
|
)
|
|
83
82
|
data["notifications"] = [
|
|
84
|
-
|
|
85
|
-
|
|
83
|
+
notification_data.dict()
|
|
84
|
+
if not isinstance(notification_data, dict)
|
|
85
|
+
else notification_data
|
|
86
|
+
for notification_data in self.notifications
|
|
86
87
|
]
|
|
87
88
|
data["trigger"] = (
|
|
88
89
|
self.trigger.dict() if not isinstance(self.trigger, dict) else self.trigger
|
|
@@ -95,35 +96,37 @@ class AlertConfig(ModelObj):
|
|
|
95
96
|
|
|
96
97
|
entity_data = struct.get("entities")
|
|
97
98
|
if entity_data:
|
|
98
|
-
entity_obj =
|
|
99
|
+
entity_obj = alert_objects.EventEntities.parse_obj(entity_data)
|
|
99
100
|
new_obj.entities = entity_obj
|
|
100
101
|
|
|
101
102
|
notifications_data = struct.get("notifications")
|
|
102
103
|
if notifications_data:
|
|
103
104
|
notifications_objs = [
|
|
104
|
-
|
|
105
|
-
for
|
|
105
|
+
alert_objects.AlertNotification.parse_obj(notification)
|
|
106
|
+
for notification in notifications_data
|
|
106
107
|
]
|
|
107
108
|
new_obj.notifications = notifications_objs
|
|
108
109
|
|
|
109
110
|
trigger_data = struct.get("trigger")
|
|
110
111
|
if trigger_data:
|
|
111
|
-
trigger_obj =
|
|
112
|
+
trigger_obj = alert_objects.AlertTrigger.parse_obj(trigger_data)
|
|
112
113
|
new_obj.trigger = trigger_obj
|
|
113
114
|
|
|
114
115
|
return new_obj
|
|
115
116
|
|
|
116
|
-
def with_notifications(self, notifications: list[
|
|
117
|
+
def with_notifications(self, notifications: list[alert_objects.AlertNotification]):
|
|
117
118
|
if not isinstance(notifications, list) or not all(
|
|
118
|
-
isinstance(item,
|
|
119
|
+
isinstance(item, alert_objects.AlertNotification) for item in notifications
|
|
119
120
|
):
|
|
120
|
-
raise ValueError(
|
|
121
|
-
|
|
122
|
-
|
|
121
|
+
raise ValueError(
|
|
122
|
+
"Notifications parameter must be a list of AlertNotification"
|
|
123
|
+
)
|
|
124
|
+
for notification_data in notifications:
|
|
125
|
+
self.notifications.append(notification_data)
|
|
123
126
|
return self
|
|
124
127
|
|
|
125
|
-
def with_entities(self, entities:
|
|
126
|
-
if not isinstance(entities,
|
|
128
|
+
def with_entities(self, entities: alert_objects.EventEntities):
|
|
129
|
+
if not isinstance(entities, alert_objects.EventEntities):
|
|
127
130
|
raise ValueError("Entities parameter must be of type: EventEntities")
|
|
128
131
|
self.entities = entities
|
|
129
132
|
return self
|
mlrun/artifacts/model.py
CHANGED
|
@@ -509,7 +509,7 @@ def _get_extra(target, extra_data, is_dir=False):
|
|
|
509
509
|
def _remove_tag_from_spec_yaml(model_spec):
|
|
510
510
|
spec_dict = model_spec.to_dict()
|
|
511
511
|
spec_dict["metadata"].pop("tag", None)
|
|
512
|
-
return yaml.
|
|
512
|
+
return yaml.safe_dump(spec_dict)
|
|
513
513
|
|
|
514
514
|
|
|
515
515
|
def update_model(
|
mlrun/common/schemas/__init__.py
CHANGED
|
@@ -14,7 +14,13 @@
|
|
|
14
14
|
#
|
|
15
15
|
# flake8: noqa - this is until we take care of the F401 violations with respect to __all__ & sphinx
|
|
16
16
|
|
|
17
|
-
from .alert import
|
|
17
|
+
from .alert import (
|
|
18
|
+
AlertActiveState,
|
|
19
|
+
AlertConfig,
|
|
20
|
+
AlertNotification,
|
|
21
|
+
AlertTemplate,
|
|
22
|
+
Event,
|
|
23
|
+
)
|
|
18
24
|
from .api_gateway import (
|
|
19
25
|
APIGateway,
|
|
20
26
|
APIGatewayAuthenticationMode,
|
mlrun/common/schemas/alert.py
CHANGED
|
@@ -101,6 +101,18 @@ class ResetPolicy(StrEnum):
|
|
|
101
101
|
AUTO = "auto"
|
|
102
102
|
|
|
103
103
|
|
|
104
|
+
class AlertNotification(pydantic.BaseModel):
|
|
105
|
+
notification: Notification
|
|
106
|
+
cooldown_period: Annotated[
|
|
107
|
+
str,
|
|
108
|
+
pydantic.Field(
|
|
109
|
+
description="Period during which notifications "
|
|
110
|
+
"will not be sent after initial send. The format of this would be in time."
|
|
111
|
+
" e.g. 1d, 3h, 5m, 15s"
|
|
112
|
+
),
|
|
113
|
+
] = None
|
|
114
|
+
|
|
115
|
+
|
|
104
116
|
class AlertConfig(pydantic.BaseModel):
|
|
105
117
|
project: str
|
|
106
118
|
id: int = None
|
|
@@ -121,10 +133,15 @@ class AlertConfig(pydantic.BaseModel):
|
|
|
121
133
|
trigger: AlertTrigger
|
|
122
134
|
criteria: Optional[AlertCriteria]
|
|
123
135
|
reset_policy: ResetPolicy = ResetPolicy.MANUAL
|
|
124
|
-
notifications: pydantic.conlist(
|
|
136
|
+
notifications: pydantic.conlist(AlertNotification, min_items=1)
|
|
125
137
|
state: AlertActiveState = AlertActiveState.INACTIVE
|
|
126
138
|
count: Optional[int] = 0
|
|
127
139
|
|
|
140
|
+
def get_raw_notifications(self) -> list[Notification]:
|
|
141
|
+
return [
|
|
142
|
+
alert_notification.notification for alert_notification in self.notifications
|
|
143
|
+
]
|
|
144
|
+
|
|
128
145
|
|
|
129
146
|
class AlertsModes(StrEnum):
|
|
130
147
|
enabled = "enabled"
|
mlrun/common/schemas/project.py
CHANGED
|
@@ -113,7 +113,9 @@ class ProjectSummary(pydantic.BaseModel):
|
|
|
113
113
|
runs_completed_recent_count: int
|
|
114
114
|
runs_failed_recent_count: int
|
|
115
115
|
runs_running_count: int
|
|
116
|
-
|
|
116
|
+
distinct_schedules_count: int
|
|
117
|
+
distinct_scheduled_jobs_pending_count: int
|
|
118
|
+
distinct_scheduled_pipelines_pending_count: int
|
|
117
119
|
pipelines_completed_recent_count: typing.Optional[int] = None
|
|
118
120
|
pipelines_failed_recent_count: typing.Optional[int] = None
|
|
119
121
|
pipelines_running_count: typing.Optional[int] = None
|
mlrun/config.py
CHANGED
|
@@ -232,6 +232,10 @@ default_config = {
|
|
|
232
232
|
"databricks": {
|
|
233
233
|
"artifact_directory_path": "/mlrun_databricks_runtime/artifacts_dictionaries"
|
|
234
234
|
},
|
|
235
|
+
"application": {
|
|
236
|
+
"default_sidecar_internal_port": 8050,
|
|
237
|
+
"default_authentication_mode": "accessKey",
|
|
238
|
+
},
|
|
235
239
|
},
|
|
236
240
|
# TODO: function defaults should be moved to the function spec config above
|
|
237
241
|
"function_defaults": {
|
|
@@ -1402,14 +1406,14 @@ def read_env(env=None, prefix=env_prefix):
|
|
|
1402
1406
|
if log_formatter_name := config.get("log_formatter"):
|
|
1403
1407
|
import mlrun.utils.logger
|
|
1404
1408
|
|
|
1405
|
-
log_formatter = mlrun.utils.
|
|
1409
|
+
log_formatter = mlrun.utils.resolve_formatter_by_kind(
|
|
1406
1410
|
mlrun.utils.FormatterKinds(log_formatter_name)
|
|
1407
1411
|
)
|
|
1408
1412
|
current_handler = mlrun.utils.logger.get_handler("default")
|
|
1409
1413
|
current_formatter_name = current_handler.formatter.__class__.__name__
|
|
1410
|
-
desired_formatter_name = log_formatter.
|
|
1414
|
+
desired_formatter_name = log_formatter.__name__
|
|
1411
1415
|
if current_formatter_name != desired_formatter_name:
|
|
1412
|
-
current_handler.setFormatter(log_formatter)
|
|
1416
|
+
current_handler.setFormatter(log_formatter())
|
|
1413
1417
|
|
|
1414
1418
|
# The default function pod resource values are of type str; however, when reading from environment variable numbers,
|
|
1415
1419
|
# it converts them to type int if contains only number, so we want to convert them to str.
|
mlrun/db/base.py
CHANGED
mlrun/db/nopdb.py
CHANGED
|
@@ -520,8 +520,11 @@ class NopDB(RunDBInterface):
|
|
|
520
520
|
|
|
521
521
|
def store_api_gateway(
|
|
522
522
|
self,
|
|
523
|
-
|
|
524
|
-
|
|
523
|
+
api_gateway: Union[
|
|
524
|
+
mlrun.common.schemas.APIGateway,
|
|
525
|
+
mlrun.runtimes.nuclio.api_gateway.APIGateway,
|
|
526
|
+
],
|
|
527
|
+
project: str = None,
|
|
525
528
|
) -> mlrun.common.schemas.APIGateway:
|
|
526
529
|
pass
|
|
527
530
|
|
mlrun/lists.py
CHANGED
|
@@ -29,6 +29,7 @@ list_header = [
|
|
|
29
29
|
"iter",
|
|
30
30
|
"start",
|
|
31
31
|
"state",
|
|
32
|
+
"kind",
|
|
32
33
|
"name",
|
|
33
34
|
"labels",
|
|
34
35
|
"inputs",
|
|
@@ -57,6 +58,7 @@ class RunList(list):
|
|
|
57
58
|
get_in(run, "metadata.iteration", ""),
|
|
58
59
|
get_in(run, "status.start_time", ""),
|
|
59
60
|
get_in(run, "status.state", ""),
|
|
61
|
+
get_in(run, "step_kind", get_in(run, "kind", "")),
|
|
60
62
|
get_in(run, "metadata.name", ""),
|
|
61
63
|
get_in(run, "metadata.labels", ""),
|
|
62
64
|
get_in(run, "spec.inputs", ""),
|
mlrun/model.py
CHANGED
|
@@ -681,10 +681,14 @@ class Notification(ModelObj):
|
|
|
681
681
|
|
|
682
682
|
def __init__(
|
|
683
683
|
self,
|
|
684
|
-
kind=
|
|
684
|
+
kind: mlrun.common.schemas.notification.NotificationKind = (
|
|
685
|
+
mlrun.common.schemas.notification.NotificationKind.slack
|
|
686
|
+
),
|
|
685
687
|
name=None,
|
|
686
688
|
message=None,
|
|
687
|
-
severity=
|
|
689
|
+
severity: mlrun.common.schemas.notification.NotificationSeverity = (
|
|
690
|
+
mlrun.common.schemas.notification.NotificationSeverity.INFO
|
|
691
|
+
),
|
|
688
692
|
when=None,
|
|
689
693
|
condition=None,
|
|
690
694
|
secret_params=None,
|
|
@@ -693,12 +697,10 @@ class Notification(ModelObj):
|
|
|
693
697
|
sent_time=None,
|
|
694
698
|
reason=None,
|
|
695
699
|
):
|
|
696
|
-
self.kind = kind
|
|
700
|
+
self.kind = kind
|
|
697
701
|
self.name = name or ""
|
|
698
702
|
self.message = message or ""
|
|
699
|
-
self.severity =
|
|
700
|
-
severity or mlrun.common.schemas.notification.NotificationSeverity.INFO
|
|
701
|
-
)
|
|
703
|
+
self.severity = severity
|
|
702
704
|
self.when = when or ["completed"]
|
|
703
705
|
self.condition = condition or ""
|
|
704
706
|
self.secret_params = secret_params or {}
|
mlrun/model_monitoring/db/tsdb/v3io/v3io_connector.py
CHANGED
|
@@ -99,6 +99,23 @@ class V3IOTSDBConnector(TSDBConnector):
|
|
|
99
99
|
monitoring_application_path + mm_constants.MonitoringTSDBTables.METRICS
|
|
100
100
|
)
|
|
101
101
|
|
|
102
|
+
monitoring_predictions_full_path = (
|
|
103
|
+
mlrun.mlconf.get_model_monitoring_file_target_path(
|
|
104
|
+
project=self.project,
|
|
105
|
+
kind=mm_constants.FileTargetKind.PREDICTIONS,
|
|
106
|
+
)
|
|
107
|
+
)
|
|
108
|
+
(
|
|
109
|
+
_,
|
|
110
|
+
_,
|
|
111
|
+
monitoring_predictions_path,
|
|
112
|
+
) = mlrun.common.model_monitoring.helpers.parse_model_endpoint_store_prefix(
|
|
113
|
+
monitoring_predictions_full_path
|
|
114
|
+
)
|
|
115
|
+
self.tables[mm_constants.FileTargetKind.PREDICTIONS] = (
|
|
116
|
+
monitoring_predictions_path
|
|
117
|
+
)
|
|
118
|
+
|
|
102
119
|
def create_tsdb_application_tables(self):
|
|
103
120
|
"""
|
|
104
121
|
Create the application tables using the TSDB connector. At the moment we support 2 types of application tables:
|
|
@@ -134,6 +151,27 @@ class V3IOTSDBConnector(TSDBConnector):
|
|
|
134
151
|
- custom_metrics (user-defined metrics)
|
|
135
152
|
"""
|
|
136
153
|
|
|
154
|
+
# Write latency per prediction, labeled by endpoint ID only
|
|
155
|
+
graph.add_step(
|
|
156
|
+
"storey.TSDBTarget",
|
|
157
|
+
name="tsdb_predictions",
|
|
158
|
+
after="MapFeatureNames",
|
|
159
|
+
path=f"{self.container}/{self.tables[mm_constants.FileTargetKind.PREDICTIONS]}",
|
|
160
|
+
rate="1/s",
|
|
161
|
+
time_col=mm_constants.EventFieldType.TIMESTAMP,
|
|
162
|
+
container=self.container,
|
|
163
|
+
v3io_frames=self.v3io_framesd,
|
|
164
|
+
columns=["latency"],
|
|
165
|
+
index_cols=[
|
|
166
|
+
mm_constants.EventFieldType.ENDPOINT_ID,
|
|
167
|
+
],
|
|
168
|
+
aggr="count,avg",
|
|
169
|
+
aggr_granularity="1m",
|
|
170
|
+
max_events=tsdb_batching_max_events,
|
|
171
|
+
flush_after_seconds=tsdb_batching_timeout_secs,
|
|
172
|
+
key=mm_constants.EventFieldType.ENDPOINT_ID,
|
|
173
|
+
)
|
|
174
|
+
|
|
137
175
|
# Before writing data to TSDB, create dictionary of 2-3 dictionaries that contains
|
|
138
176
|
# stats and details about the events
|
|
139
177
|
|
mlrun/model_monitoring/writer.py
CHANGED
|
@@ -17,7 +17,7 @@ from typing import Any, NewType
|
|
|
17
17
|
|
|
18
18
|
import mlrun.common.model_monitoring
|
|
19
19
|
import mlrun.common.schemas
|
|
20
|
-
import mlrun.common.schemas.alert as
|
|
20
|
+
import mlrun.common.schemas.alert as alert_objects
|
|
21
21
|
import mlrun.model_monitoring
|
|
22
22
|
from mlrun.common.schemas.model_monitoring.constants import (
|
|
23
23
|
EventFieldType,
|
|
@@ -123,14 +123,14 @@ class ModelMonitoringWriter(StepToDict):
|
|
|
123
123
|
) -> None:
|
|
124
124
|
logger.info("Sending an alert")
|
|
125
125
|
entity = mlrun.common.schemas.alert.EventEntities(
|
|
126
|
-
kind=
|
|
126
|
+
kind=alert_objects.EventEntityKind.MODEL,
|
|
127
127
|
project=project_name,
|
|
128
128
|
ids=[model_endpoint],
|
|
129
129
|
)
|
|
130
130
|
event_kind = (
|
|
131
|
-
|
|
131
|
+
alert_objects.EventKind.DRIFT_DETECTED
|
|
132
132
|
if drift_status == ResultStatusApp.detected.value
|
|
133
|
-
else
|
|
133
|
+
else alert_objects.EventKind.DRIFT_SUSPECTED
|
|
134
134
|
)
|
|
135
135
|
event_data = mlrun.common.schemas.Event(
|
|
136
136
|
kind=event_kind, entity=entity, value_dict=event_value
|
mlrun/projects/project.py
CHANGED
|
@@ -2962,8 +2962,12 @@ class MlrunProject(ModelObj):
|
|
|
2962
2962
|
engine = "remote"
|
|
2963
2963
|
# The default engine is kfp if not given:
|
|
2964
2964
|
workflow_engine = get_workflow_engine(engine or workflow_spec.engine, local)
|
|
2965
|
-
if not inner_engine and engine == "remote":
|
|
2966
|
-
|
|
2965
|
+
if not inner_engine and workflow_engine.engine == "remote":
|
|
2966
|
+
# if inner engine is set to remote, assume kfp as the default inner engine with remote as the runner
|
|
2967
|
+
engine_kind = (
|
|
2968
|
+
workflow_spec.engine if workflow_spec.engine != "remote" else "kfp"
|
|
2969
|
+
)
|
|
2970
|
+
inner_engine = get_workflow_engine(engine_kind, local).engine
|
|
2967
2971
|
workflow_spec.engine = inner_engine or workflow_engine.engine
|
|
2968
2972
|
|
|
2969
2973
|
run = workflow_engine.run(
|
|
@@ -2991,7 +2995,7 @@ class MlrunProject(ModelObj):
|
|
|
2991
2995
|
# run's engine gets replaced with inner engine if engine is remote,
|
|
2992
2996
|
# so in that case we need to get the status from the remote engine manually
|
|
2993
2997
|
# TODO: support watch for remote:local
|
|
2994
|
-
if engine == "remote" and status_engine.engine != "local":
|
|
2998
|
+
if workflow_engine.engine == "remote" and status_engine.engine != "local":
|
|
2995
2999
|
status_engine = _RemoteRunner
|
|
2996
3000
|
|
|
2997
3001
|
status_engine.get_run_status(project=self, run=run, timeout=timeout)
|
mlrun/runtimes/__init__.py
CHANGED
|
@@ -43,6 +43,7 @@ from .nuclio import (
|
|
|
43
43
|
new_v2_model_server,
|
|
44
44
|
nuclio_init_hook,
|
|
45
45
|
)
|
|
46
|
+
from .nuclio.api_gateway import APIGateway
|
|
46
47
|
from .nuclio.application import ApplicationRuntime
|
|
47
48
|
from .nuclio.serving import serving_subkind
|
|
48
49
|
from .remotesparkjob import RemoteSparkRuntime
|