mlrun 1.7.0rc16__py3-none-any.whl → 1.7.0rc18__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of mlrun might be problematic.
- mlrun/alerts/alert.py +27 -24
- mlrun/artifacts/manager.py +5 -1
- mlrun/artifacts/model.py +1 -1
- mlrun/common/runtimes/constants.py +3 -0
- mlrun/common/schemas/__init__.py +8 -2
- mlrun/common/schemas/alert.py +49 -10
- mlrun/common/schemas/client_spec.py +1 -0
- mlrun/common/schemas/function.py +4 -0
- mlrun/common/schemas/model_monitoring/__init__.py +3 -1
- mlrun/common/schemas/model_monitoring/constants.py +21 -1
- mlrun/common/schemas/model_monitoring/grafana.py +9 -5
- mlrun/common/schemas/model_monitoring/model_endpoints.py +17 -6
- mlrun/common/schemas/project.py +3 -1
- mlrun/config.py +9 -3
- mlrun/data_types/to_pandas.py +5 -5
- mlrun/datastore/datastore.py +6 -2
- mlrun/datastore/redis.py +2 -2
- mlrun/datastore/s3.py +5 -0
- mlrun/datastore/sources.py +111 -6
- mlrun/datastore/targets.py +2 -2
- mlrun/db/base.py +6 -2
- mlrun/db/httpdb.py +22 -3
- mlrun/db/nopdb.py +10 -3
- mlrun/errors.py +6 -0
- mlrun/feature_store/retrieval/conversion.py +5 -5
- mlrun/feature_store/retrieval/job.py +3 -2
- mlrun/feature_store/retrieval/spark_merger.py +2 -1
- mlrun/frameworks/_dl_common/loggers/tensorboard_logger.py +2 -2
- mlrun/lists.py +2 -0
- mlrun/model.py +8 -6
- mlrun/model_monitoring/db/stores/base/store.py +16 -3
- mlrun/model_monitoring/db/stores/sqldb/sql_store.py +44 -43
- mlrun/model_monitoring/db/stores/v3io_kv/kv_store.py +190 -91
- mlrun/model_monitoring/db/tsdb/__init__.py +35 -6
- mlrun/model_monitoring/db/tsdb/base.py +25 -18
- mlrun/model_monitoring/db/tsdb/tdengine/__init__.py +15 -0
- mlrun/model_monitoring/db/tsdb/tdengine/schemas.py +207 -0
- mlrun/model_monitoring/db/tsdb/tdengine/stream_graph_steps.py +45 -0
- mlrun/model_monitoring/db/tsdb/tdengine/tdengine_connector.py +231 -0
- mlrun/model_monitoring/db/tsdb/v3io/v3io_connector.py +103 -64
- mlrun/model_monitoring/db/v3io_tsdb_reader.py +217 -16
- mlrun/model_monitoring/helpers.py +32 -0
- mlrun/model_monitoring/stream_processing.py +7 -4
- mlrun/model_monitoring/writer.py +19 -14
- mlrun/package/utils/_formatter.py +2 -2
- mlrun/projects/project.py +40 -11
- mlrun/render.py +8 -5
- mlrun/runtimes/__init__.py +1 -0
- mlrun/runtimes/databricks_job/databricks_wrapper.py +1 -1
- mlrun/runtimes/nuclio/api_gateway.py +97 -77
- mlrun/runtimes/nuclio/application/application.py +160 -7
- mlrun/runtimes/nuclio/function.py +18 -12
- mlrun/track/tracker.py +2 -1
- mlrun/utils/async_http.py +25 -5
- mlrun/utils/helpers.py +28 -3
- mlrun/utils/logger.py +11 -6
- mlrun/utils/notifications/notification/slack.py +27 -7
- mlrun/utils/notifications/notification_pusher.py +45 -41
- mlrun/utils/version/version.json +2 -2
- {mlrun-1.7.0rc16.dist-info → mlrun-1.7.0rc18.dist-info}/METADATA +8 -3
- {mlrun-1.7.0rc16.dist-info → mlrun-1.7.0rc18.dist-info}/RECORD +65 -61
- {mlrun-1.7.0rc16.dist-info → mlrun-1.7.0rc18.dist-info}/LICENSE +0 -0
- {mlrun-1.7.0rc16.dist-info → mlrun-1.7.0rc18.dist-info}/WHEEL +0 -0
- {mlrun-1.7.0rc16.dist-info → mlrun-1.7.0rc18.dist-info}/entry_points.txt +0 -0
- {mlrun-1.7.0rc16.dist-info → mlrun-1.7.0rc18.dist-info}/top_level.txt +0 -0
mlrun/alerts/alert.py
CHANGED
@@ -15,8 +15,7 @@
 from typing import Union
 
 import mlrun
-import mlrun.common.schemas.alert as
-from mlrun.common.schemas.notification import Notification
+import mlrun.common.schemas.alert as alert_objects
 from mlrun.model import ModelObj
 
 
@@ -36,17 +35,17 @@ class AlertConfig(ModelObj):
         self,
         project: str = None,
         name: str = None,
-        template: Union[
+        template: Union[alert_objects.AlertTemplate, str] = None,
         description: str = None,
         summary: str = None,
-        severity:
-        trigger:
-        criteria:
-        reset_policy:
-        notifications: list[
-        entities:
+        severity: alert_objects.AlertSeverity = None,
+        trigger: alert_objects.AlertTrigger = None,
+        criteria: alert_objects.AlertCriteria = None,
+        reset_policy: alert_objects.ResetPolicy = None,
+        notifications: list[alert_objects.AlertNotification] = None,
+        entities: alert_objects.EventEntities = None,
         id: int = None,
-        state:
+        state: alert_objects.AlertActiveState = None,
         created: str = None,
         count: int = None,
     ):
@@ -81,8 +80,10 @@ class AlertConfig(ModelObj):
             else self.entities
         )
         data["notifications"] = [
-
-
+            notification_data.dict()
+            if not isinstance(notification_data, dict)
+            else notification_data
+            for notification_data in self.notifications
         ]
         data["trigger"] = (
             self.trigger.dict() if not isinstance(self.trigger, dict) else self.trigger
@@ -95,35 +96,37 @@ class AlertConfig(ModelObj):
 
         entity_data = struct.get("entities")
         if entity_data:
-            entity_obj =
+            entity_obj = alert_objects.EventEntities.parse_obj(entity_data)
             new_obj.entities = entity_obj
 
         notifications_data = struct.get("notifications")
         if notifications_data:
             notifications_objs = [
-
-                for
+                alert_objects.AlertNotification.parse_obj(notification)
+                for notification in notifications_data
             ]
             new_obj.notifications = notifications_objs
 
         trigger_data = struct.get("trigger")
         if trigger_data:
-            trigger_obj =
+            trigger_obj = alert_objects.AlertTrigger.parse_obj(trigger_data)
             new_obj.trigger = trigger_obj
 
         return new_obj
 
-    def with_notifications(self, notifications: list[
+    def with_notifications(self, notifications: list[alert_objects.AlertNotification]):
         if not isinstance(notifications, list) or not all(
-            isinstance(item,
+            isinstance(item, alert_objects.AlertNotification) for item in notifications
         ):
-            raise ValueError(
-
-
+            raise ValueError(
+                "Notifications parameter must be a list of AlertNotification"
+            )
+        for notification_data in notifications:
+            self.notifications.append(notification_data)
         return self
 
-    def with_entities(self, entities:
-        if not isinstance(entities,
+    def with_entities(self, entities: alert_objects.EventEntities):
+        if not isinstance(entities, alert_objects.EventEntities):
             raise ValueError("Entities parameter must be of type: EventEntities")
         self.entities = entities
         return self
@@ -134,7 +137,7 @@ class AlertConfig(ModelObj):
         template = db.get_alert_template(template)
 
         # Extract parameters from the template and apply them to the AlertConfig object
-        self.
+        self.summary = template.summary
         self.severity = template.severity
         self.criteria = template.criteria
         self.trigger = template.trigger
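The alert schema alias is now `alert_objects`, the constructor arguments are fully typed, and `with_notifications()` appends the validated notifications instead of only validating them. A minimal usage sketch, assuming the constructor initializes `notifications` to an empty list when none are given (that default is not shown in this hunk):

    import mlrun.common.schemas.alert as alert_objects
    from mlrun.alerts.alert import AlertConfig

    config = AlertConfig(project="my-project", name="drift-alert")

    # Anything other than a list of AlertNotification objects now raises:
    try:
        config.with_notifications(["not-a-notification"])
    except ValueError as exc:
        print(exc)  # Notifications parameter must be a list of AlertNotification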
mlrun/artifacts/manager.py
CHANGED
@@ -72,6 +72,10 @@ class ArtifactProducer:
     def get_meta(self) -> dict:
         return {"kind": self.kind, "name": self.name, "tag": self.tag}
 
+    @property
+    def uid(self):
+        return None
+
 
 def dict_to_artifact(struct: dict) -> Artifact:
     kind = struct.get("kind", "")
@@ -262,7 +266,7 @@ class ArtifactManager:
         if target_path and item.is_dir and not target_path.endswith("/"):
             target_path += "/"
         target_path = template_artifact_path(
-            artifact_path=target_path, project=producer.project
+            artifact_path=target_path, project=producer.project, run_uid=producer.uid
         )
         item.target_path = target_path
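With the new `uid` property on `ArtifactProducer` (returning `None` in the base class, presumably overridden by run-backed producers), the producer's run uid is now threaded into target-path templating. A hypothetical stand-in for `template_artifact_path()` to illustrate the intent; the placeholder names below are assumptions, not mlrun's actual template syntax:

    from typing import Optional

    def render_artifact_path(artifact_path: str, project: str, run_uid: Optional[str]) -> str:
        # Substitute only what is available; a base producer has no run uid.
        rendered = artifact_path.replace("{{project}}", project)
        if run_uid is not None:
            rendered = rendered.replace("{{run_uid}}", run_uid)
        return rendered

    print(render_artifact_path("s3://bucket/{{project}}/{{run_uid}}/", "demo", "abc123"))
    # s3://bucket/demo/abc123/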
mlrun/artifacts/model.py
CHANGED
@@ -509,7 +509,7 @@ def _get_extra(target, extra_data, is_dir=False):
 def _remove_tag_from_spec_yaml(model_spec):
     spec_dict = model_spec.to_dict()
     spec_dict["metadata"].pop("tag", None)
-    return yaml.
+    return yaml.safe_dump(spec_dict)
 
 
 def update_model(
mlrun/common/runtimes/constants.py
CHANGED
@@ -136,6 +136,7 @@ class RunStates:
     unknown = "unknown"
     aborted = "aborted"
     aborting = "aborting"
+    skipped = "skipped"
 
     @staticmethod
     def all():
@@ -148,6 +149,7 @@ class RunStates:
             RunStates.unknown,
             RunStates.aborted,
             RunStates.aborting,
+            RunStates.skipped,
         ]
 
     @staticmethod
@@ -156,6 +158,7 @@ class RunStates:
             RunStates.completed,
             RunStates.error,
             RunStates.aborted,
+            RunStates.skipped,
         ]
 
     @staticmethod
mlrun/common/schemas/__init__.py
CHANGED
@@ -14,7 +14,13 @@
 #
 # flake8: noqa - this is until we take care of the F401 violations with respect to __all__ & sphinx
 
-from .alert import
+from .alert import (
+    AlertActiveState,
+    AlertConfig,
+    AlertNotification,
+    AlertTemplate,
+    Event,
+)
 from .api_gateway import (
     APIGateway,
     APIGatewayAuthenticationMode,
@@ -142,10 +148,10 @@ from .model_monitoring import (
     ModelMonitoringMode,
     ModelMonitoringStoreKinds,
     MonitoringFunctionNames,
-    MonitoringTSDBTables,
     PrometheusEndpoints,
     TimeSeriesConnector,
     TSDBTarget,
+    V3IOTSDBTables,
 )
 from .notification import (
     Notification,
mlrun/common/schemas/alert.py
CHANGED
@@ -22,7 +22,7 @@ from mlrun.common.types import StrEnum
 
 
 class EventEntityKind(StrEnum):
-
+    MODEL_ENDPOINT_RESULT = "model-endpoint-result"
     JOB = "job"
 
 
@@ -33,14 +33,34 @@ class EventEntities(pydantic.BaseModel):
 
 
 class EventKind(StrEnum):
-
-
+    DATA_DRIFT_DETECTED = "data_drift_detected"
+    DATA_DRIFT_SUSPECTED = "data_drift_suspected"
+    CONCEPT_DRIFT_DETECTED = "concept_drift_detected"
+    CONCEPT_DRIFT_SUSPECTED = "concept_drift_suspected"
+    MODEL_PERFORMANCE_DETECTED = "model_performance_detected"
+    MODEL_PERFORMANCE_SUSPECTED = "model_performance_suspected"
+    MODEL_SERVING_PERFORMANCE_DETECTED = "model_serving_performance_detected"
+    MODEL_SERVING_PERFORMANCE_SUSPECTED = "model_serving_performance_suspected"
+    MM_APP_ANOMALY_DETECTED = "mm_app_anomaly_detected"
+    MM_APP_ANOMALY_SUSPECTED = "mm_app_anomaly_suspected"
     FAILED = "failed"
 
 
 _event_kind_entity_map = {
-    EventKind.
-    EventKind.
+    EventKind.DATA_DRIFT_SUSPECTED: [EventEntityKind.MODEL_ENDPOINT_RESULT],
+    EventKind.DATA_DRIFT_DETECTED: [EventEntityKind.MODEL_ENDPOINT_RESULT],
+    EventKind.CONCEPT_DRIFT_DETECTED: [EventEntityKind.MODEL_ENDPOINT_RESULT],
+    EventKind.CONCEPT_DRIFT_SUSPECTED: [EventEntityKind.MODEL_ENDPOINT_RESULT],
+    EventKind.MODEL_PERFORMANCE_DETECTED: [EventEntityKind.MODEL_ENDPOINT_RESULT],
+    EventKind.MODEL_PERFORMANCE_SUSPECTED: [EventEntityKind.MODEL_ENDPOINT_RESULT],
+    EventKind.MODEL_SERVING_PERFORMANCE_DETECTED: [
+        EventEntityKind.MODEL_ENDPOINT_RESULT
+    ],
+    EventKind.MODEL_SERVING_PERFORMANCE_SUSPECTED: [
+        EventEntityKind.MODEL_ENDPOINT_RESULT
+    ],
+    EventKind.MM_APP_ANOMALY_DETECTED: [EventEntityKind.MODEL_ENDPOINT_RESULT],
+    EventKind.MM_APP_ANOMALY_SUSPECTED: [EventEntityKind.MODEL_ENDPOINT_RESULT],
     EventKind.FAILED: [EventEntityKind.JOB],
 }
 
@@ -101,6 +121,18 @@ class ResetPolicy(StrEnum):
     AUTO = "auto"
 
 
+class AlertNotification(pydantic.BaseModel):
+    notification: Notification
+    cooldown_period: Annotated[
+        str,
+        pydantic.Field(
+            description="Period during which notifications "
+            "will not be sent after initial send. The format of this would be in time."
+            " e.g. 1d, 3h, 5m, 15s"
+        ),
+    ] = None
+
+
 class AlertConfig(pydantic.BaseModel):
     project: str
     id: int = None
@@ -111,7 +143,8 @@ class AlertConfig(pydantic.BaseModel):
         pydantic.Field(
             description=(
                 "String to be sent in the notifications generated."
-                "e.g. 'Model {{
+                "e.g. 'Model {{project}}/{{entity}} is drifting.'"
+                "Supported variables: project, entity, name"
             )
         ),
     ]
@@ -121,10 +154,15 @@ class AlertConfig(pydantic.BaseModel):
     trigger: AlertTrigger
     criteria: Optional[AlertCriteria]
     reset_policy: ResetPolicy = ResetPolicy.MANUAL
-    notifications: pydantic.conlist(
+    notifications: pydantic.conlist(AlertNotification, min_items=1)
     state: AlertActiveState = AlertActiveState.INACTIVE
     count: Optional[int] = 0
 
+    def get_raw_notifications(self) -> list[Notification]:
+        return [
+            alert_notification.notification for alert_notification in self.notifications
+        ]
+
 
 class AlertsModes(StrEnum):
     enabled = "enabled"
@@ -144,8 +182,9 @@ class AlertTemplate(
     system_generated: bool = False
 
     # AlertConfig fields that are pre-defined
-
-    "String to be sent in the generated notifications e.g. 'Model {{
+    summary: Optional[str] = (
+        "String to be sent in the generated notifications e.g. 'Model {{project}}/{{entity}} is drifting.'"
+        "See AlertConfig.summary description"
     )
     severity: AlertSeverity
     trigger: AlertTrigger
@@ -156,7 +195,7 @@ class AlertTemplate(
     def templates_differ(self, other):
         return (
             self.template_description != other.template_description
-            or self.
+            or self.summary != other.summary
             or self.severity != other.severity
             or self.trigger != other.trigger
            or self.reset_policy != other.reset_policy
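Notifications attached to an alert are now wrapped in `AlertNotification`, which adds an optional `cooldown_period`, and `AlertConfig.get_raw_notifications()` unwraps them again for callers that only want the plain notifications. A small sketch, assuming `Notification(kind="slack", name="on-drift")` is a valid minimal construction (the Notification schema itself is outside this diff):

    from mlrun.common.schemas.alert import AlertNotification
    from mlrun.common.schemas.notification import Notification

    alert_notification = AlertNotification(
        notification=Notification(kind="slack", name="on-drift"),
        # Suppress repeat sends for one hour; the field documents
        # "1d, 3h, 5m, 15s" style period strings.
        cooldown_period="1h",
    )
    print(alert_notification.notification.kind)  # slack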
mlrun/common/schemas/client_spec.py
CHANGED
@@ -59,6 +59,7 @@ class ClientSpec(pydantic.BaseModel):
     sql_url: typing.Optional[str]
     model_endpoint_monitoring_store_type: typing.Optional[str]
     model_endpoint_monitoring_endpoint_store_connection: typing.Optional[str]
+    model_monitoring_tsdb_connection: typing.Optional[str]
     ce: typing.Optional[dict]
     # not passing them as one object as it possible client user would like to override only one of the params
     calculate_artifact_hash: typing.Optional[str]
mlrun/common/schemas/function.py
CHANGED
@@ -45,6 +45,9 @@ class FunctionState:
     # same goes for the build which is not coming from the pod, but is used and we can't just omit it for BC reasons
     build = "build"
 
+    # for pipeline steps
+    skipped = "skipped"
+
     @classmethod
     def get_function_state_from_pod_state(cls, pod_state: str):
         if pod_state == "succeeded":
@@ -60,6 +63,7 @@ class FunctionState:
         return [
             cls.ready,
             cls.error,
+            cls.skipped,
         ]
 
 
mlrun/common/schemas/model_monitoring/__init__.py
CHANGED
@@ -30,20 +30,22 @@ from .constants import (
     ModelMonitoringMode,
     ModelMonitoringStoreKinds,
     MonitoringFunctionNames,
-    MonitoringTSDBTables,
     ProjectSecretKeys,
     PrometheusEndpoints,
     PrometheusMetric,
     ResultData,
     SchedulingKeys,
+    TDEngineSuperTables,
     TimeSeriesConnector,
     TSDBTarget,
+    V3IOTSDBTables,
     VersionedModel,
     WriterEvent,
     WriterEventKind,
 )
 from .grafana import (
     GrafanaColumn,
+    GrafanaColumnType,
     GrafanaDataPoint,
     GrafanaNumberColumn,
     GrafanaStringColumn,
mlrun/common/schemas/model_monitoring/constants.py
CHANGED
@@ -81,6 +81,8 @@ class EventFieldType:
     DRIFT_DETECTED_THRESHOLD = "drift_detected_threshold"
     POSSIBLE_DRIFT_THRESHOLD = "possible_drift_threshold"
     SAMPLE_PARQUET_PATH = "sample_parquet_path"
+    TIME = "time"
+    TABLE_COLUMN = "table_column"
 
 
 class FeatureSetFeatures(MonitoringStrEnum):
@@ -171,6 +173,7 @@ class ProjectSecretKeys:
     PIPELINES_ACCESS_KEY = "MODEL_MONITORING_PIPELINES_ACCESS_KEY"
     KAFKA_BROKERS = "KAFKA_BROKERS"
     STREAM_PATH = "STREAM_PATH"
+    TSDB_CONNECTION = "TSDB_CONNECTION"
 
 
 class ModelMonitoringStoreKinds:
@@ -188,6 +191,7 @@ class SchedulingKeys:
 class FileTargetKind:
     ENDPOINTS = "endpoints"
     EVENTS = "events"
+    PREDICTIONS = "predictions"
     STREAM = "stream"
     PARQUET = "parquet"
     APPS_PARQUET = "apps_parquet"
@@ -229,12 +233,18 @@ class MonitoringFunctionNames(MonitoringStrEnum):
     WRITER = "model-monitoring-writer"
 
 
-class
+class V3IOTSDBTables(MonitoringStrEnum):
     APP_RESULTS = "app-results"
     METRICS = "metrics"
     EVENTS = "events"
 
 
+class TDEngineSuperTables(MonitoringStrEnum):
+    APP_RESULTS = "app_results"
+    METRICS = "metrics"
+    PREDICTIONS = "predictions"
+
+
 @dataclass
 class FunctionURI:
     project: str
@@ -338,6 +348,7 @@ class ControllerPolicy:
 
 class TSDBTarget:
     V3IO_TSDB = "v3io-tsdb"
+    TDEngine = "tdengine"
     PROMETHEUS = "prometheus"
     APP_RESULTS_TABLE = "app-results"
     V3IO_BE = "tsdb"
@@ -347,3 +358,12 @@ class TSDBTarget:
 class HistogramDataDriftApplicationConstants:
     NAME = "histogram-data-drift"
     GENERAL_RESULT_NAME = "general_drift"
+
+
+class PredictionsQueryConstants:
+    DEFAULT_AGGREGATION_GRANULARITY = "10m"
+    INVOCATIONS = "invocations"
+
+
+class SpecialApps:
+    MLRUN_INFRA = "mlrun-infra"
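The V3IO tables keep their dash-separated names while the new TDEngine super-tables use underscores and add a dedicated predictions table. A quick comparison, assuming `MonitoringStrEnum` exposes the `list()` helper used elsewhere in mlrun's monitoring enums (that helper is not shown in this diff):

    from mlrun.common.schemas.model_monitoring import (
        TDEngineSuperTables,
        V3IOTSDBTables,
    )

    print(V3IOTSDBTables.list())       # ['app-results', 'metrics', 'events']
    print(TDEngineSuperTables.list())  # ['app_results', 'metrics', 'predictions']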
mlrun/common/schemas/model_monitoring/grafana.py
CHANGED
@@ -11,12 +11,18 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-#
 
 from typing import Optional, Union
 
 from pydantic import BaseModel
 
+import mlrun.common.types
+
+
+class GrafanaColumnType(mlrun.common.types.StrEnum):
+    NUMBER = "number"
+    STRING = "string"
+
 
 class GrafanaColumn(BaseModel):
     text: str
@@ -24,13 +30,11 @@ class GrafanaColumn(BaseModel):
 
 
 class GrafanaNumberColumn(GrafanaColumn):
-
-    type: str = "number"
+    type: str = GrafanaColumnType.NUMBER
 
 
 class GrafanaStringColumn(GrafanaColumn):
-
-    type: str = "string"
+    type: str = GrafanaColumnType.STRING
 
 
 class GrafanaTable(BaseModel):
mlrun/common/schemas/model_monitoring/model_endpoints.py
CHANGED
@@ -298,6 +298,7 @@ class ModelEndpointList(BaseModel):
 
 class ModelEndpointMonitoringMetricType(mlrun.common.types.StrEnum):
     RESULT = "result"
+    METRIC = "metric"
 
 
 class ModelEndpointMonitoringMetric(BaseModel):
@@ -322,7 +323,7 @@ _FQN_PART_PATTERN = r"[a-zA-Z0-9_-]+"
 _FQN_PATTERN = (
     rf"^(?P<project>{_FQN_PART_PATTERN})\."
     rf"(?P<app>{_FQN_PART_PATTERN})\."
-    rf"(?P<type>{
+    rf"(?P<type>{ModelEndpointMonitoringMetricType.RESULT}|{ModelEndpointMonitoringMetricType.METRIC})\."
     rf"(?P<name>{_FQN_PART_PATTERN})$"
 )
 _FQN_REGEX = re.compile(_FQN_PATTERN)
@@ -337,27 +338,37 @@ def _parse_metric_fqn_to_monitoring_metric(fqn: str) -> ModelEndpointMonitoringM
     )
 
 
+class _MetricPoint(NamedTuple):
+    timestamp: datetime
+    value: float
+
+
 class _ResultPoint(NamedTuple):
     timestamp: datetime
     value: float
     status: ResultStatusApp
 
 
-class
+class _ModelEndpointMonitoringMetricValuesBase(BaseModel):
     full_name: str
     type: ModelEndpointMonitoringMetricType
     data: bool
 
 
-class
-
-
+class ModelEndpointMonitoringMetricValues(_ModelEndpointMonitoringMetricValuesBase):
+    type: ModelEndpointMonitoringMetricType = ModelEndpointMonitoringMetricType.METRIC
+    values: list[_MetricPoint]
+    data: bool = True
+
+
+class ModelEndpointMonitoringResultValues(_ModelEndpointMonitoringMetricValuesBase):
+    type: ModelEndpointMonitoringMetricType = ModelEndpointMonitoringMetricType.RESULT
     result_kind: ResultKindApp
     values: list[_ResultPoint]
     data: bool = True
 
 
-class
+class ModelEndpointMonitoringMetricNoData(_ModelEndpointMonitoringMetricValuesBase):
     full_name: str
     type: ModelEndpointMonitoringMetricType
     data: bool = False
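Since the FQN pattern now accepts both types, a metric FQN parses the same way a result FQN always has. A self-contained regex check mirroring the updated pattern:

    import re

    part = r"[a-zA-Z0-9_-]+"
    fqn_pattern = (
        rf"^(?P<project>{part})\."
        rf"(?P<app>{part})\."
        rf"(?P<type>result|metric)\."  # "metric" is newly accepted
        rf"(?P<name>{part})$"
    )
    fqn_regex = re.compile(fqn_pattern)

    match = fqn_regex.match("my-project.histogram-data-drift.metric.hellinger_mean")
    print(match.groupdict())
    # {'project': 'my-project', 'app': 'histogram-data-drift', 'type': 'metric', 'name': 'hellinger_mean'}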
mlrun/common/schemas/project.py
CHANGED
@@ -113,7 +113,9 @@ class ProjectSummary(pydantic.BaseModel):
     runs_completed_recent_count: int
     runs_failed_recent_count: int
     runs_running_count: int
-
+    distinct_schedules_count: int
+    distinct_scheduled_jobs_pending_count: int
+    distinct_scheduled_pipelines_pending_count: int
     pipelines_completed_recent_count: typing.Optional[int] = None
     pipelines_failed_recent_count: typing.Optional[int] = None
     pipelines_running_count: typing.Optional[int] = None
mlrun/config.py
CHANGED
@@ -232,6 +232,10 @@ default_config = {
         "databricks": {
             "artifact_directory_path": "/mlrun_databricks_runtime/artifacts_dictionaries"
         },
+        "application": {
+            "default_sidecar_internal_port": 8050,
+            "default_authentication_mode": "accessKey",
+        },
     },
     # TODO: function defaults should be moved to the function spec config above
     "function_defaults": {
@@ -517,7 +521,9 @@ default_config = {
         # See mlrun.model_monitoring.db.stores.ObjectStoreFactory for available options
         "store_type": "v3io-nosql",
         "endpoint_store_connection": "",
+        # See mlrun.model_monitoring.db.tsdb.ObjectTSDBFactory for available options
         "tsdb_connector_type": "v3io-tsdb",
+        "tsdb_connection": "",
     },
     "secret_stores": {
         # Use only in testing scenarios (such as integration tests) to avoid using k8s for secrets (will use in-memory
@@ -1402,14 +1408,14 @@ def read_env(env=None, prefix=env_prefix):
     if log_formatter_name := config.get("log_formatter"):
         import mlrun.utils.logger
 
-        log_formatter = mlrun.utils.
+        log_formatter = mlrun.utils.resolve_formatter_by_kind(
             mlrun.utils.FormatterKinds(log_formatter_name)
         )
         current_handler = mlrun.utils.logger.get_handler("default")
         current_formatter_name = current_handler.formatter.__class__.__name__
-        desired_formatter_name = log_formatter.
+        desired_formatter_name = log_formatter.__name__
         if current_formatter_name != desired_formatter_name:
-            current_handler.setFormatter(log_formatter)
+            current_handler.setFormatter(log_formatter())
 
     # The default function pod resource values are of type str; however, when reading from environment variable numbers,
     # it converts them to type int if contains only number, so we want to convert them to str.
mlrun/data_types/to_pandas.py
CHANGED
@@ -154,10 +154,10 @@ def toPandas(spark_df):
     column_counter = Counter(spark_df.columns)
 
     dtype = [None] * len(spark_df.schema)
-    for
+    for field_idx, field in enumerate(spark_df.schema):
         # For duplicate column name, we use `iloc` to access it.
         if column_counter[field.name] > 1:
-            pandas_col = pdf.iloc[:,
+            pandas_col = pdf.iloc[:, field_idx]
         else:
             pandas_col = pdf[field.name]
 
@@ -171,12 +171,12 @@ def toPandas(spark_df):
             and field.nullable
             and pandas_col.isnull().any()
         ):
-            dtype[
+            dtype[field_idx] = pandas_type
         # Ensure we fall back to nullable numpy types, even when whole column is null:
         if isinstance(field.dataType, IntegralType) and pandas_col.isnull().any():
-            dtype[
+            dtype[field_idx] = np.float64
         if isinstance(field.dataType, BooleanType) and pandas_col.isnull().any():
-            dtype[
+            dtype[field_idx] = object
 
     df = pd.DataFrame()
     for index, t in enumerate(dtype):
mlrun/datastore/datastore.py
CHANGED
@@ -223,6 +223,11 @@ class StoreManager:
             subpath = url[len("memory://") :]
             return in_memory_store, subpath, url
 
+        elif schema in get_local_file_schema():
+            # parse_url() will drop the windows drive-letter from the path for url like "c:\a\b".
+            # As a workaround, we set subpath to the url.
+            subpath = url.replace("file://", "", 1)
+
         if not schema and endpoint:
             if endpoint in self._stores.keys():
                 return self._stores[endpoint], subpath, url
@@ -241,8 +246,7 @@ class StoreManager:
         )
         if not secrets and not mlrun.config.is_running_as_api():
             self._stores[store_key] = store
-
-        return store, url if store.kind == "file" else subpath, url
+        return store, subpath, url
 
     def reset_secrets(self):
         self._secrets = {}
mlrun/datastore/redis.py
CHANGED
@@ -31,7 +31,7 @@ class RedisStore(DataStore):
     """
 
     def __init__(self, parent, schema, name, endpoint="", secrets: dict = None):
-
+        redis_default_port = "6379"
         super().__init__(parent, name, schema, endpoint, secrets=secrets)
         self.headers = None
 
@@ -49,7 +49,7 @@ class RedisStore(DataStore):
         user = self._get_secret_or_env("REDIS_USER", "", credentials_prefix)
         password = self._get_secret_or_env("REDIS_PASSWORD", "", credentials_prefix)
         host = parsed_endpoint.hostname
-        port = parsed_endpoint.port if parsed_endpoint.port else
+        port = parsed_endpoint.port if parsed_endpoint.port else redis_default_port
         schema = parsed_endpoint.scheme
         if user or password:
             endpoint = f"{schema}://{user}:{password}@{host}:{port}"
mlrun/datastore/s3.py
CHANGED
@@ -198,6 +198,11 @@ class S3Store(DataStore):
         bucket = self.s3.Bucket(bucket)
         return [obj.key[key_length:] for obj in bucket.objects.filter(Prefix=key)]
 
+    def rm(self, path, recursive=False, maxdepth=None):
+        bucket, key = self.get_bucket_and_key(path)
+        path = f"{bucket}/{key}"
+        self.filesystem.rm(path=path, recursive=recursive, maxdepth=maxdepth)
+
 
 def parse_s3_bucket_and_key(s3_path):
     try: