mlrun 1.7.0rc36__py3-none-any.whl → 1.7.0rc38__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of mlrun might be problematic.
- mlrun/alerts/alert.py +64 -0
- mlrun/common/schemas/alert.py +2 -2
- mlrun/common/schemas/model_monitoring/constants.py +4 -0
- mlrun/common/schemas/notification.py +26 -7
- mlrun/datastore/azure_blob.py +120 -30
- mlrun/datastore/s3.py +8 -1
- mlrun/feature_store/common.py +6 -11
- mlrun/model.py +5 -0
- mlrun/model_monitoring/api.py +1 -1
- mlrun/model_monitoring/applications/_application_steps.py +9 -4
- mlrun/model_monitoring/db/stores/sqldb/models/mysql.py +14 -1
- mlrun/model_monitoring/db/stores/v3io_kv/kv_store.py +10 -7
- mlrun/model_monitoring/db/tsdb/base.py +141 -12
- mlrun/model_monitoring/db/tsdb/tdengine/tdengine_connector.py +65 -5
- mlrun/model_monitoring/db/tsdb/v3io/stream_graph_steps.py +23 -1
- mlrun/model_monitoring/db/tsdb/v3io/v3io_connector.py +211 -35
- mlrun/model_monitoring/helpers.py +1 -2
- mlrun/model_monitoring/stream_processing.py +67 -25
- mlrun/model_monitoring/writer.py +4 -1
- mlrun/projects/operations.py +4 -0
- mlrun/projects/project.py +11 -1
- mlrun/runtimes/__init__.py +15 -8
- mlrun/runtimes/base.py +3 -0
- mlrun/runtimes/nuclio/application/application.py +98 -17
- mlrun/runtimes/nuclio/function.py +5 -1
- mlrun/runtimes/pod.py +2 -2
- mlrun/runtimes/remotesparkjob.py +2 -5
- mlrun/runtimes/sparkjob/spark3job.py +11 -16
- mlrun/serving/routers.py +1 -4
- mlrun/serving/server.py +4 -7
- mlrun/serving/states.py +1 -1
- mlrun/serving/v2_serving.py +5 -7
- mlrun/track/trackers/mlflow_tracker.py +5 -0
- mlrun/utils/version/version.json +2 -2
- {mlrun-1.7.0rc36.dist-info → mlrun-1.7.0rc38.dist-info}/METADATA +12 -6
- {mlrun-1.7.0rc36.dist-info → mlrun-1.7.0rc38.dist-info}/RECORD +40 -40
- {mlrun-1.7.0rc36.dist-info → mlrun-1.7.0rc38.dist-info}/WHEEL +1 -1
- {mlrun-1.7.0rc36.dist-info → mlrun-1.7.0rc38.dist-info}/LICENSE +0 -0
- {mlrun-1.7.0rc36.dist-info → mlrun-1.7.0rc38.dist-info}/entry_points.txt +0 -0
- {mlrun-1.7.0rc36.dist-info → mlrun-1.7.0rc38.dist-info}/top_level.txt +0 -0
mlrun/alerts/alert.py
CHANGED

@@ -28,6 +28,8 @@ class AlertConfig(ModelObj):
         "severity",
         "reset_policy",
         "state",
+        "count",
+        "created",
     ]
     _fields_to_serialize = ModelObj._fields_to_serialize + [
         "entities",
@@ -54,6 +56,68 @@
         created: str = None,
         count: int = None,
     ):
+        """Alert config object
+
+        Example::
+
+            # create an alert on endpoint_id, which will be triggered to slack if there is a "data_drift_detected" event
+            # 3 times in the next hour.
+            from mlrun.alerts import AlertConfig
+            import mlrun.common.schemas.alert as alert_objects
+
+            entity_kind = alert_objects.EventEntityKind.MODEL_ENDPOINT_RESULT
+            entity_id = get_default_result_instance_fqn(endpoint_id)
+            event_name = alert_objects.EventKind.DATA_DRIFT_DETECTED
+            notification = mlrun.model.Notification(
+                kind="slack",
+                name="slack_notification",
+                message="drift was detected",
+                severity="warning",
+                when=["now"],
+                condition="failed",
+                secret_params={
+                    "webhook": "https://hooks.slack.com/",
+                },
+            ).to_dict()
+
+            alert_data = AlertConfig(
+                project="my-project",
+                name="drift-alert",
+                summary="a drift was detected",
+                severity=alert_objects.AlertSeverity.LOW,
+                entities=alert_objects.EventEntities(
+                    kind=entity_kind, project="my-project", ids=[entity_id]
+                ),
+                trigger=alert_objects.AlertTrigger(events=[event_name]),
+                criteria=alert_objects.AlertCriteria(count=3, period="1h"),
+                notifications=[alert_objects.AlertNotification(notification=notification)],
+            )
+            project.store_alert_config(alert_data)
+
+        :param project: name of the project to associate the alert with
+        :param name: name of the alert
+        :param template: optional parameter that allows to create an alert based on a predefined template.
+                         you can pass either an AlertTemplate object or a string (the template name).
+                         if a template is used, many fields of the alert will be auto-generated based on the
+                         template. however, you still need to provide the following fields:
+                         `name`, `project`, `entity`, `notifications`
+        :param description: description of the alert
+        :param summary: summary of the alert, will be sent in the generated notifications
+        :param severity: severity of the alert
+        :param trigger: the events that will trigger this alert, may be a simple trigger based on events or
+                        complex trigger which is based on a prometheus alert
+        :param criteria: when the alert will be triggered based on the specified number of events within the
+                         defined time period.
+        :param reset_policy: when to clear the alert. May be "manual" for manual reset of the alert, or
+                             "auto" if the criteria contains a time period
+        :param notifications: list of notifications to invoke once the alert is triggered
+        :param entities: entities that the event relates to. The entity object will contain fields that uniquely
+                         identify a given entity in the system
+        :param id: internal id of the alert (user should not supply it)
+        :param state: state of the alert, may be active/inactive (user should not supply it)
+        :param created: when the alert is created (user should not supply it)
+        :param count: internal counter of the alert (user should not supply it)
+        """
         self.project = project
         self.name = name
         self.description = description
mlrun/common/schemas/alert.py
CHANGED

@@ -149,7 +149,7 @@ class AlertConfig(pydantic.BaseModel):
     entities: EventEntities
     trigger: AlertTrigger
     criteria: Optional[AlertCriteria]
-    reset_policy: ResetPolicy = ResetPolicy.MANUAL
+    reset_policy: ResetPolicy = ResetPolicy.AUTO
     notifications: pydantic.conlist(AlertNotification, min_items=1)
     state: AlertActiveState = AlertActiveState.INACTIVE
     count: Optional[int] = 0
@@ -185,7 +185,7 @@ class AlertTemplate(
     severity: AlertSeverity
     trigger: AlertTrigger
     criteria: Optional[AlertCriteria]
-    reset_policy: ResetPolicy = ResetPolicy.MANUAL
+    reset_policy: ResetPolicy = ResetPolicy.AUTO

     # This is slightly different than __eq__ as it doesn't compare everything
     def templates_differ(self, other):
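
This changes the default reset policy of both alert configs and alert templates from
manual to automatic clearing. A minimal sketch of pinning the previous behavior
explicitly (alert_config here is a placeholder for an existing AlertConfig instance):

    import mlrun.common.schemas.alert as alert_objects

    # opt back into manual clearing if auto-reset is not desired
    alert_config.reset_policy = alert_objects.ResetPolicy.MANUAL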
mlrun/common/schemas/model_monitoring/constants.py
CHANGED

@@ -53,9 +53,11 @@ class EventFieldType:
     PREDICTIONS = "predictions"
     NAMED_PREDICTIONS = "named_predictions"
     ERROR_COUNT = "error_count"
+    MODEL_ERROR = "model_error"
     ENTITIES = "entities"
     FIRST_REQUEST = "first_request"
     LAST_REQUEST = "last_request"
+    LAST_REQUEST_TIMESTAMP = "last_request_timestamp"
     METRIC = "metric"
     METRICS = "metrics"
     BATCH_INTERVALS_DICT = "batch_intervals_dict"
@@ -217,6 +219,7 @@ class FileTargetKind:
     APP_METRICS = "app_metrics"
     MONITORING_SCHEDULES = "monitoring_schedules"
     MONITORING_APPLICATION = "monitoring_application"
+    ERRORS = "errors"


 class ModelMonitoringMode(str, Enum):
@@ -240,6 +243,7 @@ class V3IOTSDBTables(MonitoringStrEnum):
     APP_RESULTS = "app-results"
     METRICS = "metrics"
     EVENTS = "events"
+    ERRORS = "errors"


 class TDEngineSuperTables(MonitoringStrEnum):
mlrun/common/schemas/notification.py
CHANGED

@@ -50,15 +50,34 @@ class NotificationLimits(enum.Enum):


 class Notification(pydantic.BaseModel):
+    """
+    Notification object schema
+    :param kind: notification implementation kind - slack, webhook, etc.
+    :param name: for logging and identification
+    :param message: message content in the notification
+    :param severity: severity to display in the notification
+    :param when: list of statuses to trigger the notification: 'running', 'completed', 'error'
+    :param condition: optional condition to trigger the notification, a jinja2 expression that can use run data
+                      to evaluate if the notification should be sent in addition to the 'when' statuses.
+                      e.g.: '{{ run["status"]["results"]["accuracy"] < 0.9}}'
+    :param params: Implementation specific parameters for the notification implementation (e.g. slack webhook url,
+                   git repository details, etc.)
+    :param secret_params: secret parameters for the notification implementation, same as params but will be stored
+                          in a k8s secret and passed as a secret reference to the implementation.
+    :param status: notification status - pending, sent, error
+    :param sent_time: time the notification was sent
+    :param reason: failure reason if the notification failed to send
+    """
+
     kind: NotificationKind
     name: str
-    message: str
-    severity: NotificationSeverity
-    when: list[str]
-    condition: str = None
-    params: dict[str, typing.Any] = None
-    status: NotificationStatus = None
-    sent_time: typing.Union[str, datetime.datetime] = None
+    message: typing.Optional[str] = None
+    severity: typing.Optional[NotificationSeverity] = None
+    when: typing.Optional[list[str]] = None
+    condition: typing.Optional[str] = None
+    params: typing.Optional[dict[str, typing.Any]] = None
+    status: typing.Optional[NotificationStatus] = None
+    sent_time: typing.Optional[typing.Union[str, datetime.datetime]] = None
     secret_params: typing.Optional[dict[str, typing.Any]] = None
     reason: typing.Optional[str] = None
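
Most Notification fields are now optional at the schema level, so a notification can be
declared with just its kind and name. A minimal sketch, assuming the remaining fields
are resolved later by the notification handler:

    from mlrun.common.schemas.notification import Notification

    # only kind and name are required after this change; message, severity,
    # when, etc. default to None instead of being mandatory
    minimal = Notification(kind="slack", name="slack-notification")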
mlrun/datastore/azure_blob.py
CHANGED

@@ -16,6 +16,7 @@ import time
 from pathlib import Path
 from urllib.parse import urlparse

+from azure.storage.blob import BlobServiceClient
 from azure.storage.blob._shared.base_client import parse_connection_str
 from fsspec.registry import get_filesystem_class

@@ -29,47 +30,128 @@ from .base import DataStore, FileStats, makeDatastoreSchemaSanitizer

 class AzureBlobStore(DataStore):
     using_bucket = True
+    max_concurrency = 100
+    max_blocksize = 1024 * 1024 * 4
+    max_single_put_size = (
+        1024 * 1024 * 8
+    )  # for service_client property only, does not affect filesystem

     def __init__(self, parent, schema, name, endpoint="", secrets: dict = None):
         super().__init__(parent, name, schema, endpoint, secrets=secrets)
+        self._service_client = None
+        self._storage_options = None
+
+    @property
+    def storage_options(self):
+        if not self._storage_options:
+            res = dict(
+                account_name=self._get_secret_or_env("account_name")
+                or self._get_secret_or_env("AZURE_STORAGE_ACCOUNT_NAME"),
+                account_key=self._get_secret_or_env("account_key")
+                or self._get_secret_or_env("AZURE_STORAGE_ACCOUNT_KEY"),
+                connection_string=self._get_secret_or_env("connection_string")
+                or self._get_secret_or_env("AZURE_STORAGE_CONNECTION_STRING"),
+                tenant_id=self._get_secret_or_env("tenant_id")
+                or self._get_secret_or_env("AZURE_STORAGE_TENANT_ID"),
+                client_id=self._get_secret_or_env("client_id")
+                or self._get_secret_or_env("AZURE_STORAGE_CLIENT_ID"),
+                client_secret=self._get_secret_or_env("client_secret")
+                or self._get_secret_or_env("AZURE_STORAGE_CLIENT_SECRET"),
+                sas_token=self._get_secret_or_env("sas_token")
+                or self._get_secret_or_env("AZURE_STORAGE_SAS_TOKEN"),
+                credential=self._get_secret_or_env("credential"),
+            )
+            self._storage_options = self._sanitize_storage_options(res)
+        return self._storage_options

     @property
     def filesystem(self):
         """return fsspec file system object, if supported"""
-        if self._filesystem:
-            return self._filesystem
         try:
             import adlfs  # noqa
         except ImportError as exc:
             raise ImportError("Azure adlfs not installed") from exc
-        [… 7 old lines not rendered by the diff viewer; only a "filesystem_class" fragment is visible …]
+
+        if not self._filesystem:
+            # in order to support az and wasbs kinds
+            filesystem_class = get_filesystem_class(protocol=self.kind)
+            self._filesystem = makeDatastoreSchemaSanitizer(
+                filesystem_class,
+                using_bucket=self.using_bucket,
+                blocksize=self.max_blocksize,
+                **self.storage_options,
+            )
         return self._filesystem

-    [… 19 old lines not rendered by the diff viewer …]
+    @property
+    def service_client(self):
+        try:
+            import azure  # noqa
+        except ImportError as exc:
+            raise ImportError("Azure not installed") from exc
+
+        if not self._service_client:
+            self._do_connect()
+        return self._service_client
+
+    def _do_connect(self):
+        """
+
+        Creates a client for azure.
+        Raises MLRunInvalidArgumentError if none of the connection details are available
+        based on do_connect in AzureBlobFileSystem:
+        https://github.com/fsspec/adlfs/blob/2023.9.0/adlfs/spec.py#L422
+        """
+        from azure.identity import ClientSecretCredential
+
+        storage_options = self.storage_options
+        connection_string = storage_options.get("connection_string")
+        client_name = storage_options.get("account_name")
+        account_key = storage_options.get("account_key")
+        sas_token = storage_options.get("sas_token")
+        client_id = storage_options.get("client_id")
+        credential = storage_options.get("credential")
+
+        credential_from_client_id = None
+        if (
+            credential is None
+            and account_key is None
+            and sas_token is None
+            and client_id is not None
+        ):
+            credential_from_client_id = ClientSecretCredential(
+                tenant_id=storage_options.get("tenant_id"),
+                client_id=client_id,
+                client_secret=storage_options.get("client_secret"),
+            )
+        try:
+            if connection_string is not None:
+                self._service_client = BlobServiceClient.from_connection_string(
+                    conn_str=connection_string,
+                    max_block_size=self.max_blocksize,
+                    max_single_put_size=self.max_single_put_size,
+                )
+            elif client_name is not None:
+                account_url = f"https://{client_name}.blob.core.windows.net"
+                cred = credential_from_client_id or credential or account_key
+                if not cred and sas_token is not None:
+                    if not sas_token.startswith("?"):
+                        sas_token = f"?{sas_token}"
+                    account_url = account_url + sas_token
+                self._service_client = BlobServiceClient(
+                    account_url=account_url,
+                    credential=cred,
+                    max_block_size=self.max_blocksize,
+                    max_single_put_size=self.max_single_put_size,
+                )
+            else:
+                raise mlrun.errors.MLRunInvalidArgumentError(
+                    "Must provide either a connection_string or account_name with credentials"
+                )
+        except Exception as e:
+            raise mlrun.errors.MLRunInvalidArgumentError(
+                f"unable to connect to account for {e}"
+            )

     def _convert_key_to_remote_path(self, key):
         key = key.strip("/")
@@ -82,7 +164,15 @@ class AzureBlobStore(DataStore):

     def upload(self, key, src_path):
         remote_path = self._convert_key_to_remote_path(key)
-        [… 1 old line not rendered by the diff viewer …]
+        container, remote_path = remote_path.split("/", 1)
+        container_client = self.service_client.get_container_client(container=container)
+        with open(file=src_path, mode="rb") as data:
+            container_client.upload_blob(
+                name=remote_path,
+                data=data,
+                overwrite=True,
+                max_concurrency=self.max_concurrency,
+            )

     def get(self, key, size=None, offset=0):
         remote_path = self._convert_key_to_remote_path(key)
@@ -135,7 +225,7 @@ class AzureBlobStore(DataStore):

     def get_spark_options(self):
         res = {}
-        st = self.[…]
+        st = self.storage_options()
         service = "blob"
         primary_url = None
         if st.get("connection_string"):
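
The upload path now goes through a dedicated BlobServiceClient rather than the fsspec
filesystem, using chunked, parallel block uploads. A rough standalone sketch of what the
new upload does (the connection string, container, and paths are placeholders):

    from azure.storage.blob import BlobServiceClient

    service_client = BlobServiceClient.from_connection_string(
        conn_str="<azure-storage-connection-string>",
        max_block_size=4 * 1024 * 1024,  # matches max_blocksize above
        max_single_put_size=8 * 1024 * 1024,  # larger blobs are uploaded in blocks
    )
    container_client = service_client.get_container_client(container="my-container")
    with open("model.pkl", "rb") as data:
        container_client.upload_blob(
            name="path/in/container/model.pkl",
            data=data,
            overwrite=True,
            max_concurrency=100,  # parallel block uploads, as in max_concurrency above
        )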
mlrun/datastore/s3.py
CHANGED

@@ -15,6 +15,7 @@
 import time

 import boto3
+from boto3.s3.transfer import TransferConfig
 from fsspec.registry import get_filesystem_class

 import mlrun.errors
@@ -40,6 +41,12 @@ class S3Store(DataStore):
         profile_name = self._get_secret_or_env("AWS_PROFILE")
         assume_role_arn = self._get_secret_or_env("MLRUN_AWS_ROLE_ARN")

+        self.config = TransferConfig(
+            multipart_threshold=1024 * 1024 * 25,
+            max_concurrency=10,
+            multipart_chunksize=1024 * 1024 * 25,
+        )
+
         # If user asks to assume a role, this needs to go through the STS client and retrieve temporary creds
         if assume_role_arn:
             client = boto3.client(
@@ -166,7 +173,7 @@ class S3Store(DataStore):

     def upload(self, key, src_path):
         bucket, key = self.get_bucket_and_key(key)
-        self.s3.[…]
+        self.s3.Bucket(bucket).upload_file(src_path, key, Config=self.config)

     def get(self, key, size=None, offset=0):
         bucket, key = self.get_bucket_and_key(key)
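
The new TransferConfig makes uploads of files above 25 MB use multipart transfers with
up to 10 concurrent threads. A minimal sketch of the equivalent direct boto3 call, with
the bucket and file names as placeholders:

    import boto3
    from boto3.s3.transfer import TransferConfig

    # files above the threshold are split into 25 MB parts and uploaded in parallel
    config = TransferConfig(
        multipart_threshold=1024 * 1024 * 25,
        max_concurrency=10,
        multipart_chunksize=1024 * 1024 * 25,
    )
    s3 = boto3.resource("s3")
    s3.Bucket("my-bucket").upload_file(
        "large-model.bin", "models/large-model.bin", Config=config
    )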
mlrun/feature_store/common.py
CHANGED

@@ -37,17 +37,12 @@ def parse_feature_string(feature):
         raise mlrun.errors.MLRunInvalidArgumentError(
             f"feature {feature} must be {expected_message}"
         )
-    [… 5 old lines not rendered by the diff viewer …]
-    feature_set = splitted[0]
-    feature_name = splitted[1]
-    splitted = feature_name.split(" as ")
-    if len(splitted) > 1:
-        return feature_set.strip(), splitted[0].strip(), splitted[1].strip()
-    return feature_set.strip(), feature_name.strip(), None
+    feature_set, feature_name = feature.rsplit(feature_separator, 1)
+    feature_set = feature_set.strip()
+    split_result = feature_name.split(" as ", 1)
+    feature_name = split_result[0].strip()
+    alias = split_result[1].strip() if len(split_result) > 1 else None
+    return feature_set, feature_name, alias


 def parse_project_name_from_feature_string(feature):
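
The refactored parser splits on the last separator and caps the " as " split at one
occurrence, so a feature string resolves deterministically. A sketch of the expected
behavior, assuming the module's "." feature separator:

    parse_feature_string("my-feature-set.my_feature as f1")
    # -> ("my-feature-set", "my_feature", "f1")
    parse_feature_string("my-feature-set.my_feature")
    # -> ("my-feature-set", "my_feature", None)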
mlrun/model.py
CHANGED

@@ -1789,6 +1789,11 @@ class RunObject(RunTemplate):

         return state

+    def abort(self):
+        """abort the run"""
+        db = mlrun.get_run_db()
+        db.abort_run(self.metadata.uid, self.metadata.project)
+
     @staticmethod
     def create_uri(project: str, uid: str, iteration: Union[int, str], tag: str = ""):
         if tag:
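
This gives run handles a client-side abort. A short usage sketch, assuming a project
with a function named "trainer":

    run = project.run_function("trainer", watch=False)
    # request the server to abort the in-flight run
    run.abort()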
mlrun/model_monitoring/api.py
CHANGED

@@ -20,6 +20,7 @@ import mlrun.common.model_monitoring.helpers
 import mlrun.common.schemas.model_monitoring.constants as mm_constant
 import mlrun.datastore
 import mlrun.serving
+import mlrun.utils.helpers
 import mlrun.utils.v3io_clients
 from mlrun.model_monitoring.helpers import get_stream_path
 from mlrun.serving.utils import StepToDict
@@ -34,8 +35,8 @@ class _PushToMonitoringWriter(StepToDict):

     def __init__(
         self,
-        project: […]
-        writer_application_name: […]
+        project: str,
+        writer_application_name: str,
         stream_uri: Optional[str] = None,
         name: Optional[str] = None,
     ):
@@ -109,6 +110,7 @@ class _PushToMonitoringWriter(StepToDict):
             f"Pushing data = {writer_event} \n to stream = {self.stream_uri}"
         )
         self.output_stream.push([writer_event])
+        logger.info(f"Pushed data to {self.stream_uri} successfully")

     def _lazy_init(self):
         if self.output_stream is None:
@@ -150,12 +152,15 @@ class _PrepareMonitoringEvent(StepToDict):

     @staticmethod
     def _create_mlrun_context(app_name: str):
+        artifact_path = mlrun.utils.helpers.template_artifact_path(
+            mlrun.mlconf.artifact_path, mlrun.mlconf.default_project
+        )
         context = mlrun.get_or_create_ctx(
             f"{app_name}-logger",
             spec={
-                "metadata": {"labels": {"kind": mlrun.runtimes.RuntimeKinds.serving}}
+                "metadata": {"labels": {"kind": mlrun.runtimes.RuntimeKinds.serving}},
+                "spec": {mlrun.utils.helpers.RunKeys.output_path: artifact_path},
             },
-            upload_artifacts=True,
         )
         context.__class__ = MonitoringApplicationContext
         return context
mlrun/model_monitoring/db/stores/sqldb/models/mysql.py
CHANGED

@@ -18,6 +18,7 @@ from sqlalchemy.ext.declarative import declarative_base, declared_attr

 from mlrun.common.schemas.model_monitoring import (
     EventFieldType,
+    ResultData,
     WriterEvent,
 )

@@ -32,6 +33,13 @@ Base = declarative_base()


 class ModelEndpointsTable(Base, ModelEndpointsBaseTable):
+    feature_stats = Column(
+        EventFieldType.FEATURE_STATS, sqlalchemy.dialects.mysql.MEDIUMTEXT
+    )
+    current_stats = Column(
+        EventFieldType.CURRENT_STATS, sqlalchemy.dialects.mysql.MEDIUMTEXT
+    )
+    metrics = Column(EventFieldType.METRICS, sqlalchemy.dialects.mysql.MEDIUMTEXT)
     first_request = Column(
         EventFieldType.FIRST_REQUEST,
         # TODO: migrate to DATETIME, see ML-6921
@@ -72,7 +80,12 @@ class _ApplicationResultOrMetric:
 class ApplicationResultTable(
     Base, _ApplicationResultOrMetric, ApplicationResultBaseTable
 ):
-    [… 1 old line not rendered by the diff viewer …]
+    result_extra_data = Column(
+        ResultData.RESULT_EXTRA_DATA, sqlalchemy.dialects.mysql.MEDIUMTEXT
+    )
+    current_stats = Column(
+        ResultData.CURRENT_STATS, sqlalchemy.dialects.mysql.MEDIUMTEXT
+    )


 class ApplicationMetricsTable(
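
These columns use MySQL's MEDIUMTEXT, which raises the per-value capacity from TEXT's
64 KB to 16 MB, presumably to make room for large serialized statistics and extra-data
payloads. A standalone sketch of the declaration:

    from sqlalchemy import Column
    from sqlalchemy.dialects.mysql import MEDIUMTEXT

    # MEDIUMTEXT stores up to 16 MB per value, vs. 64 KB for TEXT
    feature_stats = Column("feature_stats", MEDIUMTEXT)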
mlrun/model_monitoring/db/stores/v3io_kv/kv_store.py
CHANGED

@@ -11,7 +11,7 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-
+import http
 import json
 import typing
 from dataclasses import dataclass
@@ -350,7 +350,7 @@ class KVStoreBase(StoreBase):
             table_path = self._get_results_table_path(endpoint_id)
             key = event.pop(mm_schemas.WriterEvent.APPLICATION_NAME)
             metric_name = event.pop(mm_schemas.ResultData.RESULT_NAME)
-            attributes = {metric_name: json.dumps(event)}
+            attributes = {metric_name: self._encode_field(json.dumps(event))}
         else:
             raise ValueError(f"Invalid {kind = }")

@@ -417,11 +417,14 @@ class KVStoreBase(StoreBase):
             )
             return response.output.item[mm_schemas.SchedulingKeys.LAST_ANALYZED]
         except v3io.dataplane.response.HttpResponseError as err:
-            [… 5 old lines not rendered by the diff viewer …]
+            if err.status_code == http.HTTPStatus.NOT_FOUND:
+                logger.debug("Last analyzed time not found", err=err)
+                raise mlrun.errors.MLRunNotFoundError(
+                    f"No last analyzed value has been found for {application_name} "
+                    f"that processes model endpoint {endpoint_id}",
+                )
+            logger.error("Error while getting last analyzed time", err=err)
+            raise err

     def update_last_analyzed(
         self, endpoint_id: str, application_name: str, last_analyzed: int