mlrun 1.8.0rc17__py3-none-any.whl → 1.8.0rc19__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of mlrun might be problematic. Click here for more details.
- mlrun/common/schemas/artifact.py +6 -0
- mlrun/common/schemas/model_monitoring/__init__.py +1 -0
- mlrun/common/schemas/model_monitoring/constants.py +11 -0
- mlrun/common/schemas/model_monitoring/model_endpoints.py +2 -2
- mlrun/config.py +3 -2
- mlrun/db/base.py +9 -0
- mlrun/db/httpdb.py +42 -0
- mlrun/model_monitoring/applications/base.py +54 -19
- mlrun/model_monitoring/db/tsdb/base.py +116 -8
- mlrun/model_monitoring/db/tsdb/tdengine/tdengine_connector.py +23 -11
- mlrun/model_monitoring/db/tsdb/v3io/v3io_connector.py +23 -4
- mlrun/model_monitoring/helpers.py +2 -2
- mlrun/projects/pipelines.py +2 -1
- mlrun/projects/project.py +12 -7
- mlrun/serving/states.py +3 -3
- mlrun/serving/v2_serving.py +3 -3
- mlrun/utils/helpers.py +134 -0
- mlrun/utils/notifications/notification/webhook.py +3 -0
- mlrun/utils/notifications/notification_pusher.py +33 -131
- mlrun/utils/version/version.json +2 -2
- {mlrun-1.8.0rc17.dist-info → mlrun-1.8.0rc19.dist-info}/METADATA +5 -7
- {mlrun-1.8.0rc17.dist-info → mlrun-1.8.0rc19.dist-info}/RECORD +26 -26
- {mlrun-1.8.0rc17.dist-info → mlrun-1.8.0rc19.dist-info}/LICENSE +0 -0
- {mlrun-1.8.0rc17.dist-info → mlrun-1.8.0rc19.dist-info}/WHEEL +0 -0
- {mlrun-1.8.0rc17.dist-info → mlrun-1.8.0rc19.dist-info}/entry_points.txt +0 -0
- {mlrun-1.8.0rc17.dist-info → mlrun-1.8.0rc19.dist-info}/top_level.txt +0 -0
mlrun/common/schemas/artifact.py
CHANGED
|
@@ -51,6 +51,12 @@ class ArtifactCategories(mlrun.common.types.StrEnum):
|
|
|
51
51
|
True,
|
|
52
52
|
)
|
|
53
53
|
|
|
54
|
+
@classmethod
def from_kind(cls, kind: str) -> "ArtifactCategories":
    """
    Translate an artifact kind string into its category.

    :param kind: The artifact kind to classify.

    :return: The category whose value equals ``kind`` when it is one of the model, dataset or
             document categories; ``other`` for every other kind.
    """
    # Only these categories share their value with an artifact kind.
    for category in (cls.model, cls.dataset, cls.document):
        if kind == category.value:
            return cls(kind)
    return cls.other
|
|
59
|
+
|
|
54
60
|
|
|
55
61
|
class ArtifactIdentifier(pydantic.v1.BaseModel):
|
|
56
62
|
# artifact kind
|
|
@@ -248,6 +248,12 @@ class ProjectSecretKeys:
|
|
|
248
248
|
]
|
|
249
249
|
|
|
250
250
|
|
|
251
|
+
class GetEventsFormat(MonitoringStrEnum):
    """Response layouts accepted as the ``events_format`` of a model endpoints metrics query."""

    # NOTE(review): presumably the layout for a single endpoint query - confirm against the API.
    SINGLE = "single"
    # One list of events per model endpoint id: {"mep_id1": [...], "mep_id2": [...]}
    SEPARATION = "separation"
    # Only the events shared by the endpoints: {"intersect_metrics": [...], "intersect_results": [...]}
    INTERSECTION = "intersection"
|
|
255
|
+
|
|
256
|
+
|
|
251
257
|
class ModelEndpointTargetSchemas(MonitoringStrEnum):
|
|
252
258
|
V3IO = "v3io"
|
|
253
259
|
MYSQL = "mysql"
|
|
@@ -448,3 +454,8 @@ FQN_REGEX = re.compile(FQN_PATTERN)
|
|
|
448
454
|
PROJECT_PATTERN = r"^[a-z0-9]([a-z0-9-]{0,61}[a-z0-9])?$"
|
|
449
455
|
|
|
450
456
|
MODEL_ENDPOINT_ID_PATTERN = r"^[a-zA-Z0-9_-]+$"
|
|
457
|
+
|
|
458
|
+
# Maps each monitoring metric type to the response key that holds the events of that type
# which are common to the queried model endpoints (the "intersection" events format).
INTERSECT_DICT_KEYS = {
    ModelEndpointMonitoringMetricType.METRIC: "intersect_metrics",
    ModelEndpointMonitoringMetricType.RESULT: "intersect_results",
}
|
|
@@ -259,12 +259,12 @@ class ModelEndpointMonitoringMetric(BaseModel):
|
|
|
259
259
|
|
|
260
260
|
def __init__(self, **kwargs):
    """Initialize the model from ``kwargs`` and derive ``full_name`` from the populated fields."""
    # The base initializer must run first: `compose_full_name` reads the fields it sets.
    super().__init__(**kwargs)
    self.full_name = compose_full_name(
        project=self.project, app=self.app, name=self.name, type=self.type
    )
|
|
265
265
|
|
|
266
266
|
|
|
267
|
-
def
|
|
267
|
+
def compose_full_name(
|
|
268
268
|
*,
|
|
269
269
|
project: str,
|
|
270
270
|
app: str,
|
mlrun/config.py
CHANGED
|
@@ -83,8 +83,8 @@ default_config = {
|
|
|
83
83
|
"images_to_enrich_registry": "^mlrun/*,python:3.9",
|
|
84
84
|
"kfp_url": "",
|
|
85
85
|
"kfp_ttl": "14400", # KFP ttl in sec, after that completed PODs will be deleted
|
|
86
|
-
"kfp_image": "mlrun/mlrun-kfp", # image to use for KFP runner
|
|
87
|
-
"dask_kfp_image": "mlrun/ml-base", # image to use for dask KFP runner
|
|
86
|
+
"kfp_image": "mlrun/mlrun-kfp", # image to use for KFP runner
|
|
87
|
+
"dask_kfp_image": "mlrun/ml-base", # image to use for dask KFP runner
|
|
88
88
|
"igz_version": "", # the version of the iguazio system the API is running on
|
|
89
89
|
"iguazio_api_url": "", # the url to iguazio api
|
|
90
90
|
"spark_app_image": "", # image to use for spark operator app runtime
|
|
@@ -169,6 +169,7 @@ default_config = {
|
|
|
169
169
|
"max_chunk_size": 1024 * 1024 * 1, # 1MB
|
|
170
170
|
"max_preview_size": 1024 * 1024 * 10, # 10MB
|
|
171
171
|
"max_download_size": 1024 * 1024 * 100, # 100MB
|
|
172
|
+
"max_deletions": 200,
|
|
172
173
|
},
|
|
173
174
|
},
|
|
174
175
|
# FIXME: Adding these defaults here so we won't need to patch the "installing component" (provazio-controller) to
|
mlrun/db/base.py
CHANGED
|
@@ -337,6 +337,15 @@ class RunDBInterface(ABC):
|
|
|
337
337
|
) -> list[mm_endpoints.ModelEndpointMonitoringMetric]:
|
|
338
338
|
pass
|
|
339
339
|
|
|
340
|
+
def get_metrics_by_multiple_endpoints(
    self,
    project: str,
    endpoint_ids: Union[str, list[str]],
    type: Literal["results", "metrics", "all"] = "all",
    events_format: mm_constants.GetEventsFormat = mm_constants.GetEventsFormat.SEPARATION,
) -> dict[str, list[mm_endpoints.ModelEndpointMonitoringMetric]]:
    """
    Interface placeholder for fetching application metrics/results of several model endpoints.

    This base implementation does nothing and returns ``None``; concrete run DBs are expected
    to override it.

    :param project:       The name of the project.
    :param endpoint_ids:  A single model endpoint id or a list of ids.
    :param type:          Which events to return: "results", "metrics" or "all" (both).
    :param events_format: The layout of the returned dictionary.

    :return: A dictionary of metrics and/or results, laid out according to ``events_format``.
    """
    # NOTE(review): unlike `delete_project` below, this method is not marked `@abstractmethod`.
    pass
|
|
348
|
+
|
|
340
349
|
@abstractmethod
|
|
341
350
|
def delete_project(
|
|
342
351
|
self,
|
mlrun/db/httpdb.py
CHANGED
|
@@ -3524,6 +3524,48 @@ class HTTPRunDB(RunDBInterface):
|
|
|
3524
3524
|
list[mm_endpoints.ModelEndpointMonitoringMetric], monitoring_metrics
|
|
3525
3525
|
)
|
|
3526
3526
|
|
|
3527
|
+
def get_metrics_by_multiple_endpoints(
    self,
    project: str,
    endpoint_ids: Union[str, list[str]],
    type: Literal["results", "metrics", "all"] = "all",
    events_format: mm_constants.GetEventsFormat = mm_constants.GetEventsFormat.SEPARATION,
) -> dict[str, list[mm_endpoints.ModelEndpointMonitoringMetric]]:
    """
    Fetch the application metrics and/or results of one or more model endpoints in a project.

    :param project:       The name of the project.
    :param endpoint_ids:  The unique id of the model endpoint - a single id or a list of ids.
    :param type:          The type of the events to return. "all" means both "results" and "metrics".
    :param events_format: The layout of the response:

                          separation:   {"mep_id1": [...], "mep_id2": [...]}
                          intersection: {"intersect_metrics": [...], "intersect_results": [...]}

    :return: A dictionary of application metrics and/or results for the model endpoints, keyed
             according to ``events_format``.
    """
    failure_message = (
        f"Failed to get model monitoring metrics,"
        f" endpoint_ids: {endpoint_ids}, project: {project}"
    )
    response = self.api_call(
        mlrun.common.types.HTTPMethod.GET,
        f"projects/{project}/model-endpoints/metrics",
        failure_message,
        params={
            "type": type,
            "endpoint-id": endpoint_ids,
            "events_format": events_format,
        },
    )
    # Every value in the JSON payload is a list of raw metric dicts; parse each into schema objects.
    return {
        key: parse_obj_as(list[mm_endpoints.ModelEndpointMonitoringMetric], raw_metrics)
        for key, raw_metrics in response.json().items()
    }
|
|
3568
|
+
|
|
3527
3569
|
def create_user_secrets(
|
|
3528
3570
|
self,
|
|
3529
3571
|
user: str,
|
|
@@ -14,7 +14,8 @@
|
|
|
14
14
|
|
|
15
15
|
import socket
|
|
16
16
|
from abc import ABC, abstractmethod
|
|
17
|
-
from
|
|
17
|
+
from collections.abc import Iterator
|
|
18
|
+
from datetime import datetime, timedelta
|
|
18
19
|
from typing import Any, Optional, Union, cast
|
|
19
20
|
|
|
20
21
|
import pandas as pd
|
|
@@ -96,6 +97,7 @@ class ModelMonitoringApplicationBase(MonitoringApplicationToDict, ABC):
|
|
|
96
97
|
endpoints: Optional[list[tuple[str, str]]] = None,
|
|
97
98
|
start: Optional[datetime] = None,
|
|
98
99
|
end: Optional[datetime] = None,
|
|
100
|
+
base_period: Optional[int] = None,
|
|
99
101
|
):
|
|
100
102
|
"""
|
|
101
103
|
A custom handler that wraps the application's logic implemented in
|
|
@@ -122,32 +124,59 @@ class ModelMonitoringApplicationBase(MonitoringApplicationToDict, ABC):
|
|
|
122
124
|
return self.do_tracking(monitoring_context)
|
|
123
125
|
|
|
124
126
|
if endpoints is not None:
|
|
125
|
-
start, end = self._validate_times(start, end)
|
|
126
|
-
for
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
|
|
127
|
+
start, end = self._validate_times(start, end, base_period)
|
|
128
|
+
for window_start, window_end in self._window_generator(
|
|
129
|
+
start, end, base_period
|
|
130
|
+
):
|
|
131
|
+
for endpoint_name, endpoint_id in endpoints:
|
|
132
|
+
result = call_do_tracking(
|
|
133
|
+
event={
|
|
134
|
+
mm_constants.ApplicationEvent.ENDPOINT_NAME: endpoint_name,
|
|
135
|
+
mm_constants.ApplicationEvent.ENDPOINT_ID: endpoint_id,
|
|
136
|
+
mm_constants.ApplicationEvent.START_INFER_TIME: window_start,
|
|
137
|
+
mm_constants.ApplicationEvent.END_INFER_TIME: window_end,
|
|
138
|
+
}
|
|
139
|
+
)
|
|
140
|
+
context.log_result(
|
|
141
|
+
f"{endpoint_name}_{window_start.isoformat()}_{window_end.isoformat()}",
|
|
142
|
+
result,
|
|
143
|
+
)
|
|
138
144
|
else:
|
|
139
145
|
return call_do_tracking()
|
|
140
146
|
|
|
141
147
|
@staticmethod
def _validate_times(
    start: Optional[datetime],
    end: Optional[datetime],
    base_period: Optional[int],
) -> tuple[datetime, datetime]:
    """
    Validate the time range and window length given together with explicit endpoints.

    :param start:       The start time of the data to analyze. Must not be ``None``.
    :param end:         The end time of the data to analyze. Must not be ``None``.
    :param base_period: The window length in minutes, or ``None`` for a single window.
                        When given, it must be an integer greater than zero.

    :return: The validated ``(start, end)`` pair, unchanged.

    :raise mlrun.errors.MLRunValueError: If ``start`` or ``end`` is missing, or if
                                         ``base_period`` is not a positive integer.
    """
    if (start is None) or (end is None):
        raise mlrun.errors.MLRunValueError(
            "When `endpoint_names` is provided, you must also pass the start and end times"
        )
    if (base_period is not None) and not (
        isinstance(base_period, int) and base_period > 0
    ):
        # Zero is rejected as well: a zero-length window would never advance through the range.
        raise mlrun.errors.MLRunValueError(
            "`base_period` must be a positive integer - the number of minutes in a monitoring window"
        )
    return start, end
|
|
150
164
|
|
|
165
|
+
@staticmethod
def _window_generator(
    start: datetime, end: datetime, base_period: Optional[int]
) -> Iterator[tuple[datetime, datetime]]:
    """
    Split the range from ``start`` to ``end`` into consecutive monitoring windows.

    :param start:       The beginning of the range.
    :param end:         The end of the range.
    :param base_period: The window length in minutes. ``None`` yields the whole range as one window.

    :return: An iterator of ``(window_start, window_end)`` pairs. Windows are back to back, and the
             last one is cut at ``end``, so it may be shorter than ``base_period`` minutes.
    """
    if base_period is not None:
        step = timedelta(minutes=base_period)
        window_start = start
        while window_start < end:
            window_end = window_start + step
            if end < window_end:
                # Never run past the requested range.
                window_end = end
            yield window_start, window_end
            window_start = window_end
    else:
        yield start, end
|
|
179
|
+
|
|
151
180
|
@classmethod
|
|
152
181
|
def deploy(
|
|
153
182
|
cls,
|
|
@@ -203,6 +232,7 @@ class ModelMonitoringApplicationBase(MonitoringApplicationToDict, ABC):
|
|
|
203
232
|
endpoints: Optional[list[tuple[str, str]]] = None,
|
|
204
233
|
start: Optional[datetime] = None,
|
|
205
234
|
end: Optional[datetime] = None,
|
|
235
|
+
base_period: Optional[int] = None,
|
|
206
236
|
) -> "mlrun.RunObject":
|
|
207
237
|
"""
|
|
208
238
|
Call this function to run the application's
|
|
@@ -228,6 +258,10 @@ class ModelMonitoringApplicationBase(MonitoringApplicationToDict, ABC):
|
|
|
228
258
|
If provided, you have to provide also the start and end times of the data to analyze.
|
|
229
259
|
:param start: The start time of the sample data.
|
|
230
260
|
:param end: The end time of the sample data.
|
|
261
|
+
:param base_period: The window length in minutes. If ``None``, the whole window from ``start`` to ``end``
|
|
262
|
+
is taken. If an integer is specified, the application is run from ``start`` to ``end``
|
|
263
|
+
in ``base_period`` length windows, except for the last window that ends at ``end`` and
|
|
264
|
+
therefore may be shorter.
|
|
231
265
|
|
|
232
266
|
:returns: The output of the
|
|
233
267
|
:py:meth:`~mlrun.model_monitoring.applications.ModelMonitoringApplicationBase.do_tracking`
|
|
@@ -253,15 +287,16 @@ class ModelMonitoringApplicationBase(MonitoringApplicationToDict, ABC):
|
|
|
253
287
|
),
|
|
254
288
|
)
|
|
255
289
|
|
|
256
|
-
params: dict[str, Union[list[tuple[str, str]], datetime]] = {}
|
|
290
|
+
params: dict[str, Union[list[tuple[str, str]], datetime, int, None]] = {}
|
|
257
291
|
if endpoints:
|
|
258
|
-
start, end = cls._validate_times(start, end)
|
|
292
|
+
start, end = cls._validate_times(start, end, base_period)
|
|
259
293
|
params["endpoints"] = endpoints
|
|
260
294
|
params["start"] = start
|
|
261
295
|
params["end"] = end
|
|
262
|
-
|
|
296
|
+
params["base_period"] = base_period
|
|
297
|
+
elif start or end or base_period:
|
|
263
298
|
raise mlrun.errors.MLRunValueError(
|
|
264
|
-
"Custom start
|
|
299
|
+
"Custom start and end times or base_period are supported only with endpoints data"
|
|
265
300
|
)
|
|
266
301
|
|
|
267
302
|
inputs: dict[str, str] = {}
|
|
@@ -234,14 +234,14 @@ class TSDBConnector(ABC):
|
|
|
234
234
|
@abstractmethod
|
|
235
235
|
def get_metrics_metadata(
|
|
236
236
|
self,
|
|
237
|
-
endpoint_id: str,
|
|
237
|
+
endpoint_id: typing.Union[str, list[str]],
|
|
238
238
|
start: typing.Optional[datetime] = None,
|
|
239
239
|
end: typing.Optional[datetime] = None,
|
|
240
240
|
) -> pd.DataFrame:
|
|
241
241
|
"""
|
|
242
|
-
Fetches distinct metrics metadata from the metrics TSDB table for a specified model
|
|
242
|
+
Fetches distinct metrics metadata from the metrics TSDB table for a specified model endpoints.
|
|
243
243
|
|
|
244
|
-
:param endpoint_id: The model endpoint identifier.
|
|
244
|
+
:param endpoint_id: The model endpoint identifier. Can be a single id or a list of ids.
|
|
245
245
|
:param start: The start time of the query.
|
|
246
246
|
:param end: The end time of the query.
|
|
247
247
|
|
|
@@ -252,14 +252,14 @@ class TSDBConnector(ABC):
|
|
|
252
252
|
@abstractmethod
|
|
253
253
|
def get_results_metadata(
|
|
254
254
|
self,
|
|
255
|
-
endpoint_id: str,
|
|
255
|
+
endpoint_id: typing.Union[str, list[str]],
|
|
256
256
|
start: typing.Optional[datetime] = None,
|
|
257
257
|
end: typing.Optional[datetime] = None,
|
|
258
258
|
) -> pd.DataFrame:
|
|
259
259
|
"""
|
|
260
|
-
Fetches distinct results metadata from the app-results TSDB table for a specified model
|
|
260
|
+
Fetches distinct results metadata from the app-results TSDB table for a specified model endpoints.
|
|
261
261
|
|
|
262
|
-
:param endpoint_id: The model endpoint identifier.
|
|
262
|
+
:param endpoint_id: The model endpoint identifier. Can be a single id or a list of ids.
|
|
263
263
|
:param start: The start time of the query.
|
|
264
264
|
:param end: The end time of the query.
|
|
265
265
|
|
|
@@ -341,7 +341,7 @@ class TSDBConnector(ABC):
|
|
|
341
341
|
logger.debug("No metrics", missing_metrics=metrics_without_data.keys())
|
|
342
342
|
grouped = []
|
|
343
343
|
for (app_name, name), sub_df in grouped:
|
|
344
|
-
full_name =
|
|
344
|
+
full_name = mm_schemas.model_endpoints.compose_full_name(
|
|
345
345
|
project=project,
|
|
346
346
|
app=app_name,
|
|
347
347
|
name=name,
|
|
@@ -410,7 +410,7 @@ class TSDBConnector(ABC):
|
|
|
410
410
|
result_kind = mlrun.model_monitoring.db.tsdb.helpers._get_result_kind(
|
|
411
411
|
sub_df
|
|
412
412
|
)
|
|
413
|
-
full_name =
|
|
413
|
+
full_name = mm_schemas.model_endpoints.compose_full_name(
|
|
414
414
|
project=project, app=app_name, name=name
|
|
415
415
|
)
|
|
416
416
|
try:
|
|
@@ -467,6 +467,7 @@ class TSDBConnector(ABC):
|
|
|
467
467
|
|
|
468
468
|
:return: A list of mm metrics objects.
|
|
469
469
|
"""
|
|
470
|
+
|
|
470
471
|
return list(
|
|
471
472
|
map(
|
|
472
473
|
lambda record: mm_schemas.ModelEndpointMonitoringMetric(
|
|
@@ -481,6 +482,113 @@ class TSDBConnector(ABC):
|
|
|
481
482
|
)
|
|
482
483
|
)
|
|
483
484
|
|
|
485
|
+
@staticmethod
def df_to_metrics_grouped_dict(
    *,
    df: pd.DataFrame,
    project: str,
    type: str,
) -> dict[str, list[mm_schemas.ModelEndpointMonitoringMetric]]:
    """
    Parse a DataFrame of metrics from the TSDB into mm metrics objects grouped by endpoint id.

    The input DataFrame is not modified.

    :param df:      The DataFrame to parse.
    :param project: The project name.
    :param type:    The type of the metrics (either "result" or "metric").

    :return: A grouped dict of mm metrics/results, using the model endpoint ids as keys.
    """
    if df.empty:
        return {}

    is_result = type == "result"
    grouped_by_fields = [mm_schemas.WriterEvent.APPLICATION_NAME]
    if is_result:
        name_column = mm_schemas.ResultData.RESULT_NAME
        grouped_by_fields.append(mm_schemas.ResultData.RESULT_KIND)
    else:
        name_column = mm_schemas.MetricData.METRIC_NAME
    grouped_by_fields.append(name_column)

    def _to_metric(record: dict) -> mm_schemas.ModelEndpointMonitoringMetric:
        # Only results carry a kind; metrics are built without one.
        extra = (
            {"kind": record.get(mm_schemas.ResultData.RESULT_KIND)}
            if is_result
            else {}
        )
        return mm_schemas.ModelEndpointMonitoringMetric(
            project=project,
            type=type,
            app=record.get(mm_schemas.WriterEvent.APPLICATION_NAME),
            name=record.get(name_column),
            **extra,
        )

    # groupby has different behavior for category columns, so group on string ids.
    # `assign` works on a copy, which keeps the caller's DataFrame untouched.
    frame = df.assign(endpoint_id=df["endpoint_id"].astype(str))

    # Iterating the groups (rather than `groupby.apply`) avoids applying a function over the
    # grouping column and yields the endpoint ids in the same sorted order.
    return {
        endpoint_id: [
            _to_metric(record)
            for record in group[grouped_by_fields].to_dict(orient="records")
        ]
        for endpoint_id, group in frame.groupby("endpoint_id")
    }
|
|
533
|
+
|
|
534
|
+
@staticmethod
def df_to_events_intersection_dict(
    *,
    df: pd.DataFrame,
    project: str,
    type: typing.Union[str, mm_schemas.ModelEndpointMonitoringMetricType],
) -> dict[str, list[mm_schemas.ModelEndpointMonitoringMetric]]:
    """
    Parse a DataFrame of metrics from the TSDB into a dict of intersection metrics/results by name and
    application (and kind in results).

    The input DataFrame is not modified.

    :param df:      The DataFrame to parse.
    :param project: The project name.
    :param type:    The type of the metrics (either "result" or "metric").

    :return: A dictionary where the key is the event type key (as defined by `INTERSECT_DICT_KEYS`),
             and the value is a list of the metrics or results that appear for every endpoint id
             present in ``df``. The order of the list is not defined.

             For example:
             {
                 "intersect_metrics": [...]
             }
    """
    dict_key = mm_schemas.INTERSECT_DICT_KEYS[type]
    if df.empty:
        return {dict_key: []}

    # Each event is identified by (application, name) and, for results, also by kind.
    columns_to_zip = [mm_schemas.WriterEvent.APPLICATION_NAME]
    if type == "result":
        columns_to_zip.append(mm_schemas.ResultData.RESULT_NAME)
        columns_to_zip.append(mm_schemas.ResultData.RESULT_KIND)
    else:
        columns_to_zip.append(mm_schemas.MetricData.METRIC_NAME)

    # Collect the distinct events of every endpoint without adding helper columns to `df`.
    # Ids are compared as strings, so category columns behave like plain ones.
    events_by_endpoint: dict[str, set[tuple]] = {}
    endpoint_ids = df["endpoint_id"].astype(str)
    event_values = zip(*(df[column] for column in columns_to_zip))
    for endpoint_id, event in zip(endpoint_ids, event_values):
        events_by_endpoint.setdefault(endpoint_id, set()).add(event)

    # `df` is not empty here, so there is at least one set to intersect.
    common_events = set.intersection(*events_by_endpoint.values())

    metrics = []
    for event in common_events:
        application_name, event_name = event[0], event[1]
        # Determined per event, so a kind can never carry over from a previous iteration.
        result_kind = event[2] if len(event) > 2 else None
        metrics.append(
            mm_schemas.ModelEndpointMonitoringMetric(
                project=project,
                type=type,
                app=application_name,
                name=event_name,
                kind=result_kind,
            )
        )
    return {dict_key: metrics}
|
|
591
|
+
|
|
484
592
|
@staticmethod
|
|
485
593
|
def _get_start_end(
|
|
486
594
|
start: typing.Union[datetime, None],
|
|
@@ -13,7 +13,7 @@
|
|
|
13
13
|
# limitations under the License.
|
|
14
14
|
|
|
15
15
|
import typing
|
|
16
|
-
from datetime import datetime, timedelta
|
|
16
|
+
from datetime import datetime, timedelta
|
|
17
17
|
|
|
18
18
|
import pandas as pd
|
|
19
19
|
import taosws
|
|
@@ -164,6 +164,17 @@ class TDEngineConnector(TSDBConnector):
|
|
|
164
164
|
def _convert_to_datetime(val: typing.Union[str, datetime]) -> datetime:
|
|
165
165
|
return datetime.fromisoformat(val) if isinstance(val, str) else val
|
|
166
166
|
|
|
167
|
+
@staticmethod
def _get_endpoint_filter(endpoint_id: typing.Union[str, list[str]]):
    """
    Build the query filter that restricts a TSDB query to the given model endpoint(s).

    :param endpoint_id: A single model endpoint id, or a non-empty list of ids.

    :return: An equality condition for a single id, or an ``IN(...)`` condition (followed by a
             trailing space) for a list of ids.

    :raise mlrun.errors.MLRunInvalidArgumentError: If ``endpoint_id`` is neither a string nor a
                                                   list, or if it is an empty list.
    """
    if isinstance(endpoint_id, str):
        return f"endpoint_id='{endpoint_id}'"
    if isinstance(endpoint_id, list):
        if not endpoint_id:
            # An empty list would render as `endpoint_id IN() `, which is not a usable condition.
            raise mlrun.errors.MLRunInvalidArgumentError(
                "Invalid 'endpoint_id' filter: the list of endpoint ids must not be empty."
            )
        # The list's repr without its brackets is the comma separated, quoted ids: 'a', 'b'
        # NOTE(review): this relies on Python literal quoting - presumably ids never contain
        # quote characters; confirm against the endpoint id validation.
        quoted_ids = str(endpoint_id)[1:-1]
        return f"endpoint_id IN({quoted_ids}) "
    raise mlrun.errors.MLRunInvalidArgumentError(
        "Invalid 'endpoint_id' filter: must be a string or a list."
    )
|
|
177
|
+
|
|
167
178
|
def apply_monitoring_stream_steps(self, graph, **kwarg):
|
|
168
179
|
"""
|
|
169
180
|
Apply TSDB steps on the provided monitoring graph. Throughout these steps, the graph stores live data of
|
|
@@ -542,12 +553,11 @@ class TDEngineConnector(TSDBConnector):
|
|
|
542
553
|
},
|
|
543
554
|
inplace=True,
|
|
544
555
|
)
|
|
545
|
-
df[mm_schemas.EventFieldType.LAST_REQUEST] =
|
|
546
|
-
mm_schemas.EventFieldType.LAST_REQUEST
|
|
547
|
-
|
|
548
|
-
|
|
549
|
-
|
|
550
|
-
).astimezone(tz=timezone.utc)
|
|
556
|
+
df[mm_schemas.EventFieldType.LAST_REQUEST] = pd.to_datetime(
|
|
557
|
+
df[mm_schemas.EventFieldType.LAST_REQUEST],
|
|
558
|
+
errors="coerce",
|
|
559
|
+
format="ISO8601",
|
|
560
|
+
utc=True,
|
|
551
561
|
)
|
|
552
562
|
return df
|
|
553
563
|
|
|
@@ -588,7 +598,7 @@ class TDEngineConnector(TSDBConnector):
|
|
|
588
598
|
|
|
589
599
|
def get_metrics_metadata(
|
|
590
600
|
self,
|
|
591
|
-
endpoint_id: str,
|
|
601
|
+
endpoint_id: typing.Union[str, list[str]],
|
|
592
602
|
start: typing.Optional[datetime] = None,
|
|
593
603
|
end: typing.Optional[datetime] = None,
|
|
594
604
|
) -> pd.DataFrame:
|
|
@@ -602,11 +612,12 @@ class TDEngineConnector(TSDBConnector):
|
|
|
602
612
|
mm_schemas.MetricData.METRIC_NAME,
|
|
603
613
|
mm_schemas.EventFieldType.ENDPOINT_ID,
|
|
604
614
|
],
|
|
605
|
-
filter_query=
|
|
615
|
+
filter_query=self._get_endpoint_filter(endpoint_id=endpoint_id),
|
|
606
616
|
timestamp_column=mm_schemas.WriterEvent.END_INFER_TIME,
|
|
607
617
|
group_by=[
|
|
608
618
|
mm_schemas.WriterEvent.APPLICATION_NAME,
|
|
609
619
|
mm_schemas.MetricData.METRIC_NAME,
|
|
620
|
+
mm_schemas.EventFieldType.ENDPOINT_ID,
|
|
610
621
|
],
|
|
611
622
|
agg_funcs=["last"],
|
|
612
623
|
)
|
|
@@ -624,7 +635,7 @@ class TDEngineConnector(TSDBConnector):
|
|
|
624
635
|
|
|
625
636
|
def get_results_metadata(
|
|
626
637
|
self,
|
|
627
|
-
endpoint_id: str,
|
|
638
|
+
endpoint_id: typing.Union[str, list[str]],
|
|
628
639
|
start: typing.Optional[datetime] = None,
|
|
629
640
|
end: typing.Optional[datetime] = None,
|
|
630
641
|
) -> pd.DataFrame:
|
|
@@ -639,11 +650,12 @@ class TDEngineConnector(TSDBConnector):
|
|
|
639
650
|
mm_schemas.ResultData.RESULT_KIND,
|
|
640
651
|
mm_schemas.EventFieldType.ENDPOINT_ID,
|
|
641
652
|
],
|
|
642
|
-
filter_query=
|
|
653
|
+
filter_query=self._get_endpoint_filter(endpoint_id=endpoint_id),
|
|
643
654
|
timestamp_column=mm_schemas.WriterEvent.END_INFER_TIME,
|
|
644
655
|
group_by=[
|
|
645
656
|
mm_schemas.WriterEvent.APPLICATION_NAME,
|
|
646
657
|
mm_schemas.ResultData.RESULT_NAME,
|
|
658
|
+
mm_schemas.EventFieldType.ENDPOINT_ID,
|
|
647
659
|
],
|
|
648
660
|
agg_funcs=["last"],
|
|
649
661
|
)
|
|
@@ -33,6 +33,8 @@ _TSDB_BE = "tsdb"
|
|
|
33
33
|
_TSDB_RATE = "1/s"
|
|
34
34
|
_CONTAINER = "users"
|
|
35
35
|
|
|
36
|
+
V3IO_MEPS_LIMIT = 50 # TODO remove limitation after fixing ML-8886
|
|
37
|
+
|
|
36
38
|
|
|
37
39
|
def _is_no_schema_error(exc: v3io_frames.Error) -> bool:
|
|
38
40
|
"""
|
|
@@ -577,6 +579,21 @@ class V3IOTSDBConnector(TSDBConnector):
|
|
|
577
579
|
token=v3io_access_key,
|
|
578
580
|
)
|
|
579
581
|
|
|
582
|
+
@staticmethod
def _get_endpoint_filter(endpoint_id: Union[str, list[str]]):
    """
    Build the filter expression that restricts a V3IO TSDB query to the given model endpoint(s).

    :param endpoint_id: A single model endpoint id, or a list of at most ``V3IO_MEPS_LIMIT`` ids.

    :return: An equality expression for a single id, or an ``IN(...)`` expression (followed by a
             trailing space) for a list of ids.

    :raise mlrun.errors.MLRunInvalidArgumentError: If ``endpoint_id`` is neither a string nor a
                                                   list, or if the list is longer than the limit.
    """
    if isinstance(endpoint_id, str):
        return f"endpoint_id=='{endpoint_id}'"

    if not isinstance(endpoint_id, list):
        raise mlrun.errors.MLRunInvalidArgumentError(
            f"Invalid 'endpoint_id' filter: must be a string or a list, endpoint_id: {endpoint_id}"
        )

    if len(endpoint_id) > V3IO_MEPS_LIMIT:
        raise mlrun.errors.MLRunInvalidArgumentError(
            f"Filtering more than {V3IO_MEPS_LIMIT} model endpoints in the V3IO connector is not supported."
        )

    # The list's repr without its brackets is the comma separated, quoted ids: 'a', 'b'
    # NOTE(review): an empty list renders as `endpoint_id IN() ` - confirm the backend accepts it.
    quoted_ids = str(endpoint_id)[1:-1]
    return f"endpoint_id IN({quoted_ids}) "
|
|
596
|
+
|
|
580
597
|
def read_metrics_data(
|
|
581
598
|
self,
|
|
582
599
|
*,
|
|
@@ -813,17 +830,18 @@ class V3IOTSDBConnector(TSDBConnector):
|
|
|
813
830
|
|
|
814
831
|
def get_metrics_metadata(
|
|
815
832
|
self,
|
|
816
|
-
endpoint_id: str,
|
|
833
|
+
endpoint_id: Union[str, list[str]],
|
|
817
834
|
start: Optional[datetime] = None,
|
|
818
835
|
end: Optional[datetime] = None,
|
|
819
836
|
) -> pd.DataFrame:
|
|
820
837
|
start, end = self._get_start_end(start, end)
|
|
838
|
+
filter_query = self._get_endpoint_filter(endpoint_id=endpoint_id)
|
|
821
839
|
df = self._get_records(
|
|
822
840
|
table=mm_schemas.V3IOTSDBTables.METRICS,
|
|
823
841
|
start=start,
|
|
824
842
|
end=end,
|
|
825
843
|
columns=[mm_schemas.MetricData.METRIC_VALUE],
|
|
826
|
-
filter_query=
|
|
844
|
+
filter_query=filter_query,
|
|
827
845
|
agg_funcs=["last"],
|
|
828
846
|
)
|
|
829
847
|
if not df.empty:
|
|
@@ -834,11 +852,12 @@ class V3IOTSDBConnector(TSDBConnector):
|
|
|
834
852
|
|
|
835
853
|
def get_results_metadata(
|
|
836
854
|
self,
|
|
837
|
-
endpoint_id: str,
|
|
855
|
+
endpoint_id: Union[str, list[str]],
|
|
838
856
|
start: Optional[datetime] = None,
|
|
839
857
|
end: Optional[datetime] = None,
|
|
840
858
|
) -> pd.DataFrame:
|
|
841
859
|
start, end = self._get_start_end(start, end)
|
|
860
|
+
filter_query = self._get_endpoint_filter(endpoint_id=endpoint_id)
|
|
842
861
|
df = self._get_records(
|
|
843
862
|
table=mm_schemas.V3IOTSDBTables.APP_RESULTS,
|
|
844
863
|
start=start,
|
|
@@ -846,7 +865,7 @@ class V3IOTSDBConnector(TSDBConnector):
|
|
|
846
865
|
columns=[
|
|
847
866
|
mm_schemas.ResultData.RESULT_KIND,
|
|
848
867
|
],
|
|
849
|
-
filter_query=
|
|
868
|
+
filter_query=filter_query,
|
|
850
869
|
agg_funcs=["last"],
|
|
851
870
|
)
|
|
852
871
|
if not df.empty:
|
|
@@ -32,7 +32,7 @@ import mlrun.utils.helpers
|
|
|
32
32
|
from mlrun.common.schemas import ModelEndpoint
|
|
33
33
|
from mlrun.common.schemas.model_monitoring.model_endpoints import (
|
|
34
34
|
ModelEndpointMonitoringMetric,
|
|
35
|
-
|
|
35
|
+
compose_full_name,
|
|
36
36
|
)
|
|
37
37
|
from mlrun.utils import logger
|
|
38
38
|
|
|
@@ -450,7 +450,7 @@ def get_default_result_instance_fqn(model_endpoint_id: str) -> str:
|
|
|
450
450
|
|
|
451
451
|
|
|
452
452
|
def get_invocations_fqn(project: str) -> str:
|
|
453
|
-
return
|
|
453
|
+
return compose_full_name(
|
|
454
454
|
project=project,
|
|
455
455
|
app=mm_constants.SpecialApps.MLRUN_INFRA,
|
|
456
456
|
name=mm_constants.PredictionsQueryConstants.INVOCATIONS,
|
mlrun/projects/pipelines.py
CHANGED
|
@@ -748,7 +748,8 @@ class _LocalRunner(_PipelineRunner):
|
|
|
748
748
|
project.set_source(source=source)
|
|
749
749
|
pipeline_context.workflow_artifact_path = artifact_path
|
|
750
750
|
|
|
751
|
-
|
|
751
|
+
# TODO: we should create endpoint for sending custom notification from BE
|
|
752
|
+
project.notifiers.push_pipeline_start_message_from_client(
|
|
752
753
|
project.metadata.name, pipeline_id=workflow_id
|
|
753
754
|
)
|
|
754
755
|
err = None
|