mlrun 1.8.0rc30__py3-none-any.whl → 1.8.0rc32__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of mlrun might be problematic. Click here for more details.
- mlrun/__init__.py +2 -35
- mlrun/api/schemas/__init__.py +1 -6
- mlrun/common/runtimes/constants.py +4 -0
- mlrun/common/schemas/__init__.py +0 -2
- mlrun/common/schemas/model_monitoring/__init__.py +0 -2
- mlrun/common/schemas/model_monitoring/constants.py +1 -6
- mlrun/common/schemas/model_monitoring/grafana.py +17 -11
- mlrun/config.py +9 -36
- mlrun/datastore/storeytargets.py +20 -3
- mlrun/db/base.py +1 -1
- mlrun/db/httpdb.py +5 -4
- mlrun/db/nopdb.py +1 -1
- mlrun/model_monitoring/applications/base.py +111 -40
- mlrun/model_monitoring/applications/results.py +2 -2
- mlrun/model_monitoring/controller.py +4 -3
- mlrun/model_monitoring/db/tsdb/__init__.py +9 -5
- mlrun/model_monitoring/db/tsdb/base.py +60 -39
- mlrun/model_monitoring/db/tsdb/tdengine/tdengine_connector.py +117 -52
- mlrun/model_monitoring/db/tsdb/v3io/v3io_connector.py +140 -14
- mlrun/model_monitoring/helpers.py +16 -15
- mlrun/model_monitoring/stream_processing.py +6 -13
- mlrun/projects/pipelines.py +11 -3
- mlrun/projects/project.py +88 -111
- mlrun/serving/states.py +1 -1
- mlrun/serving/v2_serving.py +20 -10
- mlrun/utils/helpers.py +1 -1
- mlrun/utils/logger.py +13 -10
- mlrun/utils/notifications/notification_pusher.py +24 -0
- mlrun/utils/regex.py +1 -0
- mlrun/utils/version/version.json +2 -2
- {mlrun-1.8.0rc30.dist-info → mlrun-1.8.0rc32.dist-info}/METADATA +2 -2
- {mlrun-1.8.0rc30.dist-info → mlrun-1.8.0rc32.dist-info}/RECORD +36 -36
- {mlrun-1.8.0rc30.dist-info → mlrun-1.8.0rc32.dist-info}/LICENSE +0 -0
- {mlrun-1.8.0rc30.dist-info → mlrun-1.8.0rc32.dist-info}/WHEEL +0 -0
- {mlrun-1.8.0rc30.dist-info → mlrun-1.8.0rc32.dist-info}/entry_points.txt +0 -0
- {mlrun-1.8.0rc30.dist-info → mlrun-1.8.0rc32.dist-info}/top_level.txt +0 -0
|
@@ -12,12 +12,13 @@
|
|
|
12
12
|
# See the License for the specific language governing permissions and
|
|
13
13
|
# limitations under the License.
|
|
14
14
|
|
|
15
|
-
import typing
|
|
16
15
|
from abc import ABC, abstractmethod
|
|
17
16
|
from datetime import datetime
|
|
17
|
+
from typing import Callable, ClassVar, Literal, Optional, Union
|
|
18
18
|
|
|
19
19
|
import pandas as pd
|
|
20
20
|
import pydantic.v1
|
|
21
|
+
import v3io_frames.client
|
|
21
22
|
|
|
22
23
|
import mlrun.common.schemas.model_monitoring as mm_schemas
|
|
23
24
|
import mlrun.model_monitoring.db.tsdb.helpers
|
|
@@ -26,7 +27,7 @@ from mlrun.utils import logger
|
|
|
26
27
|
|
|
27
28
|
|
|
28
29
|
class TSDBConnector(ABC):
|
|
29
|
-
type:
|
|
30
|
+
type: ClassVar[str]
|
|
30
31
|
|
|
31
32
|
def __init__(self, project: str) -> None:
|
|
32
33
|
"""
|
|
@@ -130,17 +131,17 @@ class TSDBConnector(ABC):
|
|
|
130
131
|
start: datetime,
|
|
131
132
|
end: datetime,
|
|
132
133
|
metrics: list[mm_schemas.ModelEndpointMonitoringMetric],
|
|
133
|
-
type:
|
|
134
|
+
type: Literal["metrics", "results"],
|
|
134
135
|
with_result_extra_data: bool,
|
|
135
|
-
) ->
|
|
136
|
+
) -> Union[
|
|
136
137
|
list[
|
|
137
|
-
|
|
138
|
+
Union[
|
|
138
139
|
mm_schemas.ModelEndpointMonitoringResultValues,
|
|
139
140
|
mm_schemas.ModelEndpointMonitoringMetricNoData,
|
|
140
141
|
],
|
|
141
142
|
],
|
|
142
143
|
list[
|
|
143
|
-
|
|
144
|
+
Union[
|
|
144
145
|
mm_schemas.ModelEndpointMonitoringMetricValues,
|
|
145
146
|
mm_schemas.ModelEndpointMonitoringMetricNoData,
|
|
146
147
|
],
|
|
@@ -166,10 +167,10 @@ class TSDBConnector(ABC):
|
|
|
166
167
|
endpoint_id: str,
|
|
167
168
|
start: datetime,
|
|
168
169
|
end: datetime,
|
|
169
|
-
aggregation_window:
|
|
170
|
-
agg_funcs:
|
|
171
|
-
limit:
|
|
172
|
-
) ->
|
|
170
|
+
aggregation_window: Optional[str] = None,
|
|
171
|
+
agg_funcs: Optional[list[str]] = None,
|
|
172
|
+
limit: Optional[int] = None,
|
|
173
|
+
) -> Union[
|
|
173
174
|
mm_schemas.ModelEndpointMonitoringMetricValues,
|
|
174
175
|
mm_schemas.ModelEndpointMonitoringMetricNoData,
|
|
175
176
|
]:
|
|
@@ -195,10 +196,11 @@ class TSDBConnector(ABC):
|
|
|
195
196
|
@abstractmethod
|
|
196
197
|
def get_last_request(
|
|
197
198
|
self,
|
|
198
|
-
endpoint_ids:
|
|
199
|
-
start:
|
|
200
|
-
end:
|
|
201
|
-
|
|
199
|
+
endpoint_ids: Union[str, list[str]],
|
|
200
|
+
start: Optional[datetime] = None,
|
|
201
|
+
end: Optional[datetime] = None,
|
|
202
|
+
get_raw: bool = False,
|
|
203
|
+
) -> Union[pd.DataFrame, list[v3io_frames.client.RawFrame]]:
|
|
202
204
|
"""
|
|
203
205
|
Fetches data from the predictions TSDB table and returns the most recent request
|
|
204
206
|
timestamp for each specified endpoint.
|
|
@@ -206,6 +208,8 @@ class TSDBConnector(ABC):
|
|
|
206
208
|
:param endpoint_ids: A list of model endpoint identifiers.
|
|
207
209
|
:param start: The start time for the query.
|
|
208
210
|
:param end: The end time for the query.
|
|
211
|
+
:param get_raw: Whether to return the request as raw frames rather than a pandas dataframe. Defaults
|
|
212
|
+
to False. This can greatly improve performance when a dataframe isn't needed.
|
|
209
213
|
|
|
210
214
|
:return: A pd.DataFrame containing the columns [endpoint_id, last_request, last_latency].
|
|
211
215
|
If an endpoint has not been invoked within the specified time range, it will not appear in the result.
|
|
@@ -214,10 +218,11 @@ class TSDBConnector(ABC):
|
|
|
214
218
|
@abstractmethod
|
|
215
219
|
def get_drift_status(
|
|
216
220
|
self,
|
|
217
|
-
endpoint_ids:
|
|
218
|
-
start:
|
|
219
|
-
end:
|
|
220
|
-
|
|
221
|
+
endpoint_ids: Union[str, list[str]],
|
|
222
|
+
start: Optional[datetime] = None,
|
|
223
|
+
end: Optional[datetime] = None,
|
|
224
|
+
get_raw: bool = False,
|
|
225
|
+
) -> Union[pd.DataFrame, list[v3io_frames.client.RawFrame]]:
|
|
221
226
|
"""
|
|
222
227
|
Fetches data from the app-results TSDB table and returns the highest status among all
|
|
223
228
|
the results in the provided time range, which by default is the last 24 hours, for each specified endpoint.
|
|
@@ -225,6 +230,8 @@ class TSDBConnector(ABC):
|
|
|
225
230
|
:param endpoint_ids: A list of model endpoint identifiers.
|
|
226
231
|
:param start: The start time for the query.
|
|
227
232
|
:param end: The end time for the query.
|
|
233
|
+
:param get_raw: Whether to return the request as raw frames rather than a pandas dataframe. Defaults
|
|
234
|
+
to False. This can greatly improve performance when a dataframe isn't needed.
|
|
228
235
|
|
|
229
236
|
:return: A pd.DataFrame containing the columns [result_status, endpoint_id].
|
|
230
237
|
If an endpoint has not been monitored within the specified time range (last 24 hours),
|
|
@@ -234,9 +241,9 @@ class TSDBConnector(ABC):
|
|
|
234
241
|
@abstractmethod
|
|
235
242
|
def get_metrics_metadata(
|
|
236
243
|
self,
|
|
237
|
-
endpoint_id:
|
|
238
|
-
start:
|
|
239
|
-
end:
|
|
244
|
+
endpoint_id: Union[str, list[str]],
|
|
245
|
+
start: Optional[datetime] = None,
|
|
246
|
+
end: Optional[datetime] = None,
|
|
240
247
|
) -> pd.DataFrame:
|
|
241
248
|
"""
|
|
242
249
|
Fetches distinct metrics metadata from the metrics TSDB table for the specified model endpoints.
|
|
@@ -252,9 +259,9 @@ class TSDBConnector(ABC):
|
|
|
252
259
|
@abstractmethod
|
|
253
260
|
def get_results_metadata(
|
|
254
261
|
self,
|
|
255
|
-
endpoint_id:
|
|
256
|
-
start:
|
|
257
|
-
end:
|
|
262
|
+
endpoint_id: Union[str, list[str]],
|
|
263
|
+
start: Optional[datetime] = None,
|
|
264
|
+
end: Optional[datetime] = None,
|
|
258
265
|
) -> pd.DataFrame:
|
|
259
266
|
"""
|
|
260
267
|
Fetches distinct results metadata from the app-results TSDB table for the specified model endpoints.
|
|
@@ -270,16 +277,19 @@ class TSDBConnector(ABC):
|
|
|
270
277
|
@abstractmethod
|
|
271
278
|
def get_error_count(
|
|
272
279
|
self,
|
|
273
|
-
endpoint_ids:
|
|
274
|
-
start:
|
|
275
|
-
end:
|
|
276
|
-
|
|
280
|
+
endpoint_ids: Union[str, list[str]],
|
|
281
|
+
start: Optional[datetime] = None,
|
|
282
|
+
end: Optional[datetime] = None,
|
|
283
|
+
get_raw: bool = False,
|
|
284
|
+
) -> Union[pd.DataFrame, list[v3io_frames.client.RawFrame]]:
|
|
277
285
|
"""
|
|
278
286
|
Fetches data from the error TSDB table and returns the error count for each specified endpoint.
|
|
279
287
|
|
|
280
288
|
:param endpoint_ids: A list of model endpoint identifiers.
|
|
281
289
|
:param start: The start time for the query.
|
|
282
290
|
:param end: The end time for the query.
|
|
291
|
+
:param get_raw: Whether to return the request as raw frames rather than a pandas dataframe. Defaults
|
|
292
|
+
to False. This can greatly improve performance when a dataframe isn't needed.
|
|
283
293
|
|
|
284
294
|
:return: A pd.DataFrame containing the columns [error_count, endpoint_id].
|
|
285
295
|
If an endpoint has not raised an error within the specified time range, it will not appear in the result.
|
|
@@ -288,10 +298,11 @@ class TSDBConnector(ABC):
|
|
|
288
298
|
@abstractmethod
|
|
289
299
|
def get_avg_latency(
|
|
290
300
|
self,
|
|
291
|
-
endpoint_ids:
|
|
292
|
-
start:
|
|
293
|
-
end:
|
|
294
|
-
|
|
301
|
+
endpoint_ids: Union[str, list[str]],
|
|
302
|
+
start: Optional[datetime] = None,
|
|
303
|
+
end: Optional[datetime] = None,
|
|
304
|
+
get_raw: bool = False,
|
|
305
|
+
) -> Union[pd.DataFrame, list[v3io_frames.client.RawFrame]]:
|
|
295
306
|
"""
|
|
296
307
|
Fetches data from the predictions TSDB table and returns the average latency for each specified endpoint
|
|
297
308
|
in the provided time range, which by default is the last 24 hours.
|
|
@@ -299,11 +310,21 @@ class TSDBConnector(ABC):
|
|
|
299
310
|
:param endpoint_ids: A list of model endpoint identifiers.
|
|
300
311
|
:param start: The start time for the query.
|
|
301
312
|
:param end: The end time for the query.
|
|
313
|
+
:param get_raw: Whether to return the request as raw frames rather than a pandas dataframe. Defaults
|
|
314
|
+
to False. This can greatly improve performance when a dataframe isn't needed.
|
|
302
315
|
|
|
303
316
|
:return: A pd.DataFrame containing the columns [avg_latency, endpoint_id].
|
|
304
317
|
If an endpoint has not been invoked within the specified time range, it will not appear in the result.
|
|
305
318
|
"""
|
|
306
319
|
|
|
320
|
+
async def add_basic_metrics(
|
|
321
|
+
self,
|
|
322
|
+
model_endpoint_objects: list[mlrun.common.schemas.ModelEndpoint],
|
|
323
|
+
project: str,
|
|
324
|
+
run_in_threadpool: Callable,
|
|
325
|
+
) -> list[mlrun.common.schemas.ModelEndpoint]:
|
|
326
|
+
raise NotImplementedError()
|
|
327
|
+
|
|
307
328
|
@staticmethod
|
|
308
329
|
def df_to_metrics_values(
|
|
309
330
|
*,
|
|
@@ -311,7 +332,7 @@ class TSDBConnector(ABC):
|
|
|
311
332
|
metrics: list[mm_schemas.ModelEndpointMonitoringMetric],
|
|
312
333
|
project: str,
|
|
313
334
|
) -> list[
|
|
314
|
-
|
|
335
|
+
Union[
|
|
315
336
|
mm_schemas.ModelEndpointMonitoringMetricValues,
|
|
316
337
|
mm_schemas.ModelEndpointMonitoringMetricNoData,
|
|
317
338
|
]
|
|
@@ -324,7 +345,7 @@ class TSDBConnector(ABC):
|
|
|
324
345
|
metrics_without_data = {metric.full_name: metric for metric in metrics}
|
|
325
346
|
|
|
326
347
|
metrics_values: list[
|
|
327
|
-
|
|
348
|
+
Union[
|
|
328
349
|
mm_schemas.ModelEndpointMonitoringMetricValues,
|
|
329
350
|
mm_schemas.ModelEndpointMonitoringMetricNoData,
|
|
330
351
|
]
|
|
@@ -377,7 +398,7 @@ class TSDBConnector(ABC):
|
|
|
377
398
|
metrics: list[mm_schemas.ModelEndpointMonitoringMetric],
|
|
378
399
|
project: str,
|
|
379
400
|
) -> list[
|
|
380
|
-
|
|
401
|
+
Union[
|
|
381
402
|
mm_schemas.ModelEndpointMonitoringResultValues,
|
|
382
403
|
mm_schemas.ModelEndpointMonitoringMetricNoData,
|
|
383
404
|
]
|
|
@@ -390,7 +411,7 @@ class TSDBConnector(ABC):
|
|
|
390
411
|
metrics_without_data = {metric.full_name: metric for metric in metrics}
|
|
391
412
|
|
|
392
413
|
metrics_values: list[
|
|
393
|
-
|
|
414
|
+
Union[
|
|
394
415
|
mm_schemas.ModelEndpointMonitoringResultValues,
|
|
395
416
|
mm_schemas.ModelEndpointMonitoringMetricNoData,
|
|
396
417
|
]
|
|
@@ -536,7 +557,7 @@ class TSDBConnector(ABC):
|
|
|
536
557
|
*,
|
|
537
558
|
df: pd.DataFrame,
|
|
538
559
|
project: str,
|
|
539
|
-
type:
|
|
560
|
+
type: Union[str, mm_schemas.ModelEndpointMonitoringMetricType],
|
|
540
561
|
) -> dict[str, list[mm_schemas.ModelEndpointMonitoringMetric]]:
|
|
541
562
|
"""
|
|
542
563
|
Parse a DataFrame of metrics from the TSDB into a dict of intersection metrics/results by name and application
|
|
@@ -591,8 +612,8 @@ class TSDBConnector(ABC):
|
|
|
591
612
|
|
|
592
613
|
@staticmethod
|
|
593
614
|
def _get_start_end(
|
|
594
|
-
start:
|
|
595
|
-
end:
|
|
615
|
+
start: Union[datetime, None],
|
|
616
|
+
end: Union[datetime, None],
|
|
596
617
|
) -> tuple[datetime, datetime]:
|
|
597
618
|
"""
|
|
598
619
|
static utils function for tsdb start end format
|
|
@@ -11,9 +11,9 @@
|
|
|
11
11
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
12
|
# See the License for the specific language governing permissions and
|
|
13
13
|
# limitations under the License.
|
|
14
|
-
|
|
15
|
-
import typing
|
|
14
|
+
import asyncio
|
|
16
15
|
from datetime import datetime, timedelta
|
|
16
|
+
from typing import Callable, Literal, Optional, Union
|
|
17
17
|
|
|
18
18
|
import pandas as pd
|
|
19
19
|
import taosws
|
|
@@ -25,6 +25,7 @@ from taoswswrap.tdengine_connection import (
|
|
|
25
25
|
import mlrun.common.schemas.model_monitoring as mm_schemas
|
|
26
26
|
import mlrun.model_monitoring.db.tsdb.tdengine.schemas as tdengine_schemas
|
|
27
27
|
import mlrun.model_monitoring.db.tsdb.tdengine.stream_graph_steps
|
|
28
|
+
from mlrun.datastore.datastore_profile import DatastoreProfile
|
|
28
29
|
from mlrun.model_monitoring.db import TSDBConnector
|
|
29
30
|
from mlrun.model_monitoring.helpers import get_invocations_fqn
|
|
30
31
|
from mlrun.utils import logger
|
|
@@ -40,16 +41,13 @@ class TDEngineConnector(TSDBConnector):
|
|
|
40
41
|
def __init__(
|
|
41
42
|
self,
|
|
42
43
|
project: str,
|
|
43
|
-
|
|
44
|
+
profile: DatastoreProfile,
|
|
45
|
+
database: Optional[str] = None,
|
|
44
46
|
**kwargs,
|
|
45
47
|
):
|
|
46
48
|
super().__init__(project=project)
|
|
47
|
-
if "connection_string" not in kwargs:
|
|
48
|
-
raise mlrun.errors.MLRunInvalidArgumentError(
|
|
49
|
-
"connection_string is a required parameter for TDEngineConnector."
|
|
50
|
-
)
|
|
51
49
|
|
|
52
|
-
self.
|
|
50
|
+
self._tdengine_connection_profile = profile
|
|
53
51
|
self.database = (
|
|
54
52
|
database
|
|
55
53
|
or f"{tdengine_schemas._MODEL_MONITORING_DATABASE}_{mlrun.mlconf.system_id}"
|
|
@@ -70,7 +68,7 @@ class TDEngineConnector(TSDBConnector):
|
|
|
70
68
|
def _create_connection(self) -> TDEngineConnection:
|
|
71
69
|
"""Establish a connection to the TSDB server."""
|
|
72
70
|
logger.debug("Creating a new connection to TDEngine", project=self.project)
|
|
73
|
-
conn = TDEngineConnection(self.
|
|
71
|
+
conn = TDEngineConnection(self._tdengine_connection_profile.dsn())
|
|
74
72
|
conn.run(
|
|
75
73
|
statements=f"CREATE DATABASE IF NOT EXISTS {self.database}",
|
|
76
74
|
timeout=self._timeout,
|
|
@@ -168,11 +166,11 @@ class TDEngineConnector(TSDBConnector):
|
|
|
168
166
|
)
|
|
169
167
|
|
|
170
168
|
@staticmethod
|
|
171
|
-
def _convert_to_datetime(val:
|
|
169
|
+
def _convert_to_datetime(val: Union[str, datetime]) -> datetime:
|
|
172
170
|
return datetime.fromisoformat(val) if isinstance(val, str) else val
|
|
173
171
|
|
|
174
172
|
@staticmethod
|
|
175
|
-
def _get_endpoint_filter(endpoint_id:
|
|
173
|
+
def _get_endpoint_filter(endpoint_id: Union[str, list[str]]) -> str:
|
|
176
174
|
if isinstance(endpoint_id, str):
|
|
177
175
|
return f"endpoint_id='{endpoint_id}'"
|
|
178
176
|
elif isinstance(endpoint_id, list):
|
|
@@ -200,10 +198,10 @@ class TDEngineConnector(TSDBConnector):
|
|
|
200
198
|
|
|
201
199
|
def apply_tdengine_target(name, after):
|
|
202
200
|
graph.add_step(
|
|
203
|
-
"
|
|
201
|
+
"mlrun.datastore.storeytargets.TDEngineStoreyTarget",
|
|
204
202
|
name=name,
|
|
205
203
|
after=after,
|
|
206
|
-
url=self.
|
|
204
|
+
url=f"ds://{self._tdengine_connection_profile.name}",
|
|
207
205
|
supertable=self.tables[
|
|
208
206
|
mm_schemas.TDEngineSuperTables.PREDICTIONS
|
|
209
207
|
].super_table,
|
|
@@ -242,10 +240,10 @@ class TDEngineConnector(TSDBConnector):
|
|
|
242
240
|
after="ForwardError",
|
|
243
241
|
)
|
|
244
242
|
graph.add_step(
|
|
245
|
-
"
|
|
243
|
+
"mlrun.datastore.storeytargets.TDEngineStoreyTarget",
|
|
246
244
|
name="tsdb_error",
|
|
247
245
|
after="error_extractor",
|
|
248
|
-
url=self.
|
|
246
|
+
url=f"ds://{self._tdengine_connection_profile.name}",
|
|
249
247
|
supertable=self.tables[mm_schemas.TDEngineSuperTables.ERRORS].super_table,
|
|
250
248
|
table_col=mm_schemas.EventFieldType.TABLE_COLUMN,
|
|
251
249
|
time_col=mm_schemas.EventFieldType.TIME,
|
|
@@ -307,17 +305,17 @@ class TDEngineConnector(TSDBConnector):
|
|
|
307
305
|
table: str,
|
|
308
306
|
start: datetime,
|
|
309
307
|
end: datetime,
|
|
310
|
-
columns:
|
|
311
|
-
filter_query:
|
|
312
|
-
interval:
|
|
313
|
-
agg_funcs:
|
|
314
|
-
limit:
|
|
315
|
-
sliding_window_step:
|
|
308
|
+
columns: Optional[list[str]] = None,
|
|
309
|
+
filter_query: Optional[str] = None,
|
|
310
|
+
interval: Optional[str] = None,
|
|
311
|
+
agg_funcs: Optional[list] = None,
|
|
312
|
+
limit: Optional[int] = None,
|
|
313
|
+
sliding_window_step: Optional[str] = None,
|
|
316
314
|
timestamp_column: str = mm_schemas.EventFieldType.TIME,
|
|
317
|
-
group_by:
|
|
318
|
-
preform_agg_columns:
|
|
319
|
-
order_by:
|
|
320
|
-
desc:
|
|
315
|
+
group_by: Optional[Union[list[str], str]] = None,
|
|
316
|
+
preform_agg_columns: Optional[list] = None,
|
|
317
|
+
order_by: Optional[str] = None,
|
|
318
|
+
desc: Optional[bool] = None,
|
|
321
319
|
) -> pd.DataFrame:
|
|
322
320
|
"""
|
|
323
321
|
Getting records from TSDB data collection.
|
|
@@ -387,17 +385,17 @@ class TDEngineConnector(TSDBConnector):
|
|
|
387
385
|
start: datetime,
|
|
388
386
|
end: datetime,
|
|
389
387
|
metrics: list[mm_schemas.ModelEndpointMonitoringMetric],
|
|
390
|
-
type:
|
|
388
|
+
type: Literal["metrics", "results"],
|
|
391
389
|
with_result_extra_data: bool = False,
|
|
392
|
-
) ->
|
|
390
|
+
) -> Union[
|
|
393
391
|
list[
|
|
394
|
-
|
|
392
|
+
Union[
|
|
395
393
|
mm_schemas.ModelEndpointMonitoringResultValues,
|
|
396
394
|
mm_schemas.ModelEndpointMonitoringMetricNoData,
|
|
397
395
|
],
|
|
398
396
|
],
|
|
399
397
|
list[
|
|
400
|
-
|
|
398
|
+
Union[
|
|
401
399
|
mm_schemas.ModelEndpointMonitoringMetricValues,
|
|
402
400
|
mm_schemas.ModelEndpointMonitoringMetricNoData,
|
|
403
401
|
],
|
|
@@ -475,10 +473,10 @@ class TDEngineConnector(TSDBConnector):
|
|
|
475
473
|
endpoint_id: str,
|
|
476
474
|
start: datetime,
|
|
477
475
|
end: datetime,
|
|
478
|
-
aggregation_window:
|
|
479
|
-
agg_funcs:
|
|
480
|
-
limit:
|
|
481
|
-
) ->
|
|
476
|
+
aggregation_window: Optional[str] = None,
|
|
477
|
+
agg_funcs: Optional[list] = None,
|
|
478
|
+
limit: Optional[int] = None,
|
|
479
|
+
) -> Union[
|
|
482
480
|
mm_schemas.ModelEndpointMonitoringMetricValues,
|
|
483
481
|
mm_schemas.ModelEndpointMonitoringMetricNoData,
|
|
484
482
|
]:
|
|
@@ -530,9 +528,10 @@ class TDEngineConnector(TSDBConnector):
|
|
|
530
528
|
|
|
531
529
|
def get_last_request(
|
|
532
530
|
self,
|
|
533
|
-
endpoint_ids:
|
|
534
|
-
start:
|
|
535
|
-
end:
|
|
531
|
+
endpoint_ids: Union[str, list[str]],
|
|
532
|
+
start: Optional[datetime] = None,
|
|
533
|
+
end: Optional[datetime] = None,
|
|
534
|
+
get_raw: bool = False,
|
|
536
535
|
) -> pd.DataFrame:
|
|
537
536
|
filter_query = self._get_endpoint_filter(endpoint_id=endpoint_ids)
|
|
538
537
|
start, end = self._get_start_end(start, end)
|
|
@@ -570,9 +569,10 @@ class TDEngineConnector(TSDBConnector):
|
|
|
570
569
|
|
|
571
570
|
def get_drift_status(
|
|
572
571
|
self,
|
|
573
|
-
endpoint_ids:
|
|
574
|
-
start:
|
|
575
|
-
end:
|
|
572
|
+
endpoint_ids: Union[str, list[str]],
|
|
573
|
+
start: Optional[datetime] = None,
|
|
574
|
+
end: Optional[datetime] = None,
|
|
575
|
+
get_raw: bool = False,
|
|
576
576
|
) -> pd.DataFrame:
|
|
577
577
|
filter_query = self._get_endpoint_filter(endpoint_id=endpoint_ids)
|
|
578
578
|
start = start or (mlrun.utils.datetime_now() - timedelta(hours=24))
|
|
@@ -603,9 +603,9 @@ class TDEngineConnector(TSDBConnector):
|
|
|
603
603
|
|
|
604
604
|
def get_metrics_metadata(
|
|
605
605
|
self,
|
|
606
|
-
endpoint_id:
|
|
607
|
-
start:
|
|
608
|
-
end:
|
|
606
|
+
endpoint_id: Union[str, list[str]],
|
|
607
|
+
start: Optional[datetime] = None,
|
|
608
|
+
end: Optional[datetime] = None,
|
|
609
609
|
) -> pd.DataFrame:
|
|
610
610
|
start, end = self._get_start_end(start, end)
|
|
611
611
|
df = self._get_records(
|
|
@@ -640,9 +640,9 @@ class TDEngineConnector(TSDBConnector):
|
|
|
640
640
|
|
|
641
641
|
def get_results_metadata(
|
|
642
642
|
self,
|
|
643
|
-
endpoint_id:
|
|
644
|
-
start:
|
|
645
|
-
end:
|
|
643
|
+
endpoint_id: Union[str, list[str]],
|
|
644
|
+
start: Optional[datetime] = None,
|
|
645
|
+
end: Optional[datetime] = None,
|
|
646
646
|
) -> pd.DataFrame:
|
|
647
647
|
start, end = self._get_start_end(start, end)
|
|
648
648
|
df = self._get_records(
|
|
@@ -679,9 +679,10 @@ class TDEngineConnector(TSDBConnector):
|
|
|
679
679
|
|
|
680
680
|
def get_error_count(
|
|
681
681
|
self,
|
|
682
|
-
endpoint_ids:
|
|
683
|
-
start:
|
|
684
|
-
end:
|
|
682
|
+
endpoint_ids: Union[str, list[str]],
|
|
683
|
+
start: Optional[datetime] = None,
|
|
684
|
+
end: Optional[datetime] = None,
|
|
685
|
+
get_raw: bool = False,
|
|
685
686
|
) -> pd.DataFrame:
|
|
686
687
|
filter_query = self._get_endpoint_filter(endpoint_id=endpoint_ids)
|
|
687
688
|
filter_query += f"AND {mm_schemas.EventFieldType.ERROR_TYPE} = '{mm_schemas.EventFieldType.INFER_ERROR}'"
|
|
@@ -709,9 +710,10 @@ class TDEngineConnector(TSDBConnector):
|
|
|
709
710
|
|
|
710
711
|
def get_avg_latency(
|
|
711
712
|
self,
|
|
712
|
-
endpoint_ids:
|
|
713
|
-
start:
|
|
714
|
-
end:
|
|
713
|
+
endpoint_ids: Union[str, list[str]],
|
|
714
|
+
start: Optional[datetime] = None,
|
|
715
|
+
end: Optional[datetime] = None,
|
|
716
|
+
get_raw: bool = False,
|
|
715
717
|
) -> pd.DataFrame:
|
|
716
718
|
endpoint_ids = (
|
|
717
719
|
endpoint_ids if isinstance(endpoint_ids, list) else [endpoint_ids]
|
|
@@ -739,11 +741,74 @@ class TDEngineConnector(TSDBConnector):
|
|
|
739
741
|
df.dropna(inplace=True)
|
|
740
742
|
return df
|
|
741
743
|
|
|
744
|
+
async def add_basic_metrics(
|
|
745
|
+
self,
|
|
746
|
+
model_endpoint_objects: list[mlrun.common.schemas.ModelEndpoint],
|
|
747
|
+
project: str,
|
|
748
|
+
run_in_threadpool: Callable,
|
|
749
|
+
) -> list[mlrun.common.schemas.ModelEndpoint]:
|
|
750
|
+
"""
|
|
751
|
+
Add basic metrics to the model endpoint object.
|
|
752
|
+
|
|
753
|
+
:param model_endpoint_objects: A list of `ModelEndpoint` objects that will
|
|
754
|
+
be filled with the relevant basic metrics.
|
|
755
|
+
:param project: The name of the project.
|
|
756
|
+
:param run_in_threadpool: A function that runs another function in a thread pool.
|
|
757
|
+
|
|
758
|
+
:return: A list of `ModelEndpoint` objects, updated with the basic metrics.
|
|
759
|
+
"""
|
|
760
|
+
|
|
761
|
+
uids = [mep.metadata.uid for mep in model_endpoint_objects]
|
|
762
|
+
coroutines = [
|
|
763
|
+
run_in_threadpool(self.get_error_count, endpoint_ids=uids),
|
|
764
|
+
run_in_threadpool(self.get_last_request, endpoint_ids=uids),
|
|
765
|
+
run_in_threadpool(self.get_avg_latency, endpoint_ids=uids),
|
|
766
|
+
run_in_threadpool(self.get_drift_status, endpoint_ids=uids),
|
|
767
|
+
]
|
|
768
|
+
|
|
769
|
+
(
|
|
770
|
+
error_count_df,
|
|
771
|
+
last_request_df,
|
|
772
|
+
avg_latency_df,
|
|
773
|
+
drift_status_df,
|
|
774
|
+
) = await asyncio.gather(*coroutines)
|
|
775
|
+
|
|
776
|
+
def add_metrics(
|
|
777
|
+
mep: mlrun.common.schemas.ModelEndpoint,
|
|
778
|
+
df_dictionary: dict[str, pd.DataFrame],
|
|
779
|
+
):
|
|
780
|
+
for metric in df_dictionary.keys():
|
|
781
|
+
df = df_dictionary.get(metric, pd.DataFrame())
|
|
782
|
+
if not df.empty:
|
|
783
|
+
line = df[df["endpoint_id"] == mep.metadata.uid]
|
|
784
|
+
if not line.empty and metric in line:
|
|
785
|
+
value = line[metric].item()
|
|
786
|
+
if isinstance(value, pd.Timestamp):
|
|
787
|
+
value = value.to_pydatetime()
|
|
788
|
+
setattr(mep.status, metric, value)
|
|
789
|
+
|
|
790
|
+
return mep
|
|
791
|
+
|
|
792
|
+
return list(
|
|
793
|
+
map(
|
|
794
|
+
lambda mep: add_metrics(
|
|
795
|
+
mep=mep,
|
|
796
|
+
df_dictionary={
|
|
797
|
+
"error_count": error_count_df,
|
|
798
|
+
"last_request": last_request_df,
|
|
799
|
+
"avg_latency": avg_latency_df,
|
|
800
|
+
"result_status": drift_status_df,
|
|
801
|
+
},
|
|
802
|
+
),
|
|
803
|
+
model_endpoint_objects,
|
|
804
|
+
)
|
|
805
|
+
)
|
|
806
|
+
|
|
742
807
|
# Note: this function serves as a reference for checking the TSDB for the existence of a metric.
|
|
743
808
|
#
|
|
744
809
|
# def read_prediction_metric_for_endpoint_if_exists(
|
|
745
810
|
# self, endpoint_id: str
|
|
746
|
-
# ) ->
|
|
811
|
+
# ) -> Optional[mm_schemas.ModelEndpointMonitoringMetric]:
|
|
747
812
|
# """
|
|
748
813
|
# Read the "invocations" metric for the provided model endpoint, and return the metric object
|
|
749
814
|
# if it exists.
|