mlrun 1.8.0rc29__py3-none-any.whl → 1.8.0rc31__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of mlrun might be problematic. Click here for more details.
- mlrun/__init__.py +2 -34
- mlrun/api/schemas/__init__.py +1 -6
- mlrun/artifacts/document.py +3 -3
- mlrun/artifacts/manager.py +1 -0
- mlrun/artifacts/model.py +3 -3
- mlrun/common/model_monitoring/helpers.py +16 -7
- mlrun/common/runtimes/constants.py +5 -0
- mlrun/common/schemas/__init__.py +0 -2
- mlrun/common/schemas/model_monitoring/__init__.py +0 -2
- mlrun/common/schemas/model_monitoring/constants.py +4 -7
- mlrun/common/schemas/model_monitoring/grafana.py +17 -11
- mlrun/config.py +9 -36
- mlrun/datastore/datastore_profile.py +1 -1
- mlrun/datastore/sources.py +14 -13
- mlrun/datastore/storeytargets.py +20 -3
- mlrun/db/httpdb.py +4 -30
- mlrun/k8s_utils.py +2 -5
- mlrun/launcher/base.py +16 -0
- mlrun/model_monitoring/api.py +1 -2
- mlrun/model_monitoring/applications/_application_steps.py +23 -37
- mlrun/model_monitoring/applications/base.py +55 -40
- mlrun/model_monitoring/applications/context.py +0 -3
- mlrun/model_monitoring/applications/results.py +16 -16
- mlrun/model_monitoring/controller.py +35 -31
- mlrun/model_monitoring/db/tsdb/__init__.py +9 -5
- mlrun/model_monitoring/db/tsdb/base.py +60 -39
- mlrun/model_monitoring/db/tsdb/tdengine/tdengine_connector.py +122 -53
- mlrun/model_monitoring/db/tsdb/v3io/v3io_connector.py +140 -14
- mlrun/model_monitoring/helpers.py +124 -16
- mlrun/model_monitoring/stream_processing.py +6 -21
- mlrun/projects/pipelines.py +11 -3
- mlrun/projects/project.py +104 -115
- mlrun/run.py +2 -2
- mlrun/runtimes/nuclio/function.py +4 -2
- mlrun/serving/routers.py +3 -4
- mlrun/serving/server.py +10 -8
- mlrun/serving/states.py +12 -2
- mlrun/serving/v2_serving.py +25 -20
- mlrun/utils/async_http.py +32 -19
- mlrun/utils/helpers.py +5 -2
- mlrun/utils/logger.py +14 -10
- mlrun/utils/notifications/notification_pusher.py +25 -0
- mlrun/utils/regex.py +1 -0
- mlrun/utils/version/version.json +2 -2
- {mlrun-1.8.0rc29.dist-info → mlrun-1.8.0rc31.dist-info}/METADATA +4 -4
- {mlrun-1.8.0rc29.dist-info → mlrun-1.8.0rc31.dist-info}/RECORD +50 -50
- {mlrun-1.8.0rc29.dist-info → mlrun-1.8.0rc31.dist-info}/LICENSE +0 -0
- {mlrun-1.8.0rc29.dist-info → mlrun-1.8.0rc31.dist-info}/WHEEL +0 -0
- {mlrun-1.8.0rc29.dist-info → mlrun-1.8.0rc31.dist-info}/entry_points.txt +0 -0
- {mlrun-1.8.0rc29.dist-info → mlrun-1.8.0rc31.dist-info}/top_level.txt +0 -0
|
@@ -12,12 +12,13 @@
|
|
|
12
12
|
# See the License for the specific language governing permissions and
|
|
13
13
|
# limitations under the License.
|
|
14
14
|
|
|
15
|
-
import typing
|
|
16
15
|
from abc import ABC, abstractmethod
|
|
17
16
|
from datetime import datetime
|
|
17
|
+
from typing import Callable, ClassVar, Literal, Optional, Union
|
|
18
18
|
|
|
19
19
|
import pandas as pd
|
|
20
20
|
import pydantic.v1
|
|
21
|
+
import v3io_frames.client
|
|
21
22
|
|
|
22
23
|
import mlrun.common.schemas.model_monitoring as mm_schemas
|
|
23
24
|
import mlrun.model_monitoring.db.tsdb.helpers
|
|
@@ -26,7 +27,7 @@ from mlrun.utils import logger
|
|
|
26
27
|
|
|
27
28
|
|
|
28
29
|
class TSDBConnector(ABC):
|
|
29
|
-
type:
|
|
30
|
+
type: ClassVar[str]
|
|
30
31
|
|
|
31
32
|
def __init__(self, project: str) -> None:
|
|
32
33
|
"""
|
|
@@ -130,17 +131,17 @@ class TSDBConnector(ABC):
|
|
|
130
131
|
start: datetime,
|
|
131
132
|
end: datetime,
|
|
132
133
|
metrics: list[mm_schemas.ModelEndpointMonitoringMetric],
|
|
133
|
-
type:
|
|
134
|
+
type: Literal["metrics", "results"],
|
|
134
135
|
with_result_extra_data: bool,
|
|
135
|
-
) ->
|
|
136
|
+
) -> Union[
|
|
136
137
|
list[
|
|
137
|
-
|
|
138
|
+
Union[
|
|
138
139
|
mm_schemas.ModelEndpointMonitoringResultValues,
|
|
139
140
|
mm_schemas.ModelEndpointMonitoringMetricNoData,
|
|
140
141
|
],
|
|
141
142
|
],
|
|
142
143
|
list[
|
|
143
|
-
|
|
144
|
+
Union[
|
|
144
145
|
mm_schemas.ModelEndpointMonitoringMetricValues,
|
|
145
146
|
mm_schemas.ModelEndpointMonitoringMetricNoData,
|
|
146
147
|
],
|
|
@@ -166,10 +167,10 @@ class TSDBConnector(ABC):
|
|
|
166
167
|
endpoint_id: str,
|
|
167
168
|
start: datetime,
|
|
168
169
|
end: datetime,
|
|
169
|
-
aggregation_window:
|
|
170
|
-
agg_funcs:
|
|
171
|
-
limit:
|
|
172
|
-
) ->
|
|
170
|
+
aggregation_window: Optional[str] = None,
|
|
171
|
+
agg_funcs: Optional[list[str]] = None,
|
|
172
|
+
limit: Optional[int] = None,
|
|
173
|
+
) -> Union[
|
|
173
174
|
mm_schemas.ModelEndpointMonitoringMetricValues,
|
|
174
175
|
mm_schemas.ModelEndpointMonitoringMetricNoData,
|
|
175
176
|
]:
|
|
@@ -195,10 +196,11 @@ class TSDBConnector(ABC):
|
|
|
195
196
|
@abstractmethod
|
|
196
197
|
def get_last_request(
|
|
197
198
|
self,
|
|
198
|
-
endpoint_ids:
|
|
199
|
-
start:
|
|
200
|
-
end:
|
|
201
|
-
|
|
199
|
+
endpoint_ids: Union[str, list[str]],
|
|
200
|
+
start: Optional[datetime] = None,
|
|
201
|
+
end: Optional[datetime] = None,
|
|
202
|
+
get_raw: bool = False,
|
|
203
|
+
) -> Union[pd.DataFrame, list[v3io_frames.client.RawFrame]]:
|
|
202
204
|
"""
|
|
203
205
|
Fetches data from the predictions TSDB table and returns the most recent request
|
|
204
206
|
timestamp for each specified endpoint.
|
|
@@ -206,6 +208,8 @@ class TSDBConnector(ABC):
|
|
|
206
208
|
:param endpoint_ids: A single model endpoint identifier or a list of model endpoint identifiers.
|
|
207
209
|
:param start: The start time for the query.
|
|
208
210
|
:param end: The end time for the query.
|
|
211
|
+
:param get_raw: Whether to return the query result as raw frames rather than a pandas dataframe. Defaults
|
|
212
|
+
to False. This can greatly improve performance when a dataframe isn't needed.
|
|
209
213
|
|
|
210
214
|
:return: A pd.DataFrame containing the columns [endpoint_id, last_request, last_latency].
|
|
211
215
|
If an endpoint has not been invoked within the specified time range, it will not appear in the result.
|
|
@@ -214,10 +218,11 @@ class TSDBConnector(ABC):
|
|
|
214
218
|
@abstractmethod
|
|
215
219
|
def get_drift_status(
|
|
216
220
|
self,
|
|
217
|
-
endpoint_ids:
|
|
218
|
-
start:
|
|
219
|
-
end:
|
|
220
|
-
|
|
221
|
+
endpoint_ids: Union[str, list[str]],
|
|
222
|
+
start: Optional[datetime] = None,
|
|
223
|
+
end: Optional[datetime] = None,
|
|
224
|
+
get_raw: bool = False,
|
|
225
|
+
) -> Union[pd.DataFrame, list[v3io_frames.client.RawFrame]]:
|
|
221
226
|
"""
|
|
222
227
|
Fetches data from the app-results TSDB table and returns the highest status among all
|
|
223
228
|
the results in the provided time range, which by default is the last 24 hours, for each specified endpoint.
|
|
@@ -225,6 +230,8 @@ class TSDBConnector(ABC):
|
|
|
225
230
|
:param endpoint_ids: A single model endpoint identifier or a list of model endpoint identifiers.
|
|
226
231
|
:param start: The start time for the query.
|
|
227
232
|
:param end: The end time for the query.
|
|
233
|
+
:param get_raw: Whether to return the query result as raw frames rather than a pandas dataframe. Defaults
|
|
234
|
+
to False. This can greatly improve performance when a dataframe isn't needed.
|
|
228
235
|
|
|
229
236
|
:return: A pd.DataFrame containing the columns [result_status, endpoint_id].
|
|
230
237
|
If an endpoint has not been monitored within the specified time range (last 24 hours),
|
|
@@ -234,9 +241,9 @@ class TSDBConnector(ABC):
|
|
|
234
241
|
@abstractmethod
|
|
235
242
|
def get_metrics_metadata(
|
|
236
243
|
self,
|
|
237
|
-
endpoint_id:
|
|
238
|
-
start:
|
|
239
|
-
end:
|
|
244
|
+
endpoint_id: Union[str, list[str]],
|
|
245
|
+
start: Optional[datetime] = None,
|
|
246
|
+
end: Optional[datetime] = None,
|
|
240
247
|
) -> pd.DataFrame:
|
|
241
248
|
"""
|
|
242
249
|
Fetches distinct metrics metadata from the metrics TSDB table for the specified model endpoints.
|
|
@@ -252,9 +259,9 @@ class TSDBConnector(ABC):
|
|
|
252
259
|
@abstractmethod
|
|
253
260
|
def get_results_metadata(
|
|
254
261
|
self,
|
|
255
|
-
endpoint_id:
|
|
256
|
-
start:
|
|
257
|
-
end:
|
|
262
|
+
endpoint_id: Union[str, list[str]],
|
|
263
|
+
start: Optional[datetime] = None,
|
|
264
|
+
end: Optional[datetime] = None,
|
|
258
265
|
) -> pd.DataFrame:
|
|
259
266
|
"""
|
|
260
267
|
Fetches distinct results metadata from the app-results TSDB table for the specified model endpoints.
|
|
@@ -270,16 +277,19 @@ class TSDBConnector(ABC):
|
|
|
270
277
|
@abstractmethod
|
|
271
278
|
def get_error_count(
|
|
272
279
|
self,
|
|
273
|
-
endpoint_ids:
|
|
274
|
-
start:
|
|
275
|
-
end:
|
|
276
|
-
|
|
280
|
+
endpoint_ids: Union[str, list[str]],
|
|
281
|
+
start: Optional[datetime] = None,
|
|
282
|
+
end: Optional[datetime] = None,
|
|
283
|
+
get_raw: bool = False,
|
|
284
|
+
) -> Union[pd.DataFrame, list[v3io_frames.client.RawFrame]]:
|
|
277
285
|
"""
|
|
278
286
|
Fetches data from the error TSDB table and returns the error count for each specified endpoint.
|
|
279
287
|
|
|
280
288
|
:param endpoint_ids: A single model endpoint identifier or a list of model endpoint identifiers.
|
|
281
289
|
:param start: The start time for the query.
|
|
282
290
|
:param end: The end time for the query.
|
|
291
|
+
:param get_raw: Whether to return the query result as raw frames rather than a pandas dataframe. Defaults
|
|
292
|
+
to False. This can greatly improve performance when a dataframe isn't needed.
|
|
283
293
|
|
|
284
294
|
:return: A pd.DataFrame containing the columns [error_count, endpoint_id].
|
|
285
295
|
If an endpoint has not raised an error within the specified time range, it will not appear in the result.
|
|
@@ -288,10 +298,11 @@ class TSDBConnector(ABC):
|
|
|
288
298
|
@abstractmethod
|
|
289
299
|
def get_avg_latency(
|
|
290
300
|
self,
|
|
291
|
-
endpoint_ids:
|
|
292
|
-
start:
|
|
293
|
-
end:
|
|
294
|
-
|
|
301
|
+
endpoint_ids: Union[str, list[str]],
|
|
302
|
+
start: Optional[datetime] = None,
|
|
303
|
+
end: Optional[datetime] = None,
|
|
304
|
+
get_raw: bool = False,
|
|
305
|
+
) -> Union[pd.DataFrame, list[v3io_frames.client.RawFrame]]:
|
|
295
306
|
"""
|
|
296
307
|
Fetches data from the predictions TSDB table and returns the average latency for each specified endpoint
|
|
297
308
|
in the provided time range, which by default is the last 24 hours.
|
|
@@ -299,11 +310,21 @@ class TSDBConnector(ABC):
|
|
|
299
310
|
:param endpoint_ids: A single model endpoint identifier or a list of model endpoint identifiers.
|
|
300
311
|
:param start: The start time for the query.
|
|
301
312
|
:param end: The end time for the query.
|
|
313
|
+
:param get_raw: Whether to return the query result as raw frames rather than a pandas dataframe. Defaults
|
|
314
|
+
to False. This can greatly improve performance when a dataframe isn't needed.
|
|
302
315
|
|
|
303
316
|
:return: A pd.DataFrame containing the columns [avg_latency, endpoint_id].
|
|
304
317
|
If an endpoint has not been invoked within the specified time range, it will not appear in the result.
|
|
305
318
|
"""
|
|
306
319
|
|
|
320
|
+
async def add_basic_metrics(
|
|
321
|
+
self,
|
|
322
|
+
model_endpoint_objects: list[mlrun.common.schemas.ModelEndpoint],
|
|
323
|
+
project: str,
|
|
324
|
+
run_in_threadpool: Callable,
|
|
325
|
+
) -> list[mlrun.common.schemas.ModelEndpoint]:
|
|
326
|
+
raise NotImplementedError()
|
|
327
|
+
|
|
307
328
|
@staticmethod
|
|
308
329
|
def df_to_metrics_values(
|
|
309
330
|
*,
|
|
@@ -311,7 +332,7 @@ class TSDBConnector(ABC):
|
|
|
311
332
|
metrics: list[mm_schemas.ModelEndpointMonitoringMetric],
|
|
312
333
|
project: str,
|
|
313
334
|
) -> list[
|
|
314
|
-
|
|
335
|
+
Union[
|
|
315
336
|
mm_schemas.ModelEndpointMonitoringMetricValues,
|
|
316
337
|
mm_schemas.ModelEndpointMonitoringMetricNoData,
|
|
317
338
|
]
|
|
@@ -324,7 +345,7 @@ class TSDBConnector(ABC):
|
|
|
324
345
|
metrics_without_data = {metric.full_name: metric for metric in metrics}
|
|
325
346
|
|
|
326
347
|
metrics_values: list[
|
|
327
|
-
|
|
348
|
+
Union[
|
|
328
349
|
mm_schemas.ModelEndpointMonitoringMetricValues,
|
|
329
350
|
mm_schemas.ModelEndpointMonitoringMetricNoData,
|
|
330
351
|
]
|
|
@@ -377,7 +398,7 @@ class TSDBConnector(ABC):
|
|
|
377
398
|
metrics: list[mm_schemas.ModelEndpointMonitoringMetric],
|
|
378
399
|
project: str,
|
|
379
400
|
) -> list[
|
|
380
|
-
|
|
401
|
+
Union[
|
|
381
402
|
mm_schemas.ModelEndpointMonitoringResultValues,
|
|
382
403
|
mm_schemas.ModelEndpointMonitoringMetricNoData,
|
|
383
404
|
]
|
|
@@ -390,7 +411,7 @@ class TSDBConnector(ABC):
|
|
|
390
411
|
metrics_without_data = {metric.full_name: metric for metric in metrics}
|
|
391
412
|
|
|
392
413
|
metrics_values: list[
|
|
393
|
-
|
|
414
|
+
Union[
|
|
394
415
|
mm_schemas.ModelEndpointMonitoringResultValues,
|
|
395
416
|
mm_schemas.ModelEndpointMonitoringMetricNoData,
|
|
396
417
|
]
|
|
@@ -536,7 +557,7 @@ class TSDBConnector(ABC):
|
|
|
536
557
|
*,
|
|
537
558
|
df: pd.DataFrame,
|
|
538
559
|
project: str,
|
|
539
|
-
type:
|
|
560
|
+
type: Union[str, mm_schemas.ModelEndpointMonitoringMetricType],
|
|
540
561
|
) -> dict[str, list[mm_schemas.ModelEndpointMonitoringMetric]]:
|
|
541
562
|
"""
|
|
542
563
|
Parse a DataFrame of metrics from the TSDB into a dict of intersection metrics/results by name and application
|
|
@@ -591,8 +612,8 @@ class TSDBConnector(ABC):
|
|
|
591
612
|
|
|
592
613
|
@staticmethod
|
|
593
614
|
def _get_start_end(
|
|
594
|
-
start:
|
|
595
|
-
end:
|
|
615
|
+
start: Union[datetime, None],
|
|
616
|
+
end: Union[datetime, None],
|
|
596
617
|
) -> tuple[datetime, datetime]:
|
|
597
618
|
"""
|
|
598
619
|
Static utility function for the TSDB start/end time format.
|
|
@@ -11,9 +11,9 @@
|
|
|
11
11
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
12
|
# See the License for the specific language governing permissions and
|
|
13
13
|
# limitations under the License.
|
|
14
|
-
|
|
15
|
-
import typing
|
|
14
|
+
import asyncio
|
|
16
15
|
from datetime import datetime, timedelta
|
|
16
|
+
from typing import Callable, Literal, Optional, Union
|
|
17
17
|
|
|
18
18
|
import pandas as pd
|
|
19
19
|
import taosws
|
|
@@ -25,6 +25,7 @@ from taoswswrap.tdengine_connection import (
|
|
|
25
25
|
import mlrun.common.schemas.model_monitoring as mm_schemas
|
|
26
26
|
import mlrun.model_monitoring.db.tsdb.tdengine.schemas as tdengine_schemas
|
|
27
27
|
import mlrun.model_monitoring.db.tsdb.tdengine.stream_graph_steps
|
|
28
|
+
from mlrun.datastore.datastore_profile import DatastoreProfile
|
|
28
29
|
from mlrun.model_monitoring.db import TSDBConnector
|
|
29
30
|
from mlrun.model_monitoring.helpers import get_invocations_fqn
|
|
30
31
|
from mlrun.utils import logger
|
|
@@ -40,16 +41,17 @@ class TDEngineConnector(TSDBConnector):
|
|
|
40
41
|
def __init__(
|
|
41
42
|
self,
|
|
42
43
|
project: str,
|
|
43
|
-
|
|
44
|
+
profile: DatastoreProfile,
|
|
45
|
+
database: Optional[str] = None,
|
|
44
46
|
**kwargs,
|
|
45
47
|
):
|
|
46
48
|
super().__init__(project=project)
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
49
|
+
|
|
50
|
+
self._tdengine_connection_profile = profile
|
|
51
|
+
self.database = (
|
|
52
|
+
database
|
|
53
|
+
or f"{tdengine_schemas._MODEL_MONITORING_DATABASE}_{mlrun.mlconf.system_id}"
|
|
54
|
+
)
|
|
53
55
|
|
|
54
56
|
self._connection = None
|
|
55
57
|
self._init_super_tables()
|
|
@@ -66,7 +68,7 @@ class TDEngineConnector(TSDBConnector):
|
|
|
66
68
|
def _create_connection(self) -> TDEngineConnection:
|
|
67
69
|
"""Establish a connection to the TSDB server."""
|
|
68
70
|
logger.debug("Creating a new connection to TDEngine", project=self.project)
|
|
69
|
-
conn = TDEngineConnection(self.
|
|
71
|
+
conn = TDEngineConnection(self._tdengine_connection_profile.dsn())
|
|
70
72
|
conn.run(
|
|
71
73
|
statements=f"CREATE DATABASE IF NOT EXISTS {self.database}",
|
|
72
74
|
timeout=self._timeout,
|
|
@@ -164,11 +166,11 @@ class TDEngineConnector(TSDBConnector):
|
|
|
164
166
|
)
|
|
165
167
|
|
|
166
168
|
@staticmethod
|
|
167
|
-
def _convert_to_datetime(val:
|
|
169
|
+
def _convert_to_datetime(val: Union[str, datetime]) -> datetime:
|
|
168
170
|
return datetime.fromisoformat(val) if isinstance(val, str) else val
|
|
169
171
|
|
|
170
172
|
@staticmethod
|
|
171
|
-
def _get_endpoint_filter(endpoint_id:
|
|
173
|
+
def _get_endpoint_filter(endpoint_id: Union[str, list[str]]) -> str:
|
|
172
174
|
if isinstance(endpoint_id, str):
|
|
173
175
|
return f"endpoint_id='{endpoint_id}'"
|
|
174
176
|
elif isinstance(endpoint_id, list):
|
|
@@ -196,10 +198,10 @@ class TDEngineConnector(TSDBConnector):
|
|
|
196
198
|
|
|
197
199
|
def apply_tdengine_target(name, after):
|
|
198
200
|
graph.add_step(
|
|
199
|
-
"
|
|
201
|
+
"mlrun.datastore.storeytargets.TDEngineStoreyTarget",
|
|
200
202
|
name=name,
|
|
201
203
|
after=after,
|
|
202
|
-
url=self.
|
|
204
|
+
url=f"ds://{self._tdengine_connection_profile.name}",
|
|
203
205
|
supertable=self.tables[
|
|
204
206
|
mm_schemas.TDEngineSuperTables.PREDICTIONS
|
|
205
207
|
].super_table,
|
|
@@ -238,10 +240,10 @@ class TDEngineConnector(TSDBConnector):
|
|
|
238
240
|
after="ForwardError",
|
|
239
241
|
)
|
|
240
242
|
graph.add_step(
|
|
241
|
-
"
|
|
243
|
+
"mlrun.datastore.storeytargets.TDEngineStoreyTarget",
|
|
242
244
|
name="tsdb_error",
|
|
243
245
|
after="error_extractor",
|
|
244
|
-
url=self.
|
|
246
|
+
url=f"ds://{self._tdengine_connection_profile.name}",
|
|
245
247
|
supertable=self.tables[mm_schemas.TDEngineSuperTables.ERRORS].super_table,
|
|
246
248
|
table_col=mm_schemas.EventFieldType.TABLE_COLUMN,
|
|
247
249
|
time_col=mm_schemas.EventFieldType.TIME,
|
|
@@ -303,17 +305,17 @@ class TDEngineConnector(TSDBConnector):
|
|
|
303
305
|
table: str,
|
|
304
306
|
start: datetime,
|
|
305
307
|
end: datetime,
|
|
306
|
-
columns:
|
|
307
|
-
filter_query:
|
|
308
|
-
interval:
|
|
309
|
-
agg_funcs:
|
|
310
|
-
limit:
|
|
311
|
-
sliding_window_step:
|
|
308
|
+
columns: Optional[list[str]] = None,
|
|
309
|
+
filter_query: Optional[str] = None,
|
|
310
|
+
interval: Optional[str] = None,
|
|
311
|
+
agg_funcs: Optional[list] = None,
|
|
312
|
+
limit: Optional[int] = None,
|
|
313
|
+
sliding_window_step: Optional[str] = None,
|
|
312
314
|
timestamp_column: str = mm_schemas.EventFieldType.TIME,
|
|
313
|
-
group_by:
|
|
314
|
-
preform_agg_columns:
|
|
315
|
-
order_by:
|
|
316
|
-
desc:
|
|
315
|
+
group_by: Optional[Union[list[str], str]] = None,
|
|
316
|
+
preform_agg_columns: Optional[list] = None,
|
|
317
|
+
order_by: Optional[str] = None,
|
|
318
|
+
desc: Optional[bool] = None,
|
|
317
319
|
) -> pd.DataFrame:
|
|
318
320
|
"""
|
|
319
321
|
Getting records from TSDB data collection.
|
|
@@ -383,17 +385,17 @@ class TDEngineConnector(TSDBConnector):
|
|
|
383
385
|
start: datetime,
|
|
384
386
|
end: datetime,
|
|
385
387
|
metrics: list[mm_schemas.ModelEndpointMonitoringMetric],
|
|
386
|
-
type:
|
|
388
|
+
type: Literal["metrics", "results"],
|
|
387
389
|
with_result_extra_data: bool = False,
|
|
388
|
-
) ->
|
|
390
|
+
) -> Union[
|
|
389
391
|
list[
|
|
390
|
-
|
|
392
|
+
Union[
|
|
391
393
|
mm_schemas.ModelEndpointMonitoringResultValues,
|
|
392
394
|
mm_schemas.ModelEndpointMonitoringMetricNoData,
|
|
393
395
|
],
|
|
394
396
|
],
|
|
395
397
|
list[
|
|
396
|
-
|
|
398
|
+
Union[
|
|
397
399
|
mm_schemas.ModelEndpointMonitoringMetricValues,
|
|
398
400
|
mm_schemas.ModelEndpointMonitoringMetricNoData,
|
|
399
401
|
],
|
|
@@ -471,10 +473,10 @@ class TDEngineConnector(TSDBConnector):
|
|
|
471
473
|
endpoint_id: str,
|
|
472
474
|
start: datetime,
|
|
473
475
|
end: datetime,
|
|
474
|
-
aggregation_window:
|
|
475
|
-
agg_funcs:
|
|
476
|
-
limit:
|
|
477
|
-
) ->
|
|
476
|
+
aggregation_window: Optional[str] = None,
|
|
477
|
+
agg_funcs: Optional[list] = None,
|
|
478
|
+
limit: Optional[int] = None,
|
|
479
|
+
) -> Union[
|
|
478
480
|
mm_schemas.ModelEndpointMonitoringMetricValues,
|
|
479
481
|
mm_schemas.ModelEndpointMonitoringMetricNoData,
|
|
480
482
|
]:
|
|
@@ -526,9 +528,10 @@ class TDEngineConnector(TSDBConnector):
|
|
|
526
528
|
|
|
527
529
|
def get_last_request(
|
|
528
530
|
self,
|
|
529
|
-
endpoint_ids:
|
|
530
|
-
start:
|
|
531
|
-
end:
|
|
531
|
+
endpoint_ids: Union[str, list[str]],
|
|
532
|
+
start: Optional[datetime] = None,
|
|
533
|
+
end: Optional[datetime] = None,
|
|
534
|
+
get_raw: bool = False,
|
|
532
535
|
) -> pd.DataFrame:
|
|
533
536
|
filter_query = self._get_endpoint_filter(endpoint_id=endpoint_ids)
|
|
534
537
|
start, end = self._get_start_end(start, end)
|
|
@@ -566,9 +569,10 @@ class TDEngineConnector(TSDBConnector):
|
|
|
566
569
|
|
|
567
570
|
def get_drift_status(
|
|
568
571
|
self,
|
|
569
|
-
endpoint_ids:
|
|
570
|
-
start:
|
|
571
|
-
end:
|
|
572
|
+
endpoint_ids: Union[str, list[str]],
|
|
573
|
+
start: Optional[datetime] = None,
|
|
574
|
+
end: Optional[datetime] = None,
|
|
575
|
+
get_raw: bool = False,
|
|
572
576
|
) -> pd.DataFrame:
|
|
573
577
|
filter_query = self._get_endpoint_filter(endpoint_id=endpoint_ids)
|
|
574
578
|
start = start or (mlrun.utils.datetime_now() - timedelta(hours=24))
|
|
@@ -599,9 +603,9 @@ class TDEngineConnector(TSDBConnector):
|
|
|
599
603
|
|
|
600
604
|
def get_metrics_metadata(
|
|
601
605
|
self,
|
|
602
|
-
endpoint_id:
|
|
603
|
-
start:
|
|
604
|
-
end:
|
|
606
|
+
endpoint_id: Union[str, list[str]],
|
|
607
|
+
start: Optional[datetime] = None,
|
|
608
|
+
end: Optional[datetime] = None,
|
|
605
609
|
) -> pd.DataFrame:
|
|
606
610
|
start, end = self._get_start_end(start, end)
|
|
607
611
|
df = self._get_records(
|
|
@@ -636,9 +640,9 @@ class TDEngineConnector(TSDBConnector):
|
|
|
636
640
|
|
|
637
641
|
def get_results_metadata(
|
|
638
642
|
self,
|
|
639
|
-
endpoint_id:
|
|
640
|
-
start:
|
|
641
|
-
end:
|
|
643
|
+
endpoint_id: Union[str, list[str]],
|
|
644
|
+
start: Optional[datetime] = None,
|
|
645
|
+
end: Optional[datetime] = None,
|
|
642
646
|
) -> pd.DataFrame:
|
|
643
647
|
start, end = self._get_start_end(start, end)
|
|
644
648
|
df = self._get_records(
|
|
@@ -675,9 +679,10 @@ class TDEngineConnector(TSDBConnector):
|
|
|
675
679
|
|
|
676
680
|
def get_error_count(
|
|
677
681
|
self,
|
|
678
|
-
endpoint_ids:
|
|
679
|
-
start:
|
|
680
|
-
end:
|
|
682
|
+
endpoint_ids: Union[str, list[str]],
|
|
683
|
+
start: Optional[datetime] = None,
|
|
684
|
+
end: Optional[datetime] = None,
|
|
685
|
+
get_raw: bool = False,
|
|
681
686
|
) -> pd.DataFrame:
|
|
682
687
|
filter_query = self._get_endpoint_filter(endpoint_id=endpoint_ids)
|
|
683
688
|
filter_query += f"AND {mm_schemas.EventFieldType.ERROR_TYPE} = '{mm_schemas.EventFieldType.INFER_ERROR}'"
|
|
@@ -705,9 +710,10 @@ class TDEngineConnector(TSDBConnector):
|
|
|
705
710
|
|
|
706
711
|
def get_avg_latency(
|
|
707
712
|
self,
|
|
708
|
-
endpoint_ids:
|
|
709
|
-
start:
|
|
710
|
-
end:
|
|
713
|
+
endpoint_ids: Union[str, list[str]],
|
|
714
|
+
start: Optional[datetime] = None,
|
|
715
|
+
end: Optional[datetime] = None,
|
|
716
|
+
get_raw: bool = False,
|
|
711
717
|
) -> pd.DataFrame:
|
|
712
718
|
endpoint_ids = (
|
|
713
719
|
endpoint_ids if isinstance(endpoint_ids, list) else [endpoint_ids]
|
|
@@ -735,11 +741,74 @@ class TDEngineConnector(TSDBConnector):
|
|
|
735
741
|
df.dropna(inplace=True)
|
|
736
742
|
return df
|
|
737
743
|
|
|
744
|
+
async def add_basic_metrics(
|
|
745
|
+
self,
|
|
746
|
+
model_endpoint_objects: list[mlrun.common.schemas.ModelEndpoint],
|
|
747
|
+
project: str,
|
|
748
|
+
run_in_threadpool: Callable,
|
|
749
|
+
) -> list[mlrun.common.schemas.ModelEndpoint]:
|
|
750
|
+
"""
|
|
751
|
+
Add basic metrics to the model endpoint object.
|
|
752
|
+
|
|
753
|
+
:param model_endpoint_objects: A list of `ModelEndpoint` objects that will
|
|
754
|
+
be filled with the relevant basic metrics.
|
|
755
|
+
:param project: The name of the project.
|
|
756
|
+
:param run_in_threadpool: A function that runs another function in a thread pool.
|
|
757
|
+
|
|
758
|
+
:return: A list of `ModelEndpoint` objects, filled with the available basic metrics.
|
|
759
|
+
"""
|
|
760
|
+
|
|
761
|
+
uids = [mep.metadata.uid for mep in model_endpoint_objects]
|
|
762
|
+
coroutines = [
|
|
763
|
+
run_in_threadpool(self.get_error_count, endpoint_ids=uids),
|
|
764
|
+
run_in_threadpool(self.get_last_request, endpoint_ids=uids),
|
|
765
|
+
run_in_threadpool(self.get_avg_latency, endpoint_ids=uids),
|
|
766
|
+
run_in_threadpool(self.get_drift_status, endpoint_ids=uids),
|
|
767
|
+
]
|
|
768
|
+
|
|
769
|
+
(
|
|
770
|
+
error_count_df,
|
|
771
|
+
last_request_df,
|
|
772
|
+
avg_latency_df,
|
|
773
|
+
drift_status_df,
|
|
774
|
+
) = await asyncio.gather(*coroutines)
|
|
775
|
+
|
|
776
|
+
def add_metrics(
|
|
777
|
+
mep: mlrun.common.schemas.ModelEndpoint,
|
|
778
|
+
df_dictionary: dict[str, pd.DataFrame],
|
|
779
|
+
):
|
|
780
|
+
for metric in df_dictionary.keys():
|
|
781
|
+
df = df_dictionary.get(metric, pd.DataFrame())
|
|
782
|
+
if not df.empty:
|
|
783
|
+
line = df[df["endpoint_id"] == mep.metadata.uid]
|
|
784
|
+
if not line.empty and metric in line:
|
|
785
|
+
value = line[metric].item()
|
|
786
|
+
if isinstance(value, pd.Timestamp):
|
|
787
|
+
value = value.to_pydatetime()
|
|
788
|
+
setattr(mep.status, metric, value)
|
|
789
|
+
|
|
790
|
+
return mep
|
|
791
|
+
|
|
792
|
+
return list(
|
|
793
|
+
map(
|
|
794
|
+
lambda mep: add_metrics(
|
|
795
|
+
mep=mep,
|
|
796
|
+
df_dictionary={
|
|
797
|
+
"error_count": error_count_df,
|
|
798
|
+
"last_request": last_request_df,
|
|
799
|
+
"avg_latency": avg_latency_df,
|
|
800
|
+
"result_status": drift_status_df,
|
|
801
|
+
},
|
|
802
|
+
),
|
|
803
|
+
model_endpoint_objects,
|
|
804
|
+
)
|
|
805
|
+
)
|
|
806
|
+
|
|
738
807
|
# Note: this function serves as a reference for checking the TSDB for the existence of a metric.
|
|
739
808
|
#
|
|
740
809
|
# def read_prediction_metric_for_endpoint_if_exists(
|
|
741
810
|
# self, endpoint_id: str
|
|
742
|
-
# ) ->
|
|
811
|
+
# ) -> Optional[mm_schemas.ModelEndpointMonitoringMetric]:
|
|
743
812
|
# """
|
|
744
813
|
# Read the "invocations" metric for the provided model endpoint, and return the metric object
|
|
745
814
|
# if it exists.
|