mlrun-1.7.0rc17-py3-none-any.whl → mlrun-1.7.0rc19-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
- mlrun/__main__.py +5 -2
- mlrun/alerts/alert.py +1 -1
- mlrun/artifacts/manager.py +5 -1
- mlrun/common/constants.py +64 -3
- mlrun/common/formatters/__init__.py +16 -0
- mlrun/common/formatters/base.py +59 -0
- mlrun/common/formatters/function.py +41 -0
- mlrun/common/runtimes/constants.py +32 -4
- mlrun/common/schemas/__init__.py +1 -2
- mlrun/common/schemas/alert.py +31 -9
- mlrun/common/schemas/api_gateway.py +52 -0
- mlrun/common/schemas/client_spec.py +1 -0
- mlrun/common/schemas/frontend_spec.py +1 -0
- mlrun/common/schemas/function.py +4 -0
- mlrun/common/schemas/model_monitoring/__init__.py +9 -4
- mlrun/common/schemas/model_monitoring/constants.py +22 -8
- mlrun/common/schemas/model_monitoring/grafana.py +9 -5
- mlrun/common/schemas/model_monitoring/model_endpoints.py +17 -6
- mlrun/config.py +9 -2
- mlrun/data_types/to_pandas.py +5 -5
- mlrun/datastore/datastore.py +6 -2
- mlrun/datastore/redis.py +2 -2
- mlrun/datastore/s3.py +5 -0
- mlrun/datastore/sources.py +106 -7
- mlrun/datastore/store_resources.py +5 -1
- mlrun/datastore/targets.py +5 -4
- mlrun/datastore/utils.py +42 -0
- mlrun/db/base.py +5 -1
- mlrun/db/httpdb.py +22 -3
- mlrun/db/nopdb.py +5 -1
- mlrun/errors.py +6 -0
- mlrun/execution.py +16 -6
- mlrun/feature_store/ingestion.py +7 -6
- mlrun/feature_store/retrieval/conversion.py +5 -5
- mlrun/feature_store/retrieval/job.py +7 -3
- mlrun/feature_store/retrieval/spark_merger.py +2 -1
- mlrun/frameworks/_dl_common/loggers/tensorboard_logger.py +2 -2
- mlrun/frameworks/parallel_coordinates.py +2 -1
- mlrun/frameworks/tf_keras/__init__.py +4 -1
- mlrun/launcher/client.py +4 -2
- mlrun/launcher/local.py +8 -2
- mlrun/launcher/remote.py +8 -2
- mlrun/model.py +5 -1
- mlrun/model_monitoring/db/stores/__init__.py +0 -2
- mlrun/model_monitoring/db/stores/base/store.py +16 -4
- mlrun/model_monitoring/db/stores/sqldb/models/__init__.py +43 -21
- mlrun/model_monitoring/db/stores/sqldb/models/base.py +32 -2
- mlrun/model_monitoring/db/stores/sqldb/models/mysql.py +25 -5
- mlrun/model_monitoring/db/stores/sqldb/models/sqlite.py +5 -0
- mlrun/model_monitoring/db/stores/sqldb/sql_store.py +235 -166
- mlrun/model_monitoring/db/stores/v3io_kv/kv_store.py +190 -91
- mlrun/model_monitoring/db/tsdb/__init__.py +35 -6
- mlrun/model_monitoring/db/tsdb/base.py +232 -38
- mlrun/model_monitoring/db/tsdb/helpers.py +30 -0
- mlrun/model_monitoring/db/tsdb/tdengine/__init__.py +15 -0
- mlrun/model_monitoring/db/tsdb/tdengine/schemas.py +240 -0
- mlrun/model_monitoring/db/tsdb/tdengine/stream_graph_steps.py +45 -0
- mlrun/model_monitoring/db/tsdb/tdengine/tdengine_connector.py +397 -0
- mlrun/model_monitoring/db/tsdb/v3io/v3io_connector.py +292 -104
- mlrun/model_monitoring/helpers.py +45 -0
- mlrun/model_monitoring/stream_processing.py +7 -4
- mlrun/model_monitoring/writer.py +50 -20
- mlrun/package/utils/_formatter.py +2 -2
- mlrun/projects/operations.py +8 -5
- mlrun/projects/pipelines.py +42 -15
- mlrun/projects/project.py +55 -14
- mlrun/render.py +8 -5
- mlrun/runtimes/base.py +2 -1
- mlrun/runtimes/databricks_job/databricks_wrapper.py +1 -1
- mlrun/runtimes/local.py +4 -1
- mlrun/runtimes/nuclio/api_gateway.py +32 -8
- mlrun/runtimes/nuclio/application/application.py +3 -3
- mlrun/runtimes/nuclio/function.py +1 -4
- mlrun/runtimes/utils.py +5 -6
- mlrun/serving/server.py +2 -1
- mlrun/utils/async_http.py +25 -5
- mlrun/utils/helpers.py +28 -7
- mlrun/utils/logger.py +28 -1
- mlrun/utils/notifications/notification/__init__.py +14 -9
- mlrun/utils/notifications/notification/slack.py +27 -7
- mlrun/utils/notifications/notification_pusher.py +47 -42
- mlrun/utils/v3io_clients.py +0 -1
- mlrun/utils/version/version.json +2 -2
- {mlrun-1.7.0rc17.dist-info → mlrun-1.7.0rc19.dist-info}/METADATA +9 -4
- {mlrun-1.7.0rc17.dist-info → mlrun-1.7.0rc19.dist-info}/RECORD +89 -82
- mlrun/model_monitoring/db/v3io_tsdb_reader.py +0 -134
- {mlrun-1.7.0rc17.dist-info → mlrun-1.7.0rc19.dist-info}/LICENSE +0 -0
- {mlrun-1.7.0rc17.dist-info → mlrun-1.7.0rc19.dist-info}/WHEEL +0 -0
- {mlrun-1.7.0rc17.dist-info → mlrun-1.7.0rc19.dist-info}/entry_points.txt +0 -0
- {mlrun-1.7.0rc17.dist-info → mlrun-1.7.0rc19.dist-info}/top_level.txt +0 -0
--- a/mlrun/model_monitoring/db/stores/v3io_kv/kv_store.py
+++ b/mlrun/model_monitoring/db/stores/v3io_kv/kv_store.py
@@ -15,23 +15,79 @@
 import json
 import os
 import typing
+from dataclasses import dataclass
 from http import HTTPStatus

 import v3io.dataplane
+import v3io.dataplane.output
 import v3io.dataplane.response

 import mlrun.common.model_monitoring.helpers
-import mlrun.common.schemas.model_monitoring as
+import mlrun.common.schemas.model_monitoring as mm_schemas
 import mlrun.model_monitoring.db
 import mlrun.utils.v3io_clients
 from mlrun.utils import logger

 # Fields to encode before storing in the KV table or to decode after retrieving
 fields_to_encode_decode = [
-
-
+    mm_schemas.EventFieldType.FEATURE_STATS,
+    mm_schemas.EventFieldType.CURRENT_STATS,
 ]

+_METRIC_FIELDS: list[str] = [
+    mm_schemas.WriterEvent.APPLICATION_NAME,
+    mm_schemas.MetricData.METRIC_NAME,
+    mm_schemas.MetricData.METRIC_VALUE,
+    mm_schemas.WriterEvent.START_INFER_TIME,
+    mm_schemas.WriterEvent.END_INFER_TIME,
+]
+
+
+class SchemaField(typing.TypedDict):
+    name: str
+    type: str
+    nullable: bool
+
+
+@dataclass
+class SchemaParams:
+    key: str
+    fields: list[SchemaField]
+
+
+_RESULT_SCHEMA: list[SchemaField] = [
+    SchemaField(
+        name=mm_schemas.ResultData.RESULT_NAME,
+        type=mm_schemas.GrafanaColumnType.STRING,
+        nullable=False,
+    )
+]
+
+_METRIC_SCHEMA: list[SchemaField] = [
+    SchemaField(
+        name=mm_schemas.WriterEvent.APPLICATION_NAME,
+        type=mm_schemas.GrafanaColumnType.STRING,
+        nullable=False,
+    ),
+    SchemaField(
+        name=mm_schemas.MetricData.METRIC_NAME,
+        type=mm_schemas.GrafanaColumnType.STRING,
+        nullable=False,
+    ),
+]
+
+
+_KIND_TO_SCHEMA_PARAMS: dict[mm_schemas.WriterEventKind, SchemaParams] = {
+    mm_schemas.WriterEventKind.RESULT: SchemaParams(
+        key=mm_schemas.WriterEvent.APPLICATION_NAME, fields=_RESULT_SCHEMA
+    ),
+    mm_schemas.WriterEventKind.METRIC: SchemaParams(
+        key="metric_id", fields=_METRIC_SCHEMA
+    ),
+}
+
+_EXCLUDE_SCHEMA_FILTER_EXPRESSION = '__name!=".#schema"'
+

 class KVStoreBase(mlrun.model_monitoring.db.StoreBase):
     """
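For orientation, here is a minimal standalone sketch of the kind-to-schema lookup that the new SchemaField/SchemaParams definitions above enable. The plain string values are illustrative stand-ins for the mm_schemas enum members, not the real constants:

from dataclasses import dataclass
from typing import TypedDict


class SchemaField(TypedDict):
    name: str
    type: str
    nullable: bool


@dataclass
class SchemaParams:
    key: str
    fields: list[SchemaField]


# Illustrative stand-ins for the mm_schemas constants referenced in the diff above.
_RESULT_SCHEMA = [SchemaField(name="result_name", type="string", nullable=False)]
_METRIC_SCHEMA = [
    SchemaField(name="application_name", type="string", nullable=False),
    SchemaField(name="metric_name", type="string", nullable=False),
]

KIND_TO_SCHEMA_PARAMS = {
    "result": SchemaParams(key="application_name", fields=_RESULT_SCHEMA),
    "metric": SchemaParams(key="metric_id", fields=_METRIC_SCHEMA),
}

params = KIND_TO_SCHEMA_PARAMS["metric"]
print(params.key, [field["name"] for field in params.fields])

In the real module the mapping is keyed by mm_schemas.WriterEventKind members rather than plain strings, and _generate_kv_schema looks the schema parameters up per event kind.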
@@ -64,7 +120,7 @@ class KVStoreBase(mlrun.model_monitoring.db.StoreBase):
         self.client.kv.put(
             container=self.container,
             table_path=self.path,
-            key=endpoint[
+            key=endpoint[mm_schemas.EventFieldType.UID],
             attributes=endpoint,
         )

@@ -151,7 +207,7 @@ class KVStoreBase(mlrun.model_monitoring.db.StoreBase):
         """Getting path and container based on the model monitoring configurations"""
         path = mlrun.mlconf.model_endpoint_monitoring.store_prefixes.default.format(
             project=self.project,
-            kind=
+            kind=mm_schemas.ModelMonitoringStoreKinds.ENDPOINTS,
         )
         (
             _,
@@ -217,11 +273,11 @@ class KVStoreBase(mlrun.model_monitoring.db.StoreBase):
         if uids is None:
             uids = []
             for item in items:
-                if
+                if mm_schemas.EventFieldType.UID not in item:
                     # This is kept for backwards compatibility - in old versions the key column named endpoint_id
-                    uids.append(item[
+                    uids.append(item[mm_schemas.EventFieldType.ENDPOINT_ID])
                 else:
-                    uids.append(item[
+                    uids.append(item[mm_schemas.EventFieldType.UID])

         # Add each relevant model endpoint to the model endpoints list
         for endpoint_id in uids:
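The two backwards-compatibility branches above boil down to a small key-fallback rule. A hedged sketch, assuming the underlying attribute names are "uid" and the legacy "endpoint_id" (as the diff's comments indicate):

def get_endpoint_uid(item: dict) -> str:
    # Newer records are keyed by "uid"; old records only carry "endpoint_id".
    return item["uid"] if "uid" in item else item["endpoint_id"]


assert get_endpoint_uid({"uid": "abc"}) == "abc"
assert get_endpoint_uid({"endpoint_id": "legacy"}) == "legacy"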
@@ -241,11 +297,11 @@ class KVStoreBase(mlrun.model_monitoring.db.StoreBase):

         # Delete model endpoint record from KV table
         for endpoint_dict in endpoints:
-            if
+            if mm_schemas.EventFieldType.UID not in endpoint_dict:
                 # This is kept for backwards compatibility - in old versions the key column named endpoint_id
-                endpoint_id = endpoint_dict[
+                endpoint_id = endpoint_dict[mm_schemas.EventFieldType.ENDPOINT_ID]
             else:
-                endpoint_id = endpoint_dict[
+                endpoint_id = endpoint_dict[mm_schemas.EventFieldType.UID]
             self.delete_model_endpoint(
                 endpoint_id,
             )
@@ -268,11 +324,19 @@ class KVStoreBase(mlrun.model_monitoring.db.StoreBase):
             raise_for_status=v3io.dataplane.RaiseForStatus.never,
         )

+    @staticmethod
+    def _get_results_table_path(endpoint_id: str) -> str:
+        return endpoint_id
+
+    @staticmethod
+    def _get_metrics_table_path(endpoint_id: str) -> str:
+        return f"{endpoint_id}_metrics"
+
     def write_application_event(
         self,
         event: dict[str, typing.Any],
-        kind:
-    ):
+        kind: mm_schemas.WriterEventKind = mm_schemas.WriterEventKind.RESULT,
+    ) -> None:
         """
         Write a new application event in the target table.

@@ -281,66 +345,63 @@ class KVStoreBase(mlrun.model_monitoring.db.StoreBase):
                      object.
         :param kind: The type of the event, can be either "result" or "metric".
         """
-        if kind == mm_constants.WriterEventKind.METRIC:
-            # TODO : Implement the logic for writing metrics to KV
-            return
-
-        endpoint_id = event.pop(mm_constants.WriterEvent.ENDPOINT_ID)
-        app_name = event.pop(mm_constants.WriterEvent.APPLICATION_NAME)
-        metric_name = event.pop(mm_constants.ResultData.RESULT_NAME)
-        attributes = {metric_name: json.dumps(event)}

-
-
-
+        container = self.get_v3io_monitoring_apps_container(project_name=self.project)
+        endpoint_id = event.pop(mm_schemas.WriterEvent.ENDPOINT_ID)
+
+        if kind == mm_schemas.WriterEventKind.METRIC:
+            table_path = self._get_metrics_table_path(endpoint_id)
+            key = f"{event[mm_schemas.WriterEvent.APPLICATION_NAME]}.{event[mm_schemas.MetricData.METRIC_NAME]}"
+            attributes = {event_key: event[event_key] for event_key in _METRIC_FIELDS}
+        elif kind == mm_schemas.WriterEventKind.RESULT:
+            table_path = self._get_results_table_path(endpoint_id)
+            key = event.pop(mm_schemas.WriterEvent.APPLICATION_NAME)
+            metric_name = event.pop(mm_schemas.ResultData.RESULT_NAME)
+            attributes = {metric_name: json.dumps(event)}
+        else:
+            raise ValueError(f"Invalid {kind = }")

         self.client.kv.update(
-            container=
-            table_path=
-            key=
+            container=container,
+            table_path=table_path,
+            key=key,
             attributes=attributes,
         )

         schema_file = self.client.kv.new_cursor(
-            container=
-            table_path=
+            container=container,
+            table_path=table_path,
             filter_expression='__name==".#schema"',
         )

         if not schema_file.all():
             logger.info(
-                "
-                container=
-
+                "Generating a new V3IO KV schema file",
+                container=container,
+                table_path=table_path,
+            )
+            self._generate_kv_schema(
+                container=container, table_path=table_path, kind=kind
             )
-
-        logger.info("Updated V3IO KV successfully", key=app_name)
+        logger.info("Updated V3IO KV successfully", key=key)

     def _generate_kv_schema(
-        self,
-    ):
+        self, *, container: str, table_path: str, kind: mm_schemas.WriterEventKind
+    ) -> None:
         """Generate V3IO KV schema file which will be used by the model monitoring applications dashboard in Grafana."""
-
-            {
-                "name": mm_constants.ResultData.RESULT_NAME,
-                "type": "string",
-                "nullable": False,
-            }
-        ]
+        schema_params = _KIND_TO_SCHEMA_PARAMS[kind]
         res = self.client.kv.create_schema(
-            container=
-            table_path=
-            key=
-            fields=fields,
+            container=container,
+            table_path=table_path,
+            key=schema_params.key,
+            fields=schema_params.fields,
         )
         if res.status_code != HTTPStatus.OK:
             raise mlrun.errors.MLRunBadRequestError(
-                f"Couldn't infer schema for endpoint {
+                f"Couldn't infer schema for endpoint {table_path} which is required for Grafana dashboards"
             )
         else:
-            logger.info(
-                "Generated V3IO KV schema successfully", endpoint_id=endpoint_id
-            )
+            logger.info("Generated V3IO KV schema successfully", table_path=table_path)

     def get_last_analyzed(self, endpoint_id: str, application_name: str) -> int:
         """
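To summarize the new branching in write_application_event, here is a self-contained sketch of the per-kind routing. Plain strings and dict keys stand in for the mm_schemas constants and _METRIC_FIELDS; the exact key strings are assumptions, only the shape mirrors the diff:

import json

# Stand-ins for the _METRIC_FIELDS constants referenced in the diff above.
METRIC_FIELDS = [
    "application_name",
    "metric_name",
    "metric_value",
    "start_infer_time",
    "end_infer_time",
]


def build_kv_update(event: dict, kind: str, endpoint_id: str) -> tuple[str, str, dict]:
    """Return (table_path, key, attributes) for a writer event, per the new logic."""
    if kind == "metric":
        # Metrics go to a dedicated "<endpoint_id>_metrics" table, keyed by "app.metric".
        table_path = f"{endpoint_id}_metrics"
        key = f"{event['application_name']}.{event['metric_name']}"
        attributes = {name: event[name] for name in METRIC_FIELDS}
    elif kind == "result":
        # Results keep the previous layout: one row per application, a JSON blob per result.
        table_path = endpoint_id
        key = event.pop("application_name")
        result_name = event.pop("result_name")
        attributes = {result_name: json.dumps(event)}
    else:
        raise ValueError(f"Invalid {kind = }")
    return table_path, key, attributes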
@@ -361,7 +422,7 @@ class KVStoreBase(mlrun.model_monitoring.db.StoreBase):
                 table_path=endpoint_id,
                 key=application_name,
             )
-            return data.output.item[
+            return data.output.item[mm_schemas.SchedulingKeys.LAST_ANALYZED]
         except v3io.dataplane.response.HttpResponseError as err:
             logger.debug("Error while getting last analyzed time", err=err)
             raise mlrun.errors.MLRunNotFoundError(
@@ -386,7 +447,7 @@ class KVStoreBase(mlrun.model_monitoring.db.StoreBase):
             ),
             table_path=endpoint_id,
             key=application_name,
-            attributes={
+            attributes={mm_schemas.SchedulingKeys.LAST_ANALYZED: last_analyzed},
         )

     def _generate_tsdb_paths(self) -> tuple[str, str]:
@@ -399,7 +460,7 @@ class KVStoreBase(mlrun.model_monitoring.db.StoreBase):
         full_path = (
             mlrun.mlconf.model_endpoint_monitoring.store_prefixes.default.format(
                 project=self.project,
-                kind=
+                kind=mm_schemas.ModelMonitoringStoreKinds.EVENTS,
             )
         )

@@ -495,8 +556,8 @@ class KVStoreBase(mlrun.model_monitoring.db.StoreBase):
         # Apply top_level filter (remove endpoints that considered a child of a router)
         if top_level:
             filter_expression.append(
-                f"(endpoint_type=='{str(
-                f"OR endpoint_type=='{str(
+                f"(endpoint_type=='{str(mm_schemas.EndpointType.NODE_EP.value)}' "
+                f"OR endpoint_type=='{str(mm_schemas.EndpointType.ROUTER.value)}')"
             )

         return " AND ".join(filter_expression)
@@ -516,30 +577,30 @@ class KVStoreBase(mlrun.model_monitoring.db.StoreBase):
         # Validate default value for `error_count`
         # For backwards compatibility reasons, we validate that the model endpoint includes the `error_count` key
         if (
-
-            and endpoint[
+            mm_schemas.EventFieldType.ERROR_COUNT in endpoint
+            and endpoint[mm_schemas.EventFieldType.ERROR_COUNT] == "null"
         ):
-            endpoint[
+            endpoint[mm_schemas.EventFieldType.ERROR_COUNT] = "0"

         # Validate default value for `metrics`
         # For backwards compatibility reasons, we validate that the model endpoint includes the `metrics` key
         if (
-
-            and endpoint[
+            mm_schemas.EventFieldType.METRICS in endpoint
+            and endpoint[mm_schemas.EventFieldType.METRICS] == "null"
         ):
-            endpoint[
+            endpoint[mm_schemas.EventFieldType.METRICS] = json.dumps(
                 {
-
-
-
+                    mm_schemas.EventKeyMetrics.GENERIC: {
+                        mm_schemas.EventLiveStats.LATENCY_AVG_1H: 0,
+                        mm_schemas.EventLiveStats.PREDICTIONS_PER_SECOND: 0,
                     }
                 }
             )
         # Validate key `uid` instead of `endpoint_id`
         # For backwards compatibility reasons, we replace the `endpoint_id` with `uid` which is the updated key name
-        if
-            endpoint[
-
+        if mm_schemas.EventFieldType.ENDPOINT_ID in endpoint:
+            endpoint[mm_schemas.EventFieldType.UID] = endpoint[
+                mm_schemas.EventFieldType.ENDPOINT_ID
             ]

     @staticmethod
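A condensed sketch of the legacy-value normalization shown above. The literal key strings ("error_count", "metrics", "endpoint_id", "uid", and the nested metric names) are assumptions standing in for the mm_schemas constants:

import json


def backfill_legacy_endpoint(endpoint: dict) -> dict:
    # V3IO KV may return the literal string "null" for fields that were never set.
    if endpoint.get("error_count") == "null":
        endpoint["error_count"] = "0"
    if endpoint.get("metrics") == "null":
        endpoint["metrics"] = json.dumps(
            {"generic": {"latency_avg_1h": 0, "predictions_per_second": 0}}
        )
    # Old records keyed the endpoint under "endpoint_id"; copy it to the newer "uid" key.
    if "endpoint_id" in endpoint:
        endpoint["uid"] = endpoint["endpoint_id"]
    return endpoint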
@@ -566,57 +627,95 @@ class KVStoreBase(mlrun.model_monitoring.db.StoreBase):
     def _get_monitoring_schedules_container(project_name: str) -> str:
         return f"users/pipelines/{project_name}/monitoring-schedules/functions"

-    def
+    def _extract_results_from_items(
         self, app_items: list[dict[str, str]]
-    ) -> list[
-
+    ) -> list[mm_schemas.ModelEndpointMonitoringMetric]:
+        """Assuming .#schema items are filtered out"""
+        metrics: list[mm_schemas.ModelEndpointMonitoringMetric] = []
         for app_item in app_items:
-            # See https://www.iguazio.com/docs/latest-release/services/data-layer/reference/system-attributes/#sys-attr-__name
             app_name = app_item.pop("__name")
-            if app_name == ".#schema":
-                continue
             for result_name in app_item:
                 metrics.append(
-
+                    mm_schemas.ModelEndpointMonitoringMetric(
                         project=self.project,
                         app=app_name,
-                        type=
+                        type=mm_schemas.ModelEndpointMonitoringMetricType.RESULT,
                         name=result_name,
-                        full_name=
+                        full_name=mm_schemas.model_endpoints._compose_full_name(
                             project=self.project, app=app_name, name=result_name
                         ),
                     )
                 )
         return metrics

+    def _extract_metrics_from_items(
+        self, result_items: list[dict[str, str]]
+    ) -> list[mm_schemas.ModelEndpointMonitoringMetric]:
+        metrics: list[mm_schemas.ModelEndpointMonitoringMetric] = []
+        logger.debug("Result items", result_items=result_items)
+        for result_item in result_items:
+            app = result_item[mm_schemas.WriterEvent.APPLICATION_NAME]
+            name = result_item[mm_schemas.MetricData.METRIC_NAME]
+            metrics.append(
+                mm_schemas.ModelEndpointMonitoringMetric(
+                    project=self.project,
+                    app=app,
+                    type=mm_schemas.ModelEndpointMonitoringMetricType.METRIC,
+                    name=name,
+                    full_name=mm_schemas.model_endpoints._compose_full_name(
+                        project=self.project,
+                        app=app,
+                        name=name,
+                        type=mm_schemas.ModelEndpointMonitoringMetricType.METRIC,
+                    ),
+                )
+            )
+        return metrics
+
     def get_model_endpoint_metrics(
-        self, endpoint_id: str
-    ) -> list[
+        self, endpoint_id: str, type: mm_schemas.ModelEndpointMonitoringMetricType
+    ) -> list[mm_schemas.ModelEndpointMonitoringMetric]:
         """Get model monitoring results and metrics on the endpoint"""
-        metrics: list[
+        metrics: list[mm_schemas.ModelEndpointMonitoringMetric] = []
         container = self.get_v3io_monitoring_apps_container(self.project)
+        if type == mm_schemas.ModelEndpointMonitoringMetricType.METRIC:
+            table_path = self._get_metrics_table_path(endpoint_id)
+            items_extractor = self._extract_metrics_from_items
+        elif type == mm_schemas.ModelEndpointMonitoringMetricType.RESULT:
+            table_path = self._get_results_table_path(endpoint_id)
+            items_extractor = self._extract_results_from_items
+        else:
+            raise ValueError(f"Invalid metric {type = }")
+
+        def scan(
+            marker: typing.Optional[str] = None,
+        ) -> v3io.dataplane.response.Response:
+            # TODO: Use AIO client: `v3io.aio.dataplane.client.Client`
+            return self.client.kv.scan(
+                container=container,
+                table_path=table_path,
+                marker=marker,
+                filter_expression=_EXCLUDE_SCHEMA_FILTER_EXPRESSION,
+            )
+
         try:
-            response =
+            response = scan()
         except v3io.dataplane.response.HttpResponseError as err:
             if err.status_code == HTTPStatus.NOT_FOUND:
                 logger.warning(
-                    "Attempt getting
+                    f"Attempt getting {type}s - no data. Check the "
                     "project name, endpoint, or wait for the applications to start.",
                     container=container,
-                    table_path=
+                    table_path=table_path,
                 )
                 return []
             raise

         while True:
-
-
+            output = typing.cast(v3io.dataplane.output.GetItemsOutput, response.output)
+            metrics.extend(items_extractor(output.items))
+            if output.last:
                 break
-
-            response = self.client.kv.scan(
-                container=container,
-                table_path=endpoint_id,
-                marker=response.output.next_marker,
-            )
+            response = scan(marker=output.next_marker)

         return metrics
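The rewritten get_model_endpoint_metrics wraps the V3IO scan in a local closure and drains it with a marker-based loop. A generic, runnable sketch of that pagination pattern, with fetch_page standing in for self.client.kv.scan:

from typing import Callable, Optional


def collect_all_items(fetch_page: Callable[[Optional[str]], dict]) -> list:
    """fetch_page(marker) returns {"items": [...], "last": bool, "next_marker": ...}."""
    items: list = []
    response = fetch_page(None)
    while True:
        items.extend(response["items"])
        if response["last"]:
            break
        response = fetch_page(response["next_marker"])
    return items


# Example with two fake pages.
pages = {
    None: {"items": [1, 2], "last": False, "next_marker": "m1"},
    "m1": {"items": [3], "last": True, "next_marker": None},
}
print(collect_all_items(lambda marker: pages[marker]))  # [1, 2, 3]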
--- a/mlrun/model_monitoring/db/tsdb/__init__.py
+++ b/mlrun/model_monitoring/db/tsdb/__init__.py
@@ -25,6 +25,7 @@ class ObjectTSDBFactory(enum.Enum):
     """Enum class to handle the different TSDB connector type values for storing real time metrics"""

     v3io_tsdb = "v3io-tsdb"
+    tdengine = "tdengine"

     def to_tsdb_connector(self, project: str, **kwargs) -> TSDBConnector:
         """
@@ -43,6 +44,13 @@ class ObjectTSDBFactory(enum.Enum):

             return V3IOTSDBConnector(project=project, **kwargs)

+        # Assuming TDEngine connector if connector type is not V3IO TSDB.
+        # Update these lines once there are more than two connector types.
+
+        from .tdengine.tdengine_connector import TDEngineConnector
+
+        return TDEngineConnector(project=project, **kwargs)
+
     @classmethod
     def _missing_(cls, value: typing.Any):
         """A lookup function to handle an invalid value.
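The to_tsdb_connector change keeps the enum-as-factory pattern with lazily imported backends. A runnable sketch of the dispatch shape, using dummy connector classes in place of the real V3IO and TDEngine connectors:

import enum


class _V3IOTSDBConnector:  # placeholder for the real connector class
    def __init__(self, project: str, **kwargs):
        self.project = project


class _TDEngineConnector:  # placeholder for the real connector class
    def __init__(self, project: str, **kwargs):
        self.project = project


class TSDBFactory(enum.Enum):
    v3io_tsdb = "v3io-tsdb"
    tdengine = "tdengine"

    def to_tsdb_connector(self, project: str, **kwargs):
        if self is TSDBFactory.v3io_tsdb:
            return _V3IOTSDBConnector(project=project, **kwargs)
        # As in the diff: assume TDEngine whenever the type is not V3IO TSDB.
        return _TDEngineConnector(project=project, **kwargs)


print(type(TSDBFactory("tdengine").to_tsdb_connector(project="demo")).__name__)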
@@ -54,18 +62,39 @@ class ObjectTSDBFactory(enum.Enum):
         )


-def get_tsdb_connector(
+def get_tsdb_connector(
+    project: str,
+    tsdb_connector_type: str = "",
+    secret_provider: typing.Optional[typing.Callable] = None,
+    **kwargs,
+) -> TSDBConnector:
     """
-    Get
+    Get TSDB connector object.
     :param project: The name of the project.
+    :param tsdb_connector_type: The type of the TSDB connector. See mlrun.model_monitoring.db.tsdb.ObjectTSDBFactory
+                                for available options.
+    :param secret_provider: An optional secret provider to get the connection string secret.
+
     :return: `TSDBConnector` object. The main goal of this object is to handle different operations on the
              TSDB connector such as updating drift metrics or write application record result.
     """

-
-
-        mlrun.mlconf.model_endpoint_monitoring.tsdb_connector_type
+    tsdb_connection_string = mlrun.model_monitoring.helpers.get_tsdb_connection_string(
+        secret_provider=secret_provider
     )

+    if tsdb_connection_string and tsdb_connection_string.startswith("taosws"):
+        tsdb_connector_type = mlrun.common.schemas.model_monitoring.TSDBTarget.TDEngine
+        kwargs["connection_string"] = tsdb_connection_string
+
+    # Set the default TSDB connector type if no connection has been set
+    tsdb_connector_type = (
+        tsdb_connector_type
+        or mlrun.mlconf.model_endpoint_monitoring.tsdb_connector_type
+    )
+
+    # Get connector type value from ObjectTSDBFactory enum class
+    tsdb_connector_factory = ObjectTSDBFactory(tsdb_connector_type)
+
     # Convert into TSDB connector object
-    return
+    return tsdb_connector_factory.to_tsdb_connector(project=project, **kwargs)