mlrun 1.7.0rc17__py3-none-any.whl → 1.7.0rc18__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of mlrun might be problematic.
- mlrun/alerts/alert.py +1 -1
- mlrun/artifacts/manager.py +5 -1
- mlrun/common/runtimes/constants.py +3 -0
- mlrun/common/schemas/__init__.py +1 -1
- mlrun/common/schemas/alert.py +31 -9
- mlrun/common/schemas/client_spec.py +1 -0
- mlrun/common/schemas/function.py +4 -0
- mlrun/common/schemas/model_monitoring/__init__.py +3 -1
- mlrun/common/schemas/model_monitoring/constants.py +20 -1
- mlrun/common/schemas/model_monitoring/grafana.py +9 -5
- mlrun/common/schemas/model_monitoring/model_endpoints.py +17 -6
- mlrun/config.py +2 -0
- mlrun/data_types/to_pandas.py +5 -5
- mlrun/datastore/datastore.py +6 -2
- mlrun/datastore/redis.py +2 -2
- mlrun/datastore/s3.py +5 -0
- mlrun/datastore/sources.py +111 -6
- mlrun/datastore/targets.py +2 -2
- mlrun/db/base.py +5 -1
- mlrun/db/httpdb.py +22 -3
- mlrun/db/nopdb.py +5 -1
- mlrun/errors.py +6 -0
- mlrun/feature_store/retrieval/conversion.py +5 -5
- mlrun/feature_store/retrieval/job.py +3 -2
- mlrun/feature_store/retrieval/spark_merger.py +2 -1
- mlrun/frameworks/_dl_common/loggers/tensorboard_logger.py +2 -2
- mlrun/model_monitoring/db/stores/base/store.py +16 -3
- mlrun/model_monitoring/db/stores/sqldb/sql_store.py +44 -43
- mlrun/model_monitoring/db/stores/v3io_kv/kv_store.py +190 -91
- mlrun/model_monitoring/db/tsdb/__init__.py +35 -6
- mlrun/model_monitoring/db/tsdb/base.py +25 -18
- mlrun/model_monitoring/db/tsdb/tdengine/__init__.py +15 -0
- mlrun/model_monitoring/db/tsdb/tdengine/schemas.py +207 -0
- mlrun/model_monitoring/db/tsdb/tdengine/stream_graph_steps.py +45 -0
- mlrun/model_monitoring/db/tsdb/tdengine/tdengine_connector.py +231 -0
- mlrun/model_monitoring/db/tsdb/v3io/v3io_connector.py +73 -72
- mlrun/model_monitoring/db/v3io_tsdb_reader.py +217 -16
- mlrun/model_monitoring/helpers.py +32 -0
- mlrun/model_monitoring/stream_processing.py +7 -4
- mlrun/model_monitoring/writer.py +18 -13
- mlrun/package/utils/_formatter.py +2 -2
- mlrun/projects/project.py +33 -8
- mlrun/render.py +8 -5
- mlrun/runtimes/databricks_job/databricks_wrapper.py +1 -1
- mlrun/utils/async_http.py +25 -5
- mlrun/utils/helpers.py +20 -1
- mlrun/utils/notifications/notification/slack.py +27 -7
- mlrun/utils/notifications/notification_pusher.py +38 -40
- mlrun/utils/version/version.json +2 -2
- {mlrun-1.7.0rc17.dist-info → mlrun-1.7.0rc18.dist-info}/METADATA +7 -2
- {mlrun-1.7.0rc17.dist-info → mlrun-1.7.0rc18.dist-info}/RECORD +55 -51
- {mlrun-1.7.0rc17.dist-info → mlrun-1.7.0rc18.dist-info}/LICENSE +0 -0
- {mlrun-1.7.0rc17.dist-info → mlrun-1.7.0rc18.dist-info}/WHEEL +0 -0
- {mlrun-1.7.0rc17.dist-info → mlrun-1.7.0rc18.dist-info}/entry_points.txt +0 -0
- {mlrun-1.7.0rc17.dist-info → mlrun-1.7.0rc18.dist-info}/top_level.txt +0 -0
mlrun/model_monitoring/db/tsdb/v3io/v3io_connector.py

@@ -11,8 +11,8 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-
 import datetime
+import typing
 
 import pandas as pd
 import v3io_frames.client
@@ -21,7 +21,7 @@ from v3io.dataplane import Client as V3IOClient
 from v3io_frames.frames_pb2 import IGNORE
 
 import mlrun.common.model_monitoring
-import mlrun.common.schemas.model_monitoring as mm_constants
+import mlrun.common.schemas.model_monitoring as mm_schemas
 import mlrun.feature_store.steps
 import mlrun.utils.v3io_clients
 from mlrun.model_monitoring.db import TSDBConnector
@@ -37,12 +37,14 @@ class V3IOTSDBConnector(TSDBConnector):
     Client that provides API for executing commands on the V3IO TSDB table.
     """
 
+    type: str = mm_schemas.TSDBTarget.V3IO_TSDB
+
     def __init__(
         self,
         project: str,
-        access_key: str = None,
+        access_key: typing.Optional[str] = None,
         container: str = "users",
-        v3io_framesd: str = None,
+        v3io_framesd: typing.Optional[str] = None,
         create_table: bool = False,
     ):
         super().__init__(project=project)
@@ -61,14 +63,14 @@ class V3IOTSDBConnector(TSDBConnector):
         self._init_tables_path()
 
         if create_table:
-            self.
+            self.create_tables()
 
     def _init_tables_path(self):
         self.tables = {}
 
         events_table_full_path = mlrun.mlconf.get_model_monitoring_file_target_path(
             project=self.project,
-            kind=
+            kind=mm_schemas.FileTargetKind.EVENTS,
         )
         (
             _,
@@ -77,12 +79,12 @@ class V3IOTSDBConnector(TSDBConnector):
         ) = mlrun.common.model_monitoring.helpers.parse_model_endpoint_store_prefix(
             events_table_full_path
         )
-        self.tables[
+        self.tables[mm_schemas.V3IOTSDBTables.EVENTS] = events_path
 
         monitoring_application_full_path = (
             mlrun.mlconf.get_model_monitoring_file_target_path(
                 project=self.project,
-                kind=
+                kind=mm_schemas.FileTargetKind.MONITORING_APPLICATION,
             )
         )
         (
@@ -92,17 +94,17 @@ class V3IOTSDBConnector(TSDBConnector):
         ) = mlrun.common.model_monitoring.helpers.parse_model_endpoint_store_prefix(
             monitoring_application_full_path
         )
-        self.tables[
-            monitoring_application_path +
+        self.tables[mm_schemas.V3IOTSDBTables.APP_RESULTS] = (
+            monitoring_application_path + mm_schemas.V3IOTSDBTables.APP_RESULTS
         )
-        self.tables[
-            monitoring_application_path +
+        self.tables[mm_schemas.V3IOTSDBTables.METRICS] = (
+            monitoring_application_path + mm_schemas.V3IOTSDBTables.METRICS
         )
 
         monitoring_predictions_full_path = (
             mlrun.mlconf.get_model_monitoring_file_target_path(
                 project=self.project,
-                kind=
+                kind=mm_schemas.FileTargetKind.PREDICTIONS,
             )
         )
         (
@@ -112,25 +114,25 @@ class V3IOTSDBConnector(TSDBConnector):
         ) = mlrun.common.model_monitoring.helpers.parse_model_endpoint_store_prefix(
             monitoring_predictions_full_path
         )
-        self.tables[
-            monitoring_predictions_path
-        )
+        self.tables[mm_schemas.FileTargetKind.PREDICTIONS] = monitoring_predictions_path
 
-    def
+    def create_tables(self) -> None:
         """
-        Create the
+        Create the tables using the TSDB connector. The tables are being created in the V3IO TSDB and include:
         - app_results: a detailed result that includes status, kind, extra data, etc.
         - metrics: a basic key value that represents a single numeric metric.
+        Note that the predictions table is automatically created by the model monitoring stream pod.
        """
         application_tables = [
-
-
+            mm_schemas.V3IOTSDBTables.APP_RESULTS,
+            mm_schemas.V3IOTSDBTables.METRICS,
         ]
-        for
-            logger.info("Creating table in V3IO TSDB",
+        for table_name in application_tables:
+            logger.info("Creating table in V3IO TSDB", table_name=table_name)
+            table = self.tables[table_name]
             self._frames_client.create(
                 backend=_TSDB_BE,
-                table=
+                table=table,
                 if_exists=IGNORE,
                 rate=_TSDB_RATE,
             )
@@ -156,20 +158,20 @@ class V3IOTSDBConnector(TSDBConnector):
             "storey.TSDBTarget",
             name="tsdb_predictions",
             after="MapFeatureNames",
-            path=f"{self.container}/{self.tables[
+            path=f"{self.container}/{self.tables[mm_schemas.FileTargetKind.PREDICTIONS]}",
             rate="1/s",
-            time_col=
+            time_col=mm_schemas.EventFieldType.TIMESTAMP,
             container=self.container,
             v3io_frames=self.v3io_framesd,
             columns=["latency"],
             index_cols=[
-
+                mm_schemas.EventFieldType.ENDPOINT_ID,
             ],
             aggr="count,avg",
             aggr_granularity="1m",
             max_events=tsdb_batching_max_events,
             flush_after_seconds=tsdb_batching_timeout_secs,
-            key=
+            key=mm_schemas.EventFieldType.ENDPOINT_ID,
         )
 
         # Before writing data to TSDB, create dictionary of 2-3 dictionaries that contains
@@ -198,40 +200,40 @@ class V3IOTSDBConnector(TSDBConnector):
                 "storey.TSDBTarget",
                 name=name,
                 after=after,
-                path=f"{self.container}/{self.tables[
+                path=f"{self.container}/{self.tables[mm_schemas.V3IOTSDBTables.EVENTS]}",
                 rate="10/m",
-                time_col=
+                time_col=mm_schemas.EventFieldType.TIMESTAMP,
                 container=self.container,
                 v3io_frames=self.v3io_framesd,
                 infer_columns_from_data=True,
                 index_cols=[
-
-
-
+                    mm_schemas.EventFieldType.ENDPOINT_ID,
+                    mm_schemas.EventFieldType.RECORD_TYPE,
+                    mm_schemas.EventFieldType.ENDPOINT_TYPE,
                 ],
                 max_events=tsdb_batching_max_events,
                 flush_after_seconds=tsdb_batching_timeout_secs,
-                key=
+                key=mm_schemas.EventFieldType.ENDPOINT_ID,
             )
 
         # unpacked base_metrics dictionary
         apply_filter_and_unpacked_keys(
             name="FilterAndUnpackKeys1",
-            keys=
+            keys=mm_schemas.EventKeyMetrics.BASE_METRICS,
         )
         apply_tsdb_target(name="tsdb1", after="FilterAndUnpackKeys1")
 
         # unpacked endpoint_features dictionary
         apply_filter_and_unpacked_keys(
             name="FilterAndUnpackKeys2",
-            keys=
+            keys=mm_schemas.EventKeyMetrics.ENDPOINT_FEATURES,
         )
         apply_tsdb_target(name="tsdb2", after="FilterAndUnpackKeys2")
 
         # unpacked custom_metrics dictionary. In addition, use storey.Filter remove none values
         apply_filter_and_unpacked_keys(
             name="FilterAndUnpackKeys3",
-            keys=
+            keys=mm_schemas.EventKeyMetrics.CUSTOM_METRICS,
         )
 
         def apply_storey_filter():
@@ -248,56 +250,55 @@ class V3IOTSDBConnector(TSDBConnector):
     def write_application_event(
         self,
         event: dict,
-        kind:
-    ):
+        kind: mm_schemas.WriterEventKind = mm_schemas.WriterEventKind.RESULT,
+    ) -> None:
         """Write a single result or metric to TSDB"""
 
-        event[
-
-            event[mm_constants.WriterEvent.END_INFER_TIME]
-        )
+        event[mm_schemas.WriterEvent.END_INFER_TIME] = datetime.datetime.fromisoformat(
+            event[mm_schemas.WriterEvent.END_INFER_TIME]
         )
+        index_cols_base = [
+            mm_schemas.WriterEvent.END_INFER_TIME,
+            mm_schemas.WriterEvent.ENDPOINT_ID,
+            mm_schemas.WriterEvent.APPLICATION_NAME,
+        ]
 
-        if kind ==
-
-
+        if kind == mm_schemas.WriterEventKind.METRIC:
+            table = self.tables[mm_schemas.V3IOTSDBTables.METRICS]
+            index_cols = index_cols_base + [mm_schemas.MetricData.METRIC_NAME]
+        elif kind == mm_schemas.WriterEventKind.RESULT:
+            table = self.tables[mm_schemas.V3IOTSDBTables.APP_RESULTS]
+            index_cols = index_cols_base + [mm_schemas.ResultData.RESULT_NAME]
+            del event[mm_schemas.ResultData.RESULT_EXTRA_DATA]
+        else:
+            raise ValueError(f"Invalid {kind = }")
 
-        del event[mm_constants.ResultData.RESULT_EXTRA_DATA]
         try:
             self._frames_client.write(
                 backend=_TSDB_BE,
-                table=
+                table=table,
                 dfs=pd.DataFrame.from_records([event]),
-                index_cols=
-                    mm_constants.WriterEvent.END_INFER_TIME,
-                    mm_constants.WriterEvent.ENDPOINT_ID,
-                    mm_constants.WriterEvent.APPLICATION_NAME,
-                    mm_constants.ResultData.RESULT_NAME,
-                ],
-            )
-            logger.info(
-                "Updated V3IO TSDB successfully",
-                table=self.tables[mm_constants.MonitoringTSDBTables.APP_RESULTS],
+                index_cols=index_cols,
             )
+            logger.info("Updated V3IO TSDB successfully", table=table)
         except v3io_frames.errors.Error as err:
-            logger.
+            logger.exception(
                 "Could not write drift measures to TSDB",
                 err=err,
-                table=
+                table=table,
                 event=event,
             )
-
             raise mlrun.errors.MLRunRuntimeError(
                 f"Failed to write application result to TSDB: {err}"
             )
 
-    def delete_tsdb_resources(self, table: str = None):
+    def delete_tsdb_resources(self, table: typing.Optional[str] = None):
         if table:
             # Delete a specific table
             tables = [table]
         else:
             # Delete all tables
-            tables =
+            tables = mm_schemas.V3IOTSDBTables.list()
         for table in tables:
             try:
                 self._frames_client.delete(
@@ -320,8 +321,8 @@ class V3IOTSDBConnector(TSDBConnector):
         self,
         endpoint_id: str,
         metrics: list[str],
-        start: str
-        end: str
+        start: str,
+        end: str,
     ) -> dict[str, list[tuple[str, float]]]:
         """
        Getting real time metrics from the TSDB. There are pre-defined metrics for model endpoints such as
@@ -350,7 +351,7 @@ class V3IOTSDBConnector(TSDBConnector):
 
         try:
             data = self.get_records(
-                table=
+                table=mm_schemas.V3IOTSDBTables.EVENTS,
                 columns=["endpoint_id", *metrics],
                 filter_query=f"endpoint_id=='{endpoint_id}'",
                 start=start,
@@ -377,17 +378,14 @@ class V3IOTSDBConnector(TSDBConnector):
     def get_records(
         self,
         table: str,
-
+        start: str,
+        end: str,
+        columns: typing.Optional[list[str]] = None,
         filter_query: str = "",
-        start: str = "now-1h",
-        end: str = "now",
     ) -> pd.DataFrame:
         """
         Getting records from V3IO TSDB data collection.
         :param table: Path to the collection to query.
-        :param columns: Columns to include in the result.
-        :param filter_query: V3IO filter expression. The expected filter expression includes different conditions,
-                             divided by ' AND '.
         :param start: The start time of the metrics. Can be represented by a string containing an RFC 3339
                       time, a Unix timestamp in milliseconds, a relative time (`'now'` or
                      `'now-[0-9]+[mhd]'`, where `m` = minutes, `h` = hours, `'d'` = days, and
@@ -396,6 +394,9 @@ class V3IOTSDBConnector(TSDBConnector):
                       time, a Unix timestamp in milliseconds, a relative time (`'now'` or
                      `'now-[0-9]+[mhd]'`, where `m` = minutes, `h` = hours, `'d'` = days, and
                      `'s'` = seconds), or 0 for the earliest time.
+        :param columns: Columns to include in the result.
+        :param filter_query: V3IO filter expression. The expected filter expression includes different conditions,
+                             divided by ' AND '.
         :return: DataFrame with the provided attributes from the data collection.
         :raise: MLRunNotFoundError if the provided table wasn't found.
         """
@@ -422,7 +423,7 @@ class V3IOTSDBConnector(TSDBConnector):
         """
         events_table_full_path = mlrun.mlconf.get_model_monitoring_file_target_path(
             project=self.project,
-            kind=
+            kind=mm_schemas.FileTargetKind.EVENTS,
         )
 
         # Generate the main directory with the V3IO resources
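A minimal usage sketch (not part of the diff) of the reworked connector API, assuming a reachable V3IO backend; the project, endpoint, application, and result names are hypothetical placeholders, and the event dict is abbreviated to the writer-event fields that the code above actually touches:

# Minimal sketch, assuming a reachable V3IO backend; names are placeholders.
import datetime

import mlrun.common.schemas.model_monitoring as mm_schemas
from mlrun.model_monitoring.db.tsdb.v3io.v3io_connector import V3IOTSDBConnector

# create_table=True now goes through the renamed public create_tables() method.
connector = V3IOTSDBConnector(project="my-project", create_table=True)

# Abbreviated writer event: only the fields the connector parses or indexes on.
event = {
    mm_schemas.WriterEvent.END_INFER_TIME: datetime.datetime.now(
        datetime.timezone.utc
    ).isoformat(),
    mm_schemas.WriterEvent.ENDPOINT_ID: "endpoint-1234",
    mm_schemas.WriterEvent.APPLICATION_NAME: "my-monitoring-app",
    mm_schemas.ResultData.RESULT_NAME: "data-drift",
    mm_schemas.ResultData.RESULT_EXTRA_DATA: "{}",
}
# kind defaults to WriterEventKind.RESULT; pass WriterEventKind.METRIC for metrics.
connector.write_application_event(event, kind=mm_schemas.WriterEventKind.RESULT)

# get_records() now takes explicit start/end instead of defaulting to "now-1h"/"now".
df = connector.get_records(
    table=mm_schemas.V3IOTSDBTables.APP_RESULTS,
    start="now-1h",
    end="now",
    filter_query="endpoint_id=='endpoint-1234'",
)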
mlrun/model_monitoring/db/v3io_tsdb_reader.py

@@ -12,10 +12,12 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-# TODO: Move this module into the TSDB abstraction
+# TODO: Move this module into the TSDB abstraction:
+# mlrun/model_monitoring/db/tsdb/v3io/v3io_connector.py
 
 from datetime import datetime
 from io import StringIO
+from typing import Literal, Optional, Union
 
 import pandas as pd
 
@@ -25,21 +27,27 @@ import mlrun.model_monitoring.writer as mm_writer
 import mlrun.utils.v3io_clients
 from mlrun.common.schemas.model_monitoring.model_endpoints import (
     ModelEndpointMonitoringMetric,
+    ModelEndpointMonitoringMetricNoData,
     ModelEndpointMonitoringMetricType,
-
+    ModelEndpointMonitoringMetricValues,
     ModelEndpointMonitoringResultValues,
     _compose_full_name,
-
+    _ModelEndpointMonitoringMetricValuesBase,
 )
 from mlrun.model_monitoring.db.stores.v3io_kv.kv_store import KVStoreBase
 from mlrun.model_monitoring.db.tsdb.v3io.v3io_connector import _TSDB_BE
 from mlrun.utils import logger
 
 
-def _get_sql_query(endpoint_id: str, names: list[tuple[str, str]]) -> str:
+def _get_sql_query(
+    endpoint_id: str,
+    names: list[tuple[str, str]],
+    table_name: str = mm_constants.V3IOTSDBTables.APP_RESULTS,
+    name: str = mm_writer.ResultData.RESULT_NAME,
+) -> str:
     with StringIO() as query:
         query.write(
-            f"SELECT * FROM '{
+            f"SELECT * FROM '{table_name}' "
             f"WHERE {mm_writer.WriterEvent.ENDPOINT_ID}='{endpoint_id}'"
         )
         if names:
@@ -48,7 +56,7 @@ def _get_sql_query(endpoint_id: str, names: list[tuple[str, str]]) -> str:
             for i, (app_name, result_name) in enumerate(names):
                 sub_cond = (
                     f"({mm_writer.WriterEvent.APPLICATION_NAME}='{app_name}' "
-                    f"AND {
+                    f"AND {name}='{result_name}')"
                 )
                 if i != 0:  # not first sub condition
                     query.write(" OR ")
@@ -73,30 +81,87 @@ def _get_result_kind(result_df: pd.DataFrame) -> mm_constants.ResultKindApp:
     return unique_kinds[0]
 
 
-def read_data(
+def read_metrics_data(
     *,
     project: str,
     endpoint_id: str,
     start: datetime,
     end: datetime,
     metrics: list[ModelEndpointMonitoringMetric],
-
+    type: Literal["metrics", "results"] = "results",
+) -> Union[
+    list[
+        Union[
+            ModelEndpointMonitoringResultValues,
+            ModelEndpointMonitoringMetricNoData,
+        ],
+    ],
+    list[
+        Union[
+            ModelEndpointMonitoringMetricValues,
+            ModelEndpointMonitoringMetricNoData,
+        ],
+    ],
+]:
+    """
+    Read metrics OR results from the TSDB and return as a list.
+    Note: the type must match the actual metrics in the `metrics` parameter.
+    If the type is "results", pass only results in the `metrics` parameter.
+    """
     client = mlrun.utils.v3io_clients.get_frames_client(
         address=mlrun.mlconf.v3io_framesd,
         container=KVStoreBase.get_v3io_monitoring_apps_container(project),
     )
+
+    if type == "metrics":
+        table_name = mm_constants.V3IOTSDBTables.METRICS
+        name = mm_constants.MetricData.METRIC_NAME
+        df_handler = df_to_metrics_values
+    elif type == "results":
+        table_name = mm_constants.V3IOTSDBTables.APP_RESULTS
+        name = mm_constants.ResultData.RESULT_NAME
+        df_handler = df_to_results_values
+    else:
+        raise ValueError(f"Invalid {type = }")
+
+    query = _get_sql_query(
+        endpoint_id,
+        [(metric.app, metric.name) for metric in metrics],
+        table_name=table_name,
+        name=name,
+    )
+
+    logger.debug("Querying V3IO TSDB", query=query)
+
     df: pd.DataFrame = client.read(
         backend=_TSDB_BE,
-        query=
-            endpoint_id, [(metric.app, metric.name) for metric in metrics]
-        ),
+        query=query,
         start=start,
         end=end,
     )
 
+    logger.debug(
+        "Read a data-frame", project=project, endpoint_id=endpoint_id, is_empty=df.empty
+    )
+
+    return df_handler(df=df, metrics=metrics, project=project)
+
+
+def df_to_results_values(
+    *, df: pd.DataFrame, metrics: list[ModelEndpointMonitoringMetric], project: str
+) -> list[
+    Union[ModelEndpointMonitoringResultValues, ModelEndpointMonitoringMetricNoData]
+]:
+    """
+    Parse a time-indexed data-frame of results from the TSDB into a list of
+    results values per distinct results.
+    When a result is not found in the data-frame, it is represented in no-data object.
+    """
     metrics_without_data = {metric.full_name: metric for metric in metrics}
 
-    metrics_values: list[
+    metrics_values: list[
+        Union[ModelEndpointMonitoringResultValues, ModelEndpointMonitoringMetricNoData]
+    ] = []
     if not df.empty:
         grouped = df.groupby(
             [mm_writer.WriterEvent.APPLICATION_NAME, mm_writer.ResultData.RESULT_NAME],
@@ -104,13 +169,13 @@ def read_data(
         )
     else:
         grouped = []
-
+        logger.debug("No results", missing_results=metrics_without_data.keys())
+    for (app_name, name), sub_df in grouped:
         result_kind = _get_result_kind(sub_df)
-        full_name = _compose_full_name(project=project, app=app_name, name=
+        full_name = _compose_full_name(project=project, app=app_name, name=name)
         metrics_values.append(
             ModelEndpointMonitoringResultValues(
                 full_name=full_name,
-                type=ModelEndpointMonitoringMetricType.RESULT,
                 result_kind=result_kind,
                 values=list(
                     zip(
@@ -124,11 +189,147 @@ def read_data(
                 del metrics_without_data[full_name]
 
     for metric in metrics_without_data.values():
+        if metric.full_name == get_invocations_fqn(project):
+            continue
         metrics_values.append(
-
+            ModelEndpointMonitoringMetricNoData(
                 full_name=metric.full_name,
                 type=ModelEndpointMonitoringMetricType.RESULT,
             )
         )
 
     return metrics_values
+
+
+def df_to_metrics_values(
+    *, df: pd.DataFrame, metrics: list[ModelEndpointMonitoringMetric], project: str
+) -> list[
+    Union[ModelEndpointMonitoringMetricValues, ModelEndpointMonitoringMetricNoData]
+]:
+    """
+    Parse a time-indexed data-frame of metrics from the TSDB into a list of
+    metrics values per distinct results.
+    When a metric is not found in the data-frame, it is represented in no-data object.
+    """
+    metrics_without_data = {metric.full_name: metric for metric in metrics}
+
+    metrics_values: list[
+        Union[ModelEndpointMonitoringMetricValues, ModelEndpointMonitoringMetricNoData]
+    ] = []
+    if not df.empty:
+        grouped = df.groupby(
+            [mm_writer.WriterEvent.APPLICATION_NAME, mm_writer.MetricData.METRIC_NAME],
+            observed=False,
+        )
+    else:
+        logger.debug("No metrics", missing_metrics=metrics_without_data.keys())
+        grouped = []
+    for (app_name, name), sub_df in grouped:
+        full_name = _compose_full_name(
+            project=project,
+            app=app_name,
+            name=name,
+            type=ModelEndpointMonitoringMetricType.METRIC,
+        )
+        metrics_values.append(
+            ModelEndpointMonitoringMetricValues(
+                full_name=full_name,
+                values=list(
+                    zip(
+                        sub_df.index,
+                        sub_df[mm_writer.MetricData.METRIC_VALUE],
+                    )
+                ),  # pyright: ignore[reportArgumentType]
+            )
+        )
+        del metrics_without_data[full_name]
+
+    for metric in metrics_without_data.values():
+        metrics_values.append(
+            ModelEndpointMonitoringMetricNoData(
+                full_name=metric.full_name,
+                type=ModelEndpointMonitoringMetricType.METRIC,
+            )
+        )
+
+    return metrics_values
+
+
+def get_invocations_fqn(project: str):
+    return mlrun.common.schemas.model_monitoring.model_endpoints._compose_full_name(
+        project=project,
+        app=mm_constants.SpecialApps.MLRUN_INFRA,
+        name=mlrun.common.schemas.model_monitoring.constants.PredictionsQueryConstants.INVOCATIONS,
+        type=mlrun.common.schemas.model_monitoring.ModelEndpointMonitoringMetricType.METRIC,
+    )
+
+
+def read_predictions(
+    *,
+    project: str,
+    endpoint_id: str,
+    start: Optional[Union[datetime, str]] = None,
+    end: Optional[Union[datetime, str]] = None,
+    aggregation_window: Optional[str] = None,
+    limit: Optional[int] = None,
+) -> _ModelEndpointMonitoringMetricValuesBase:
+    client = mlrun.utils.v3io_clients.get_frames_client(
+        address=mlrun.mlconf.v3io_framesd,
+        container="users",
+    )
+    frames_client_kwargs = {}
+    if aggregation_window:
+        frames_client_kwargs["step"] = aggregation_window
+        frames_client_kwargs["aggregation_window"] = aggregation_window
+    if limit:
+        frames_client_kwargs["limit"] = limit
+    df: pd.DataFrame = client.read(
+        backend=_TSDB_BE,
+        table=f"pipelines/{project}/model-endpoints/predictions",
+        columns=["latency"],
+        filter=f"endpoint_id=='{endpoint_id}'",
+        start=start,
+        end=end,
+        aggregators="count",
+        **frames_client_kwargs,
+    )
+
+    full_name = get_invocations_fqn(project)
+
+    if df.empty:
+        return ModelEndpointMonitoringMetricNoData(
+            full_name=full_name,
+            type=ModelEndpointMonitoringMetricType.METRIC,
+        )
+
+    return ModelEndpointMonitoringMetricValues(
+        full_name=full_name,
+        values=list(
+            zip(
+                df.index,
+                df["count(latency)"],
+            )
+        ),
+    )
+
+
+def read_prediction_metric_for_endpoint_if_exists(
+    *,
+    project: str,
+    endpoint_id: str,
+) -> Optional[ModelEndpointMonitoringMetric]:
+    predictions = read_predictions(
+        project=project,
+        endpoint_id=endpoint_id,
+        start="0",
+        end="now",
+        limit=1,  # Read just one record, because we just want to check if there is any data for this endpoint_id
+    )
+    if predictions:
+        return ModelEndpointMonitoringMetric(
+            project=project,
+            app=mm_constants.SpecialApps.MLRUN_INFRA,
+            type=ModelEndpointMonitoringMetricType.METRIC,
+            name=mlrun.common.schemas.model_monitoring.constants.PredictionsQueryConstants.INVOCATIONS,
+            full_name=get_invocations_fqn(project),
+        )