mlrun 1.7.0rc17__py3-none-any.whl → 1.7.0rc19__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of mlrun might be problematic.
- mlrun/__main__.py +5 -2
- mlrun/alerts/alert.py +1 -1
- mlrun/artifacts/manager.py +5 -1
- mlrun/common/constants.py +64 -3
- mlrun/common/formatters/__init__.py +16 -0
- mlrun/common/formatters/base.py +59 -0
- mlrun/common/formatters/function.py +41 -0
- mlrun/common/runtimes/constants.py +32 -4
- mlrun/common/schemas/__init__.py +1 -2
- mlrun/common/schemas/alert.py +31 -9
- mlrun/common/schemas/api_gateway.py +52 -0
- mlrun/common/schemas/client_spec.py +1 -0
- mlrun/common/schemas/frontend_spec.py +1 -0
- mlrun/common/schemas/function.py +4 -0
- mlrun/common/schemas/model_monitoring/__init__.py +9 -4
- mlrun/common/schemas/model_monitoring/constants.py +22 -8
- mlrun/common/schemas/model_monitoring/grafana.py +9 -5
- mlrun/common/schemas/model_monitoring/model_endpoints.py +17 -6
- mlrun/config.py +9 -2
- mlrun/data_types/to_pandas.py +5 -5
- mlrun/datastore/datastore.py +6 -2
- mlrun/datastore/redis.py +2 -2
- mlrun/datastore/s3.py +5 -0
- mlrun/datastore/sources.py +106 -7
- mlrun/datastore/store_resources.py +5 -1
- mlrun/datastore/targets.py +5 -4
- mlrun/datastore/utils.py +42 -0
- mlrun/db/base.py +5 -1
- mlrun/db/httpdb.py +22 -3
- mlrun/db/nopdb.py +5 -1
- mlrun/errors.py +6 -0
- mlrun/execution.py +16 -6
- mlrun/feature_store/ingestion.py +7 -6
- mlrun/feature_store/retrieval/conversion.py +5 -5
- mlrun/feature_store/retrieval/job.py +7 -3
- mlrun/feature_store/retrieval/spark_merger.py +2 -1
- mlrun/frameworks/_dl_common/loggers/tensorboard_logger.py +2 -2
- mlrun/frameworks/parallel_coordinates.py +2 -1
- mlrun/frameworks/tf_keras/__init__.py +4 -1
- mlrun/launcher/client.py +4 -2
- mlrun/launcher/local.py +8 -2
- mlrun/launcher/remote.py +8 -2
- mlrun/model.py +5 -1
- mlrun/model_monitoring/db/stores/__init__.py +0 -2
- mlrun/model_monitoring/db/stores/base/store.py +16 -4
- mlrun/model_monitoring/db/stores/sqldb/models/__init__.py +43 -21
- mlrun/model_monitoring/db/stores/sqldb/models/base.py +32 -2
- mlrun/model_monitoring/db/stores/sqldb/models/mysql.py +25 -5
- mlrun/model_monitoring/db/stores/sqldb/models/sqlite.py +5 -0
- mlrun/model_monitoring/db/stores/sqldb/sql_store.py +235 -166
- mlrun/model_monitoring/db/stores/v3io_kv/kv_store.py +190 -91
- mlrun/model_monitoring/db/tsdb/__init__.py +35 -6
- mlrun/model_monitoring/db/tsdb/base.py +232 -38
- mlrun/model_monitoring/db/tsdb/helpers.py +30 -0
- mlrun/model_monitoring/db/tsdb/tdengine/__init__.py +15 -0
- mlrun/model_monitoring/db/tsdb/tdengine/schemas.py +240 -0
- mlrun/model_monitoring/db/tsdb/tdengine/stream_graph_steps.py +45 -0
- mlrun/model_monitoring/db/tsdb/tdengine/tdengine_connector.py +397 -0
- mlrun/model_monitoring/db/tsdb/v3io/v3io_connector.py +292 -104
- mlrun/model_monitoring/helpers.py +45 -0
- mlrun/model_monitoring/stream_processing.py +7 -4
- mlrun/model_monitoring/writer.py +50 -20
- mlrun/package/utils/_formatter.py +2 -2
- mlrun/projects/operations.py +8 -5
- mlrun/projects/pipelines.py +42 -15
- mlrun/projects/project.py +55 -14
- mlrun/render.py +8 -5
- mlrun/runtimes/base.py +2 -1
- mlrun/runtimes/databricks_job/databricks_wrapper.py +1 -1
- mlrun/runtimes/local.py +4 -1
- mlrun/runtimes/nuclio/api_gateway.py +32 -8
- mlrun/runtimes/nuclio/application/application.py +3 -3
- mlrun/runtimes/nuclio/function.py +1 -4
- mlrun/runtimes/utils.py +5 -6
- mlrun/serving/server.py +2 -1
- mlrun/utils/async_http.py +25 -5
- mlrun/utils/helpers.py +28 -7
- mlrun/utils/logger.py +28 -1
- mlrun/utils/notifications/notification/__init__.py +14 -9
- mlrun/utils/notifications/notification/slack.py +27 -7
- mlrun/utils/notifications/notification_pusher.py +47 -42
- mlrun/utils/v3io_clients.py +0 -1
- mlrun/utils/version/version.json +2 -2
- {mlrun-1.7.0rc17.dist-info → mlrun-1.7.0rc19.dist-info}/METADATA +9 -4
- {mlrun-1.7.0rc17.dist-info → mlrun-1.7.0rc19.dist-info}/RECORD +89 -82
- mlrun/model_monitoring/db/v3io_tsdb_reader.py +0 -134
- {mlrun-1.7.0rc17.dist-info → mlrun-1.7.0rc19.dist-info}/LICENSE +0 -0
- {mlrun-1.7.0rc17.dist-info → mlrun-1.7.0rc19.dist-info}/WHEEL +0 -0
- {mlrun-1.7.0rc17.dist-info → mlrun-1.7.0rc19.dist-info}/entry_points.txt +0 -0
- {mlrun-1.7.0rc17.dist-info → mlrun-1.7.0rc19.dist-info}/top_level.txt +0 -0
mlrun/model_monitoring/db/tsdb/v3io/v3io_connector.py
@@ -12,23 +12,27 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-import
+import typing
+from datetime import datetime
+from io import StringIO
+from typing import Literal, Optional, Union
 
 import pandas as pd
 import v3io_frames.client
 import v3io_frames.errors
-from v3io.dataplane import Client as V3IOClient
 from v3io_frames.frames_pb2 import IGNORE
 
 import mlrun.common.model_monitoring
-import mlrun.common.schemas.model_monitoring as mm_constants
+import mlrun.common.schemas.model_monitoring as mm_schemas
 import mlrun.feature_store.steps
 import mlrun.utils.v3io_clients
 from mlrun.model_monitoring.db import TSDBConnector
+from mlrun.model_monitoring.helpers import get_invocations_fqn
 from mlrun.utils import logger
 
 _TSDB_BE = "tsdb"
 _TSDB_RATE = "1/s"
+_CONTAINER = "users"
 
 
 class V3IOTSDBConnector(TSDBConnector):
@@ -37,16 +41,16 @@ class V3IOTSDBConnector(TSDBConnector):
     Client that provides API for executing commands on the V3IO TSDB table.
     """
 
+    type: str = mm_schemas.TSDBTarget.V3IO_TSDB
+
     def __init__(
         self,
         project: str,
-
-
-        v3io_framesd: str = None,
+        container: str = _CONTAINER,
+        v3io_framesd: typing.Optional[str] = None,
         create_table: bool = False,
-    ):
+    ) -> None:
         super().__init__(project=project)
-        self.access_key = access_key or mlrun.mlconf.get_v3io_access_key()
 
         self.container = container
@@ -54,21 +58,18 @@ class V3IOTSDBConnector(TSDBConnector):
         self._frames_client: v3io_frames.client.ClientBase = (
             self._get_v3io_frames_client(self.container)
         )
-        self._v3io_client: V3IOClient = mlrun.utils.v3io_clients.get_v3io_client(
-            endpoint=mlrun.mlconf.v3io_api,
-        )
 
         self._init_tables_path()
 
         if create_table:
-            self.
+            self.create_tables()
 
     def _init_tables_path(self):
         self.tables = {}
 
         events_table_full_path = mlrun.mlconf.get_model_monitoring_file_target_path(
             project=self.project,
-            kind=
+            kind=mm_schemas.FileTargetKind.EVENTS,
         )
         (
             _,
@@ -77,12 +78,12 @@ class V3IOTSDBConnector(TSDBConnector):
         ) = mlrun.common.model_monitoring.helpers.parse_model_endpoint_store_prefix(
             events_table_full_path
         )
-        self.tables[
+        self.tables[mm_schemas.V3IOTSDBTables.EVENTS] = events_path
 
         monitoring_application_full_path = (
             mlrun.mlconf.get_model_monitoring_file_target_path(
                 project=self.project,
-                kind=
+                kind=mm_schemas.FileTargetKind.MONITORING_APPLICATION,
             )
         )
         (
@@ -92,17 +93,17 @@ class V3IOTSDBConnector(TSDBConnector):
         ) = mlrun.common.model_monitoring.helpers.parse_model_endpoint_store_prefix(
             monitoring_application_full_path
         )
-        self.tables[
-            monitoring_application_path +
+        self.tables[mm_schemas.V3IOTSDBTables.APP_RESULTS] = (
+            monitoring_application_path + mm_schemas.V3IOTSDBTables.APP_RESULTS
         )
-        self.tables[
-            monitoring_application_path +
+        self.tables[mm_schemas.V3IOTSDBTables.METRICS] = (
+            monitoring_application_path + mm_schemas.V3IOTSDBTables.METRICS
         )
 
         monitoring_predictions_full_path = (
             mlrun.mlconf.get_model_monitoring_file_target_path(
                 project=self.project,
-                kind=
+                kind=mm_schemas.FileTargetKind.PREDICTIONS,
             )
         )
         (
@@ -112,25 +113,25 @@ class V3IOTSDBConnector(TSDBConnector):
         ) = mlrun.common.model_monitoring.helpers.parse_model_endpoint_store_prefix(
             monitoring_predictions_full_path
         )
-        self.tables[
-            monitoring_predictions_path
-        )
+        self.tables[mm_schemas.FileTargetKind.PREDICTIONS] = monitoring_predictions_path
 
-    def
+    def create_tables(self) -> None:
         """
-        Create the
+        Create the tables using the TSDB connector. The tables are being created in the V3IO TSDB and include:
         - app_results: a detailed result that includes status, kind, extra data, etc.
         - metrics: a basic key value that represents a single numeric metric.
+        Note that the predictions table is automatically created by the model monitoring stream pod.
         """
         application_tables = [
-
-
+            mm_schemas.V3IOTSDBTables.APP_RESULTS,
+            mm_schemas.V3IOTSDBTables.METRICS,
         ]
-        for
-            logger.info("Creating table in V3IO TSDB",
+        for table_name in application_tables:
+            logger.info("Creating table in V3IO TSDB", table_name=table_name)
+            table = self.tables[table_name]
             self._frames_client.create(
                 backend=_TSDB_BE,
-                table=
+                table=table,
                 if_exists=IGNORE,
                 rate=_TSDB_RATE,
             )
@@ -156,20 +157,20 @@ class V3IOTSDBConnector(TSDBConnector):
             "storey.TSDBTarget",
             name="tsdb_predictions",
             after="MapFeatureNames",
-            path=f"{self.container}/{self.tables[
+            path=f"{self.container}/{self.tables[mm_schemas.FileTargetKind.PREDICTIONS]}",
             rate="1/s",
-            time_col=
+            time_col=mm_schemas.EventFieldType.TIMESTAMP,
             container=self.container,
             v3io_frames=self.v3io_framesd,
             columns=["latency"],
             index_cols=[
-
+                mm_schemas.EventFieldType.ENDPOINT_ID,
             ],
             aggr="count,avg",
             aggr_granularity="1m",
             max_events=tsdb_batching_max_events,
             flush_after_seconds=tsdb_batching_timeout_secs,
-            key=
+            key=mm_schemas.EventFieldType.ENDPOINT_ID,
         )
 
         # Before writing data to TSDB, create dictionary of 2-3 dictionaries that contains
@@ -198,40 +199,40 @@ class V3IOTSDBConnector(TSDBConnector):
             "storey.TSDBTarget",
             name=name,
             after=after,
-            path=f"{self.container}/{self.tables[
+            path=f"{self.container}/{self.tables[mm_schemas.V3IOTSDBTables.EVENTS]}",
             rate="10/m",
-            time_col=
+            time_col=mm_schemas.EventFieldType.TIMESTAMP,
             container=self.container,
             v3io_frames=self.v3io_framesd,
             infer_columns_from_data=True,
             index_cols=[
-
-
-
+                mm_schemas.EventFieldType.ENDPOINT_ID,
+                mm_schemas.EventFieldType.RECORD_TYPE,
+                mm_schemas.EventFieldType.ENDPOINT_TYPE,
             ],
             max_events=tsdb_batching_max_events,
             flush_after_seconds=tsdb_batching_timeout_secs,
-            key=
+            key=mm_schemas.EventFieldType.ENDPOINT_ID,
         )
 
         # unpacked base_metrics dictionary
         apply_filter_and_unpacked_keys(
             name="FilterAndUnpackKeys1",
-            keys=
+            keys=mm_schemas.EventKeyMetrics.BASE_METRICS,
         )
         apply_tsdb_target(name="tsdb1", after="FilterAndUnpackKeys1")
 
         # unpacked endpoint_features dictionary
         apply_filter_and_unpacked_keys(
             name="FilterAndUnpackKeys2",
-            keys=
+            keys=mm_schemas.EventKeyMetrics.ENDPOINT_FEATURES,
         )
         apply_tsdb_target(name="tsdb2", after="FilterAndUnpackKeys2")
 
         # unpacked custom_metrics dictionary. In addition, use storey.Filter remove none values
         apply_filter_and_unpacked_keys(
             name="FilterAndUnpackKeys3",
-            keys=
+            keys=mm_schemas.EventKeyMetrics.CUSTOM_METRICS,
         )
 
         def apply_storey_filter():
@@ -248,62 +249,58 @@ class V3IOTSDBConnector(TSDBConnector):
     def write_application_event(
         self,
         event: dict,
-        kind:
-    ):
+        kind: mm_schemas.WriterEventKind = mm_schemas.WriterEventKind.RESULT,
+    ) -> None:
         """Write a single result or metric to TSDB"""
 
-        event[
-
-            event[mm_constants.WriterEvent.END_INFER_TIME]
-        )
+        event[mm_schemas.WriterEvent.END_INFER_TIME] = datetime.fromisoformat(
+            event[mm_schemas.WriterEvent.END_INFER_TIME]
         )
+        index_cols_base = [
+            mm_schemas.WriterEvent.END_INFER_TIME,
+            mm_schemas.WriterEvent.ENDPOINT_ID,
+            mm_schemas.WriterEvent.APPLICATION_NAME,
+        ]
 
-        if kind ==
-
-
+        if kind == mm_schemas.WriterEventKind.METRIC:
+            table = self.tables[mm_schemas.V3IOTSDBTables.METRICS]
+            index_cols = index_cols_base + [mm_schemas.MetricData.METRIC_NAME]
+        elif kind == mm_schemas.WriterEventKind.RESULT:
+            table = self.tables[mm_schemas.V3IOTSDBTables.APP_RESULTS]
+            index_cols = index_cols_base + [mm_schemas.ResultData.RESULT_NAME]
+            del event[mm_schemas.ResultData.RESULT_EXTRA_DATA]
+        else:
+            raise ValueError(f"Invalid {kind = }")
 
-        del event[mm_constants.ResultData.RESULT_EXTRA_DATA]
         try:
             self._frames_client.write(
                 backend=_TSDB_BE,
-                table=
+                table=table,
                 dfs=pd.DataFrame.from_records([event]),
-                index_cols=
-                    mm_constants.WriterEvent.END_INFER_TIME,
-                    mm_constants.WriterEvent.ENDPOINT_ID,
-                    mm_constants.WriterEvent.APPLICATION_NAME,
-                    mm_constants.ResultData.RESULT_NAME,
-                ],
-            )
-            logger.info(
-                "Updated V3IO TSDB successfully",
-                table=self.tables[mm_constants.MonitoringTSDBTables.APP_RESULTS],
+                index_cols=index_cols,
             )
+            logger.info("Updated V3IO TSDB successfully", table=table)
         except v3io_frames.errors.Error as err:
-            logger.
+            logger.exception(
                 "Could not write drift measures to TSDB",
                 err=err,
-                table=
+                table=table,
                 event=event,
             )
-
             raise mlrun.errors.MLRunRuntimeError(
                 f"Failed to write application result to TSDB: {err}"
             )
 
-    def delete_tsdb_resources(self, table: str = None):
+    def delete_tsdb_resources(self, table: typing.Optional[str] = None):
         if table:
             # Delete a specific table
             tables = [table]
         else:
             # Delete all tables
-            tables =
-        for
+            tables = mm_schemas.V3IOTSDBTables.list()
+        for table_to_delete in tables:
             try:
-                self._frames_client.delete(
-                    backend=mlrun.common.schemas.model_monitoring.TimeSeriesConnector.TSDB,
-                    table=table,
-                )
+                self._frames_client.delete(backend=_TSDB_BE, table=table_to_delete)
             except v3io_frames.errors.DeleteError as e:
                 logger.warning(
                     f"Failed to delete TSDB table '{table}'",
@@ -317,11 +314,7 @@ class V3IOTSDBConnector(TSDBConnector):
             store.rm(tsdb_path, recursive=True)
 
     def get_model_endpoint_real_time_metrics(
-        self,
-        endpoint_id: str,
-        metrics: list[str],
-        start: str = "now-1h",
-        end: str = "now",
+        self, endpoint_id: str, metrics: list[str], start: str, end: str
     ) -> dict[str, list[tuple[str, float]]]:
         """
         Getting real time metrics from the TSDB. There are pre-defined metrics for model endpoints such as
@@ -349,8 +342,8 @@ class V3IOTSDBConnector(TSDBConnector):
         metrics_mapping = {}
 
         try:
-            data = self.
-                table=
+            data = self._get_records(
+                table=mm_schemas.V3IOTSDBTables.EVENTS,
                 columns=["endpoint_id", *metrics],
                 filter_query=f"endpoint_id=='{endpoint_id}'",
                 start=start,
@@ -374,45 +367,74 @@ class V3IOTSDBConnector(TSDBConnector):
 
         return metrics_mapping
 
-    def
+    def _get_records(
         self,
         table: str,
-
+        start: Union[datetime, str],
+        end: Union[datetime, str],
+        columns: typing.Optional[list[str]] = None,
         filter_query: str = "",
-
-
+        interval: typing.Optional[str] = None,
+        agg_funcs: typing.Optional[list] = None,
+        limit: typing.Optional[int] = None,
+        sliding_window_step: typing.Optional[str] = None,
+        **kwargs,
     ) -> pd.DataFrame:
         """
         Getting records from V3IO TSDB data collection.
-        :param table:
-        :param
-
-
-
-
-
-
-
-
-
+        :param table:                 Path to the collection to query.
+        :param start:                 The start time of the metrics. Can be represented by a string containing an RFC
+                                      3339 time, a Unix timestamp in milliseconds, a relative time (`'now'` or
+                                      `'now-[0-9]+[mhd]'`, where `m` = minutes, `h` = hours, `'d'` = days, and
+                                      `'s'` = seconds), or 0 for the earliest time.
+        :param end:                   The end time of the metrics. Can be represented by a string containing an RFC
+                                      3339 time, a Unix timestamp in milliseconds, a relative time (`'now'` or
+                                      `'now-[0-9]+[mhd]'`, where `m` = minutes, `h` = hours, `'d'` = days, and
+                                      `'s'` = seconds), or 0 for the earliest time.
+        :param columns:               Columns to include in the result.
+        :param filter_query:          V3IO filter expression. The expected filter expression includes different
+                                      conditions, divided by ' AND '.
+        :param interval:              The interval to aggregate the data by. Note that if interval is provided,
+                                      agg_funcs must bg provided as well. Provided as a string in the format of '1m',
+                                      '1h', etc.
+        :param agg_funcs:             The aggregation functions to apply on the columns. Note that if `agg_funcs` is
+                                      provided, `interval` must bg provided as well. Provided as a list of strings in
+                                      the format of ['sum', 'avg', 'count', ...].
+        :param limit:                 The maximum number of records to return.
+        :param sliding_window_step:   The time step for which the time window moves forward. Note that if
+                                      `sliding_window_step` is provided, interval must be provided as well. Provided
+                                      as a string in the format of '1m', '1h', etc.
+        :param kwargs:                Additional keyword arguments passed to the read method of frames client.
         :return: DataFrame with the provided attributes from the data collection.
         :raise:  MLRunNotFoundError if the provided table wasn't found.
         """
         if table not in self.tables:
             raise mlrun.errors.MLRunNotFoundError(
-                f"Table '{table}' does not exist in the tables list of the TSDB connector."
+                f"Table '{table}' does not exist in the tables list of the TSDB connector. "
                 f"Available tables: {list(self.tables.keys())}"
             )
-
-
-
-
-
+
+        if agg_funcs:
+            # Frames client expects the aggregators to be a comma-separated string
+            agg_funcs = ",".join(agg_funcs)
+        table_path = self.tables[table]
+        df = self._frames_client.read(
+            backend=_TSDB_BE,
+            table=table_path,
            start=start,
            end=end,
+            columns=columns,
+            filter=filter_query,
+            aggregation_window=interval,
+            aggregators=agg_funcs,
+            step=sliding_window_step,
+            **kwargs,
         )
 
+        if limit:
+            df = df.head(limit)
+        return df
+
     def _get_v3io_source_directory(self) -> str:
         """
         Get the V3IO source directory for the current project. Usually the source directory will
@@ -422,7 +444,7 @@ class V3IOTSDBConnector(TSDBConnector):
         """
         events_table_full_path = mlrun.mlconf.get_model_monitoring_file_target_path(
             project=self.project,
-            kind=
+            kind=mm_schemas.FileTargetKind.EVENTS,
         )
 
         # Generate the main directory with the V3IO resources
@@ -440,3 +462,169 @@ class V3IOTSDBConnector(TSDBConnector):
             address=mlrun.mlconf.v3io_framesd,
             container=v3io_container,
         )
+
+    def read_metrics_data(
+        self,
+        *,
+        endpoint_id: str,
+        start: datetime,
+        end: datetime,
+        metrics: list[mm_schemas.ModelEndpointMonitoringMetric],
+        type: Literal["metrics", "results"] = "results",
+    ) -> Union[
+        list[
+            Union[
+                mm_schemas.ModelEndpointMonitoringResultValues,
+                mm_schemas.ModelEndpointMonitoringMetricNoData,
+            ],
+        ],
+        list[
+            Union[
+                mm_schemas.ModelEndpointMonitoringMetricValues,
+                mm_schemas.ModelEndpointMonitoringMetricNoData,
+            ],
+        ],
+    ]:
+        """
+        Read metrics OR results from the TSDB and return as a list.
+        Note: the type must match the actual metrics in the `metrics` parameter.
+        If the type is "results", pass only results in the `metrics` parameter.
+        """
+        if type == "metrics":
+            table_path = self.tables[mm_schemas.V3IOTSDBTables.METRICS]
+            name = mm_schemas.MetricData.METRIC_NAME
+            df_handler = self.df_to_metrics_values
+        elif type == "results":
+            table_path = self.tables[mm_schemas.V3IOTSDBTables.APP_RESULTS]
+            name = mm_schemas.ResultData.RESULT_NAME
+            df_handler = self.df_to_results_values
+        else:
+            raise ValueError(f"Invalid {type = }")
+
+        query = self._get_sql_query(
+            endpoint_id,
+            [(metric.app, metric.name) for metric in metrics],
+            table_path=table_path,
+            name=name,
+        )
+
+        logger.debug("Querying V3IO TSDB", query=query)
+
+        df: pd.DataFrame = self._frames_client.read(
+            backend=_TSDB_BE,
+            start=start,
+            end=end,
+            query=query,  # the filter argument does not work for this complex condition
+        )
+
+        logger.debug(
+            "Converting a DataFrame to a list of metrics or results values",
+            table=table_path,
+            project=self.project,
+            endpoint_id=endpoint_id,
+            is_empty=df.empty,
+        )
+
+        return df_handler(df=df, metrics=metrics, project=self.project)
+
+    @staticmethod
+    def _get_sql_query(
+        endpoint_id: str,
+        names: list[tuple[str, str]],
+        table_path: str,
+        name: str = mm_schemas.ResultData.RESULT_NAME,
+    ) -> str:
+        """Get the SQL query for the results/metrics table"""
+        with StringIO() as query:
+            query.write(
+                f"SELECT * FROM '{table_path}' "
+                f"WHERE {mm_schemas.WriterEvent.ENDPOINT_ID}='{endpoint_id}'"
+            )
+            if names:
+                query.write(" AND (")
+
+                for i, (app_name, result_name) in enumerate(names):
+                    sub_cond = (
+                        f"({mm_schemas.WriterEvent.APPLICATION_NAME}='{app_name}' "
+                        f"AND {name}='{result_name}')"
+                    )
+                    if i != 0:  # not first sub condition
+                        query.write(" OR ")
+                    query.write(sub_cond)
+
+                query.write(")")
+
+            query.write(";")
+            return query.getvalue()
+
+    def read_predictions(
+        self,
+        *,
+        endpoint_id: str,
+        start: Union[datetime, str],
+        end: Union[datetime, str],
+        aggregation_window: Optional[str] = None,
+        agg_funcs: Optional[list[str]] = None,
+        limit: Optional[int] = None,
+    ) -> Union[
+        mm_schemas.ModelEndpointMonitoringMetricNoData,
+        mm_schemas.ModelEndpointMonitoringMetricValues,
+    ]:
+        if (agg_funcs and not aggregation_window) or (
+            aggregation_window and not agg_funcs
+        ):
+            raise mlrun.errors.MLRunInvalidArgumentError(
+                "both or neither of `aggregation_window` and `agg_funcs` must be provided"
+            )
+        df = self._get_records(
+            table=mm_schemas.FileTargetKind.PREDICTIONS,
+            start=start,
+            end=end,
+            columns=[mm_schemas.EventFieldType.LATENCY],
+            filter_query=f"endpoint_id=='{endpoint_id}'",
+            interval=aggregation_window,
+            agg_funcs=agg_funcs,
+            limit=limit,
+            sliding_window_step=aggregation_window,
+        )
+
+        full_name = get_invocations_fqn(self.project)
+
+        if df.empty:
+            return mm_schemas.ModelEndpointMonitoringMetricNoData(
+                full_name=full_name,
+                type=mm_schemas.ModelEndpointMonitoringMetricType.METRIC,
+            )
+
+        latency_column = (
+            f"{agg_funcs[0]}({mm_schemas.EventFieldType.LATENCY})"
+            if agg_funcs
+            else mm_schemas.EventFieldType.LATENCY
+        )
+
+        return mm_schemas.ModelEndpointMonitoringMetricValues(
+            full_name=full_name,
+            values=list(
+                zip(
+                    df.index,
+                    df[latency_column],
+                )
+            ),  # pyright: ignore[reportArgumentType]
+        )
+
+    def read_prediction_metric_for_endpoint_if_exists(
+        self, endpoint_id: str
+    ) -> Optional[mm_schemas.ModelEndpointMonitoringMetric]:
+        # Read just one record, because we just want to check if there is any data for this endpoint_id
+        predictions = self.read_predictions(
+            endpoint_id=endpoint_id, start="0", end="now", limit=1
+        )
+        if predictions:
+            return mm_schemas.ModelEndpointMonitoringMetric(
+                project=self.project,
+                app=mm_schemas.SpecialApps.MLRUN_INFRA,
+                type=mm_schemas.ModelEndpointMonitoringMetricType.METRIC,
+                name=mm_schemas.PredictionsQueryConstants.INVOCATIONS,
+                full_name=get_invocations_fqn(self.project),
+            )
mlrun/model_monitoring/helpers.py
@@ -24,6 +24,10 @@ import mlrun.common.schemas
 from mlrun.common.schemas.model_monitoring import (
     EventFieldType,
 )
+from mlrun.common.schemas.model_monitoring.model_endpoints import (
+    ModelEndpointMonitoringMetricType,
+    _compose_full_name,
+)
 from mlrun.model_monitoring.model_endpoint import ModelEndpoint
 from mlrun.utils import logger
@@ -111,6 +115,24 @@ def get_connection_string(secret_provider: typing.Callable = None) -> str:
     )
 
 
+def get_tsdb_connection_string(
+    secret_provider: typing.Optional[typing.Callable] = None,
+) -> str:
+    """Get TSDB connection string from the project secret. If wasn't set, take it from the system
+    configurations.
+    :param secret_provider: An optional secret provider to get the connection string secret.
+    :return:                Valid TSDB connection string.
+    """
+
+    return (
+        mlrun.get_secret_or_env(
+            key=mlrun.common.schemas.model_monitoring.ProjectSecretKeys.TSDB_CONNECTION,
+            secret_provider=secret_provider,
+        )
+        or mlrun.mlconf.model_endpoint_monitoring.tsdb_connection
+    )
+
+
 def batch_dict2timedelta(batch_dict: _BatchDict) -> datetime.timedelta:
     """
     Convert a batch dictionary to timedelta.
@@ -260,3 +282,26 @@ def get_endpoint_record(project: str, endpoint_id: str):
         project=project,
     )
     return model_endpoint_store.get_model_endpoint(endpoint_id=endpoint_id)
+
+
+def get_result_instance_fqn(
+    model_endpoint_id: str, app_name: str, result_name: str
+) -> str:
+    return f"{model_endpoint_id}.{app_name}.result.{result_name}"
+
+
+def get_default_result_instance_fqn(model_endpoint_id: str) -> str:
+    return get_result_instance_fqn(
+        model_endpoint_id,
+        mm_constants.HistogramDataDriftApplicationConstants.NAME,
+        mm_constants.HistogramDataDriftApplicationConstants.GENERAL_RESULT_NAME,
+    )
+
+
+def get_invocations_fqn(project: str) -> str:
+    return _compose_full_name(
+        project=project,
+        app=mm_constants.SpecialApps.MLRUN_INFRA,
+        name=mm_constants.PredictionsQueryConstants.INVOCATIONS,
+        type=ModelEndpointMonitoringMetricType.METRIC,
+    )
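
For orientation, a minimal sketch of how the reworked prediction-reading API added to V3IOTSDBConnector in this release might be called. The project name, endpoint ID, and time window below are illustrative placeholders, not values taken from the diff, and a configured MLRun/V3IO environment is assumed.

from datetime import datetime, timedelta, timezone

from mlrun.model_monitoring.db.tsdb.v3io.v3io_connector import V3IOTSDBConnector

# The container argument now defaults to "users" (_CONTAINER in the diff above).
connector = V3IOTSDBConnector(project="my-project")

end = datetime.now(tz=timezone.utc)
start = end - timedelta(hours=1)

# Average invocation latency over the last hour, aggregated in 10-minute windows.
# read_predictions takes keyword-only arguments, per the new signature above.
latency = connector.read_predictions(
    endpoint_id="my-endpoint-id",  # placeholder model endpoint ID
    start=start,
    end=end,
    aggregation_window="10m",
    agg_funcs=["avg"],
)
print(latency)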