mlrun 1.7.0rc38__py3-none-any.whl → 1.7.0rc41__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release. This version of mlrun might be problematic; see the registry page for details.
- mlrun/alerts/alert.py +30 -27
- mlrun/common/constants.py +3 -0
- mlrun/common/helpers.py +0 -1
- mlrun/common/schemas/alert.py +3 -0
- mlrun/common/schemas/model_monitoring/model_endpoints.py +0 -1
- mlrun/common/schemas/notification.py +1 -0
- mlrun/config.py +1 -1
- mlrun/data_types/to_pandas.py +9 -9
- mlrun/datastore/alibaba_oss.py +3 -2
- mlrun/datastore/azure_blob.py +7 -9
- mlrun/datastore/base.py +13 -1
- mlrun/datastore/dbfs_store.py +3 -7
- mlrun/datastore/filestore.py +1 -3
- mlrun/datastore/google_cloud_storage.py +84 -29
- mlrun/datastore/redis.py +1 -0
- mlrun/datastore/s3.py +3 -2
- mlrun/datastore/sources.py +54 -0
- mlrun/datastore/storeytargets.py +147 -0
- mlrun/datastore/targets.py +76 -122
- mlrun/datastore/v3io.py +1 -0
- mlrun/db/httpdb.py +6 -1
- mlrun/errors.py +8 -0
- mlrun/execution.py +7 -0
- mlrun/feature_store/api.py +5 -0
- mlrun/feature_store/retrieval/job.py +1 -0
- mlrun/model.py +24 -3
- mlrun/model_monitoring/api.py +10 -2
- mlrun/model_monitoring/applications/_application_steps.py +52 -34
- mlrun/model_monitoring/applications/context.py +206 -70
- mlrun/model_monitoring/applications/histogram_data_drift.py +15 -13
- mlrun/model_monitoring/controller.py +15 -12
- mlrun/model_monitoring/db/stores/sqldb/sql_store.py +17 -8
- mlrun/model_monitoring/db/stores/v3io_kv/kv_store.py +19 -9
- mlrun/model_monitoring/db/tsdb/tdengine/schemas.py +85 -47
- mlrun/model_monitoring/db/tsdb/tdengine/tdengine_connector.py +46 -10
- mlrun/model_monitoring/db/tsdb/v3io/v3io_connector.py +38 -24
- mlrun/model_monitoring/helpers.py +54 -18
- mlrun/model_monitoring/stream_processing.py +10 -29
- mlrun/projects/pipelines.py +19 -30
- mlrun/projects/project.py +86 -67
- mlrun/run.py +8 -6
- mlrun/runtimes/__init__.py +4 -0
- mlrun/runtimes/nuclio/api_gateway.py +18 -0
- mlrun/runtimes/nuclio/application/application.py +150 -59
- mlrun/runtimes/nuclio/function.py +5 -11
- mlrun/runtimes/nuclio/serving.py +2 -2
- mlrun/runtimes/utils.py +16 -0
- mlrun/serving/routers.py +1 -1
- mlrun/serving/server.py +19 -5
- mlrun/serving/states.py +8 -0
- mlrun/serving/v2_serving.py +34 -26
- mlrun/utils/helpers.py +33 -2
- mlrun/utils/version/version.json +2 -2
- {mlrun-1.7.0rc38.dist-info → mlrun-1.7.0rc41.dist-info}/METADATA +9 -12
- {mlrun-1.7.0rc38.dist-info → mlrun-1.7.0rc41.dist-info}/RECORD +59 -58
- {mlrun-1.7.0rc38.dist-info → mlrun-1.7.0rc41.dist-info}/WHEEL +1 -1
- {mlrun-1.7.0rc38.dist-info → mlrun-1.7.0rc41.dist-info}/LICENSE +0 -0
- {mlrun-1.7.0rc38.dist-info → mlrun-1.7.0rc41.dist-info}/entry_points.txt +0 -0
- {mlrun-1.7.0rc38.dist-info → mlrun-1.7.0rc41.dist-info}/top_level.txt +0 -0
mlrun/model_monitoring/db/stores/sqldb/sql_store.py

@@ -20,7 +20,7 @@ import pandas as pd
 import sqlalchemy
 import sqlalchemy.exc
 import sqlalchemy.orm
-from sqlalchemy.engine import make_url
+from sqlalchemy.engine import Engine, make_url
 from sqlalchemy.sql.elements import BinaryExpression

 import mlrun.common.model_monitoring.helpers
@@ -61,9 +61,15 @@ class SQLStoreBase(StoreBase):
         )

         self._sql_connection_string = kwargs.get("store_connection_string")
-        self._engine = get_engine(dsn=self._sql_connection_string)
+        self._engine = None
         self._init_tables()

+    @property
+    def engine(self) -> Engine:
+        if not self._engine:
+            self._engine = get_engine(dsn=self._sql_connection_string)
+        return self._engine
+
     def create_tables(self):
         self._create_tables_if_not_exist()

@@ -116,7 +122,7 @@ class SQLStoreBase(StoreBase):
         :param table_name: Target table name.
         :param event: Event dictionary that will be written into the DB.
         """
-        with self._engine.connect() as connection:
+        with self.engine.connect() as connection:
             # Convert the result into a pandas Dataframe and write it into the database
             event_df = pd.DataFrame([event])
             event_df.to_sql(table_name, con=connection, index=False, if_exists="append")
@@ -177,7 +183,7 @@ class SQLStoreBase(StoreBase):
         :param table: SQLAlchemy declarative table.
         :param criteria: A list of binary expressions that filter the query.
         """
-        if not self._engine.has_table(table.__tablename__):
+        if not self.engine.has_table(table.__tablename__):
             logger.debug(
                 f"Table {table.__tablename__} does not exist in the database. Skipping deletion."
             )
@@ -524,9 +530,9 @@ class SQLStoreBase(StoreBase):
         for table in self._tables:
             # Create table if not exist. The `metadata` contains the `ModelEndpointsTable`
             db_name = make_url(self._sql_connection_string).database
-            if not self._engine.has_table(table):
+            if not self.engine.has_table(table):
                 logger.info(f"Creating table {table} on {db_name} db.")
-                self._tables[table].metadata.create_all(bind=self._engine)
+                self._tables[table].metadata.create_all(bind=self.engine)
             else:
                 logger.info(f"Table {table} already exists on {db_name} db.")

@@ -574,8 +580,11 @@ class SQLStoreBase(StoreBase):
         """
         Delete all the model monitoring resources of the project in the SQL tables.
         """
+        logger.debug(
+            "Deleting model monitoring endpoints resources from the SQL tables",
+            project=self.project,
+        )
         endpoints = self.list_model_endpoints()
-        logger.debug("Deleting model monitoring resources", project=self.project)

         for endpoint_dict in endpoints:
             endpoint_id = endpoint_dict[mm_schemas.EventFieldType.UID]
@@ -612,7 +621,7 @@ class SQLStoreBase(StoreBase):

         # Note: the block below does not use self._get, as we need here all the
         # results, not only `one_or_none`.
-        with sqlalchemy.orm.Session(self._engine) as session:
+        with sqlalchemy.orm.Session(self.engine) as session:
             metric_rows = (
                 session.query(table)  # pyright: ignore[reportOptionalCall]
                 .filter(table.endpoint_id == endpoint_id)
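The sql_store.py change above sets the pattern for this whole release: connection-holding clients (the SQL engine here, and the V3IO KV client, TDEngine connection, and v3io-frames client below) are no longer created eagerly in __init__ but exposed as lazily-initialized properties. A minimal sketch of the idea, using a hypothetical LazyStore class and SQLAlchemy's public create_engine (mlrun itself goes through its internal get_engine helper):

import sqlalchemy
from sqlalchemy.engine import Engine


class LazyStore:
    def __init__(self, dsn: str):
        self._dsn = dsn
        self._engine = None  # nothing is connected at construction time

    @property
    def engine(self) -> Engine:
        # build the engine on first access, then reuse the cached instance
        if not self._engine:
            self._engine = sqlalchemy.create_engine(self._dsn)
        return self._engine


store = LazyStore("sqlite:///:memory:")  # cheap: no connection yet
with store.engine.connect() as conn:     # the engine is created here
    conn.execute(sqlalchemy.text("SELECT 1"))

Construction stays cheap, and processes that never touch the database never open a connection; the trade-off is that connection errors surface on first use rather than at startup.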
mlrun/model_monitoring/db/stores/v3io_kv/kv_store.py

@@ -20,6 +20,7 @@ from http import HTTPStatus
 import v3io.dataplane
 import v3io.dataplane.output
 import v3io.dataplane.response
+from v3io.dataplane import Client as V3IOClient

 import mlrun.common.model_monitoring.helpers
 import mlrun.common.schemas.model_monitoring as mm_schemas
@@ -34,11 +35,11 @@ fields_to_encode_decode = [
 ]

 _METRIC_FIELDS: list[str] = [
-    mm_schemas.WriterEvent.APPLICATION_NAME,
-    mm_schemas.MetricData.METRIC_NAME,
-    mm_schemas.MetricData.METRIC_VALUE,
-    mm_schemas.WriterEvent.START_INFER_TIME,
-    mm_schemas.WriterEvent.END_INFER_TIME,
+    mm_schemas.WriterEvent.APPLICATION_NAME.value,
+    mm_schemas.MetricData.METRIC_NAME.value,
+    mm_schemas.MetricData.METRIC_VALUE.value,
+    mm_schemas.WriterEvent.START_INFER_TIME.value,
+    mm_schemas.WriterEvent.END_INFER_TIME.value,
 ]

@@ -100,13 +101,18 @@ class KVStoreBase(StoreBase):
         project: str,
     ) -> None:
         super().__init__(project=project)
-
-        self.client = mlrun.utils.v3io_clients.get_v3io_client(
-            endpoint=mlrun.mlconf.v3io_api,
-        )
+        self._client = None
         # Get the KV table path and container
         self.path, self.container = self._get_path_and_container()

+    @property
+    def client(self) -> V3IOClient:
+        if not self._client:
+            self._client = mlrun.utils.v3io_clients.get_v3io_client(
+                endpoint=mlrun.mlconf.v3io_api,
+            )
+        return self._client
+
     def write_model_endpoint(self, endpoint: dict[str, typing.Any]):
         """
         Create a new endpoint record in the KV table.
@@ -285,6 +291,10 @@ class KVStoreBase(StoreBase):
         """
         Delete all model endpoints resources in V3IO KV.
         """
+        logger.debug(
+            "Deleting model monitoring endpoints resources in V3IO KV",
+            project=self.project,
+        )

         endpoints = self.list_model_endpoints()

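The _METRIC_FIELDS change swaps enum members for their .value strings, so the list holds plain str objects that match its list[str] annotation and serialize predictably. A small illustration with a hypothetical str-based enum (mlrun's own StrEnum behaves the same way for the properties shown here):

import enum


class WriterEvent(str, enum.Enum):  # hypothetical stand-in for mlrun's StrEnum
    APPLICATION_NAME = "application_name"


member = WriterEvent.APPLICATION_NAME
print(member == "application_name")  # True: str-mixin members compare equal to str
print(type(member) is str)           # False: the member is still an enum instance
print(str(member))                   # "WriterEvent.APPLICATION_NAME", not the raw value
print(member.value)                  # "application_name": a plain str

Passing .value to serialization layers and external APIs avoids the str()-rendering surprise on the third line above.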
mlrun/model_monitoring/db/tsdb/tdengine/schemas.py

@@ -17,6 +17,8 @@ from dataclasses import dataclass
 from io import StringIO
 from typing import Optional, Union

+import taosws
+
 import mlrun.common.schemas.model_monitoring as mm_schemas
 import mlrun.common.types

@@ -28,6 +30,9 @@ class _TDEngineColumnType:
         self.data_type = data_type
         self.length = length

+    def values_to_column(self, values):
+        raise NotImplementedError()
+
     def __str__(self):
         if self.length is not None:
             return f"{self.data_type}({self.length})"
@@ -44,6 +49,26 @@ class _TDEngineColumn(mlrun.common.types.StrEnum):
     BINARY_10000 = _TDEngineColumnType("BINARY", 10000)


+def values_to_column(values, column_type):
+    if column_type == _TDEngineColumn.TIMESTAMP:
+        timestamps = [round(timestamp.timestamp() * 1000) for timestamp in values]
+        return taosws.millis_timestamps_to_column(timestamps)
+    if column_type == _TDEngineColumn.FLOAT:
+        return taosws.floats_to_column(values)
+    if column_type == _TDEngineColumn.INT:
+        return taosws.ints_to_column(values)
+    if column_type == _TDEngineColumn.BINARY_40:
+        return taosws.binary_to_column(values)
+    if column_type == _TDEngineColumn.BINARY_64:
+        return taosws.binary_to_column(values)
+    if column_type == _TDEngineColumn.BINARY_10000:
+        return taosws.binary_to_column(values)
+
+    raise mlrun.errors.MLRunInvalidArgumentError(
+        f"unsupported column type '{column_type}'"
+    )
+
+
 @dataclass
 class TDEngineSchema:
     """
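The new module-level values_to_column helper is the bridge between plain Python values and the typed column objects that taosws binds into prepared statements. A hedged usage sketch (the sample values are illustrative; the taosws conversion calls are the ones used in the hunk above):

import datetime

# One single-row column of each kind the helper supports:
now = datetime.datetime.now(datetime.timezone.utc)

ts_col = values_to_column([now], _TDEngineColumn.TIMESTAMP)         # millisecond timestamps
value_col = values_to_column([0.87], _TDEngineColumn.FLOAT)         # floats
status_col = values_to_column([1], _TDEngineColumn.INT)             # ints
name_col = values_to_column(["my-app"], _TDEngineColumn.BINARY_64)  # binary/str payloads

Note that all three BINARY widths funnel into the same taosws.binary_to_column call; the declared width only matters for the super-table DDL.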
@@ -55,13 +80,14 @@ class TDEngineSchema:
     def __init__(
         self,
         super_table: str,
-        columns: dict[str,
+        columns: dict[str, _TDEngineColumn],
         tags: dict[str, str],
+        database: Optional[str] = None,
     ):
         self.super_table = super_table
         self.columns = columns
         self.tags = tags
-        self.database = _MODEL_MONITORING_DATABASE
+        self.database = database or _MODEL_MONITORING_DATABASE

     def _create_super_table_query(self) -> str:
         columns = ", ".join(f"{col} {val}" for col, val in self.columns.items())
@@ -83,11 +109,23 @@ class TDEngineSchema:

     def _insert_subtable_query(
         self,
+        connection: taosws.Connection,
         subtable: str,
         values: dict[str, Union[str, int, float, datetime.datetime]],
-    ) ->
+    ) -> taosws.TaosStmt:
+        stmt = connection.statement()
+        question_marks = ", ".join("?" * len(self.columns))
+        stmt.prepare(f"INSERT INTO ? VALUES ({question_marks});")
+        stmt.set_tbname_tags(subtable, [])
+
+        bind_params = []
+
+        for col_name, col_type in self.columns.items():
+            val = values[col_name]
+            bind_params.append(values_to_column([val], col_type))
+
+        stmt.bind_param(bind_params)
+        return stmt

     def _delete_subtable_query(
         self,
@@ -188,53 +226,53 @@ class TDEngineSchema:

 @dataclass
 class AppResultTable(TDEngineSchema):
-    database = _MODEL_MONITORING_DATABASE
+    def __init__(self, database: Optional[str] = None):
+        super_table = mm_schemas.TDEngineSuperTables.APP_RESULTS
+        columns = {
+            mm_schemas.WriterEvent.END_INFER_TIME: _TDEngineColumn.TIMESTAMP,
+            mm_schemas.WriterEvent.START_INFER_TIME: _TDEngineColumn.TIMESTAMP,
+            mm_schemas.ResultData.RESULT_VALUE: _TDEngineColumn.FLOAT,
+            mm_schemas.ResultData.RESULT_STATUS: _TDEngineColumn.INT,
+        }
+        tags = {
+            mm_schemas.EventFieldType.PROJECT: _TDEngineColumn.BINARY_64,
+            mm_schemas.WriterEvent.ENDPOINT_ID: _TDEngineColumn.BINARY_64,
+            mm_schemas.WriterEvent.APPLICATION_NAME: _TDEngineColumn.BINARY_64,
+            mm_schemas.ResultData.RESULT_NAME: _TDEngineColumn.BINARY_64,
+            mm_schemas.ResultData.RESULT_KIND: _TDEngineColumn.INT,
+        }
+        super().__init__(super_table, columns, tags, database)


 @dataclass
 class Metrics(TDEngineSchema):
+    def __init__(self, database: Optional[str] = None):
+        super_table = mm_schemas.TDEngineSuperTables.METRICS
+        columns = {
+            mm_schemas.WriterEvent.END_INFER_TIME: _TDEngineColumn.TIMESTAMP,
+            mm_schemas.WriterEvent.START_INFER_TIME: _TDEngineColumn.TIMESTAMP,
+            mm_schemas.MetricData.METRIC_VALUE: _TDEngineColumn.FLOAT,
+        }
+        tags = {
+            mm_schemas.EventFieldType.PROJECT: _TDEngineColumn.BINARY_64,
+            mm_schemas.WriterEvent.ENDPOINT_ID: _TDEngineColumn.BINARY_64,
+            mm_schemas.WriterEvent.APPLICATION_NAME: _TDEngineColumn.BINARY_64,
+            mm_schemas.MetricData.METRIC_NAME: _TDEngineColumn.BINARY_64,
+        }
+        super().__init__(super_table, columns, tags, database)


 @dataclass
 class Predictions(TDEngineSchema):
+    def __init__(self, database: Optional[str] = None):
+        super_table = mm_schemas.TDEngineSuperTables.PREDICTIONS
+        columns = {
+            mm_schemas.EventFieldType.TIME: _TDEngineColumn.TIMESTAMP,
+            mm_schemas.EventFieldType.LATENCY: _TDEngineColumn.FLOAT,
+            mm_schemas.EventKeyMetrics.CUSTOM_METRICS: _TDEngineColumn.BINARY_10000,
+        }
+        tags = {
+            mm_schemas.EventFieldType.PROJECT: _TDEngineColumn.BINARY_64,
+            mm_schemas.WriterEvent.ENDPOINT_ID: _TDEngineColumn.BINARY_64,
+        }
+        super().__init__(super_table, columns, tags, database)
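Taken together, the schema classes and _insert_subtable_query produce a fully parameterized insert: the subtable name is bound through set_tbname_tags and each column arrives as a typed column object, so no value is interpolated into the SQL text. A hedged end-to-end sketch (the DSN and subtable name are placeholders, not values from the diff; Metrics and mm_schemas come from the code above):

import datetime

import taosws

import mlrun.common.schemas.model_monitoring as mm_schemas

schema = Metrics()  # falls back to the default model-monitoring database
now = datetime.datetime.now(datetime.timezone.utc)
values = {
    mm_schemas.WriterEvent.END_INFER_TIME: now,
    mm_schemas.WriterEvent.START_INFER_TIME: now - datetime.timedelta(seconds=5),
    mm_schemas.MetricData.METRIC_VALUE: 0.42,
}

connection = taosws.connect("taosws://root:taosdata@localhost:6041")  # placeholder DSN
statement = schema._insert_subtable_query(
    connection, subtable="metrics_ep1_my_metric", values=values
)
statement.add_batch()
statement.execute()

The optional database argument added to every schema class serves the same purpose at the DDL level: Metrics("my_db") targets a custom database instead of the hard-coded default.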
mlrun/model_monitoring/db/tsdb/tdengine/tdengine_connector.py

@@ -47,10 +47,17 @@ class TDEngineConnector(TSDBConnector):
         )
         self._tdengine_connection_string = kwargs.get("connection_string")
         self.database = database
+
+        self._connection = None
         self._init_super_tables()

+    @property
+    def connection(self) -> taosws.Connection:
+        if not self._connection:
+            self._connection = self._create_connection()
+        return self._connection
+
+    def _create_connection(self) -> taosws.Connection:
         """Establish a connection to the TSDB server."""
         conn = taosws.connect(self._tdengine_connection_string)
         try:
@@ -58,15 +65,26 @@ class TDEngineConnector(TSDBConnector):
         except taosws.QueryError:
             # Database already exists
             pass
+        try:
+            conn.execute(f"USE {self.database}")
+        except taosws.QueryError as e:
+            raise mlrun.errors.MLRunTSDBConnectionFailure(
+                f"Failed to use TDEngine database {self.database}, {mlrun.errors.err_to_str(e)}"
+            )
         return conn

     def _init_super_tables(self):
         """Initialize the super tables for the TSDB."""
         self.tables = {
-            mm_schemas.TDEngineSuperTables.APP_RESULTS: tdengine_schemas.AppResultTable(
+            mm_schemas.TDEngineSuperTables.APP_RESULTS: tdengine_schemas.AppResultTable(
+                self.database
+            ),
+            mm_schemas.TDEngineSuperTables.METRICS: tdengine_schemas.Metrics(
+                self.database
+            ),
+            mm_schemas.TDEngineSuperTables.PREDICTIONS: tdengine_schemas.Predictions(
+                self.database
+            ),
         }

     def create_tables(self):
@@ -97,6 +115,7 @@ class TDEngineConnector(TSDBConnector):
             table_name = (
                 f"{table_name}_" f"{event[mm_schemas.ResultData.RESULT_NAME]}"
             ).replace("-", "_")
+            event.pop(mm_schemas.ResultData.CURRENT_STATS, None)

         else:
             # Write a new metric
@@ -105,14 +124,30 @@ class TDEngineConnector(TSDBConnector):
                 f"{table_name}_" f"{event[mm_schemas.MetricData.METRIC_NAME]}"
             ).replace("-", "_")

+        # Convert the datetime strings to datetime objects
+        event[mm_schemas.WriterEvent.END_INFER_TIME] = self._convert_to_datetime(
+            val=event[mm_schemas.WriterEvent.END_INFER_TIME]
+        )
+        event[mm_schemas.WriterEvent.START_INFER_TIME] = self._convert_to_datetime(
+            val=event[mm_schemas.WriterEvent.START_INFER_TIME]
+        )
+
         create_table_query = table._create_subtable_query(
             subtable=table_name, values=event
         )
         self._connection.execute(create_table_query)
+
+        insert_statement = table._insert_subtable_query(
+            self._connection,
+            subtable=table_name,
+            values=event,
         )
+        insert_statement.add_batch()
+        insert_statement.execute()
+
+    @staticmethod
+    def _convert_to_datetime(val: typing.Union[str, datetime]) -> datetime:
+        return datetime.fromisoformat(val) if isinstance(val, str) else val

     def apply_monitoring_stream_steps(self, graph):
         """
@@ -148,7 +183,8 @@ class TDEngineConnector(TSDBConnector):
                 mm_schemas.EventFieldType.PROJECT,
                 mm_schemas.EventFieldType.ENDPOINT_ID,
             ],
-            max_events=
+            max_events=1000,
+            flush_after_seconds=30,
         )

         apply_process_before_tsdb()
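One subtle piece of the connector change: writer events carry start_infer_time/end_infer_time as ISO-8601 strings, but the TIMESTAMP binding calls .timestamp() on each value, so the new _convert_to_datetime helper normalizes them first. A standalone sketch of that conversion:

from datetime import datetime


def convert_to_datetime(val):
    # Mirrors the static helper in the diff: ISO-8601 strings become
    # datetime objects, datetimes pass through untouched.
    return datetime.fromisoformat(val) if isinstance(val, str) else val


print(convert_to_datetime("2024-05-01T12:30:00+00:00"))   # tz-aware datetime
print(convert_to_datetime(datetime(2024, 5, 1, 12, 30)))  # returned as-is
# Caveat: before Python 3.11, fromisoformat() rejects a trailing "Z" suffix.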
mlrun/model_monitoring/db/tsdb/v3io/v3io_connector.py

@@ -24,6 +24,7 @@ import mlrun.common.model_monitoring
 import mlrun.common.schemas.model_monitoring as mm_schemas
 import mlrun.feature_store.steps
 import mlrun.utils.v3io_clients
+from mlrun.common.schemas import EventFieldType
 from mlrun.model_monitoring.db import TSDBConnector
 from mlrun.model_monitoring.helpers import get_invocations_fqn
 from mlrun.utils import logger
@@ -64,14 +65,17 @@ class V3IOTSDBConnector(TSDBConnector):
         self.container = container

         self.v3io_framesd = v3io_framesd or mlrun.mlconf.v3io_framesd
-        self._frames_client: v3io_frames.client.ClientBase = (
-            self._get_v3io_frames_client(self.container)
-        )
-
+        self._frames_client: Optional[v3io_frames.client.ClientBase] = None
         self._init_tables_path()
+        self._create_table = create_table

+    @property
+    def frames_client(self) -> v3io_frames.client.ClientBase:
+        if not self._frames_client:
+            self._frames_client = self._get_v3io_frames_client(self.container)
+            if self._create_table:
+                self.create_tables()
+        return self._frames_client

     def _init_tables_path(self):
         self.tables = {}
@@ -151,7 +155,7 @@ class V3IOTSDBConnector(TSDBConnector):
         for table_name in application_tables:
             logger.info("Creating table in V3IO TSDB", table_name=table_name)
             table = self.tables[table_name]
-            self._frames_client.create(
+            self.frames_client.create(
                 backend=_TSDB_BE,
                 table=table,
                 if_exists=v3io_frames.IGNORE,
@@ -161,8 +165,9 @@ class V3IOTSDBConnector(TSDBConnector):
     def apply_monitoring_stream_steps(
         self,
         graph,
-        tsdb_batching_max_events: int =
-        tsdb_batching_timeout_secs: int =
+        tsdb_batching_max_events: int = 1000,
+        tsdb_batching_timeout_secs: int = 30,
+        sample_window: int = 10,
     ):
         """
         Apply TSDB steps on the provided monitoring graph. Throughout these steps, the graph stores live data of
@@ -173,6 +178,7 @@ class V3IOTSDBConnector(TSDBConnector):
         - endpoint_features (Prediction and feature names and values)
         - custom_metrics (user-defined metrics)
         """
+
         # Write latency per prediction, labeled by endpoint ID only
         graph.add_step(
             "storey.TSDBTarget",
@@ -197,17 +203,23 @@ class V3IOTSDBConnector(TSDBConnector):
             key=mm_schemas.EventFieldType.ENDPOINT_ID,
         )

+        # Emits the event in window size of events based on sample_window size (10 by default)
+        graph.add_step(
+            "storey.steps.SampleWindow",
+            name="sample",
+            after="Rename",
+            window_size=sample_window,
+            key=EventFieldType.ENDPOINT_ID,
+        )
+
         # Before writing data to TSDB, create dictionary of 2-3 dictionaries that contains
         # stats and details about the events
-        )
-        apply_process_before_tsdb()
+        graph.add_step(
+            "mlrun.model_monitoring.db.tsdb.v3io.stream_graph_steps.ProcessBeforeTSDB",
+            name="ProcessBeforeTSDB",
+            after="sample",
+        )

         # Unpacked keys from each dictionary and write to TSDB target
         def apply_filter_and_unpacked_keys(name, keys):
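The new storey.steps.SampleWindow step thins the per-prediction stream before it reaches the TSDB writer: per endpoint ID, only one event out of every sample_window (10 by default) flows downstream. A pure-Python sketch of that sampling semantics, not storey's implementation (whether the first or last event of each window is kept is an implementation detail; this sketch keeps the first):

from collections import defaultdict
from typing import Optional


class SampleWindowSketch:
    """Illustrative only: emit one event per window of `window_size`, per key."""

    def __init__(self, window_size: int = 10):
        self.window_size = window_size
        self._counts = defaultdict(int)

    def do(self, key: str, event: dict) -> Optional[dict]:
        index = self._counts[key]
        self._counts[key] += 1
        # pass the first event of each window through, drop the rest
        return event if index % self.window_size == 0 else None


sampler = SampleWindowSketch(window_size=10)
emitted = [
    event
    for i in range(25)
    if (event := sampler.do("endpoint-1", {"latency": i})) is not None
]
print(len(emitted))  # 3 -> events 0, 10 and 20 pass through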
@@ -273,8 +285,8 @@ class V3IOTSDBConnector(TSDBConnector):
     def handle_model_error(
         self,
         graph,
-        tsdb_batching_max_events: int =
-        tsdb_batching_timeout_secs: int =
+        tsdb_batching_max_events: int = 1000,
+        tsdb_batching_timeout_secs: int = 30,
         **kwargs,
     ) -> None:
         graph.add_step(
@@ -326,12 +338,14 @@ class V3IOTSDBConnector(TSDBConnector):
         elif kind == mm_schemas.WriterEventKind.RESULT:
             table = self.tables[mm_schemas.V3IOTSDBTables.APP_RESULTS]
             index_cols = index_cols_base + [mm_schemas.ResultData.RESULT_NAME]
+            event.pop(mm_schemas.ResultData.CURRENT_STATS, None)
+            # TODO: remove this when extra data is supported (ML-7460)
+            event.pop(mm_schemas.ResultData.RESULT_EXTRA_DATA, None)
         else:
             raise ValueError(f"Invalid {kind = }")

         try:
-            self._frames_client.write(
+            self.frames_client.write(
                 backend=_TSDB_BE,
                 table=table,
                 dfs=pd.DataFrame.from_records([event]),
@@ -358,7 +372,7 @@ class V3IOTSDBConnector(TSDBConnector):
         tables = mm_schemas.V3IOTSDBTables.list()
         for table_to_delete in tables:
             try:
-                self._frames_client.delete(backend=_TSDB_BE, table=table_to_delete)
+                self.frames_client.delete(backend=_TSDB_BE, table=table_to_delete)
             except v3io_frames.DeleteError as e:
                 logger.warning(
                     f"Failed to delete TSDB table '{table}'",
@@ -474,7 +488,7 @@ class V3IOTSDBConnector(TSDBConnector):
         aggregators = ",".join(agg_funcs) if agg_funcs else None
         table_path = self.tables[table]
         try:
-            df = self._frames_client.read(
+            df = self.frames_client.read(
                 backend=_TSDB_BE,
                 table=table_path,
                 start=start,
@@ -577,7 +591,7 @@ class V3IOTSDBConnector(TSDBConnector):

         logger.debug("Querying V3IO TSDB", query=query)

-        df: pd.DataFrame = self._frames_client.read(
+        df: pd.DataFrame = self.frames_client.read(
             backend=_TSDB_BE,
             start=start,
             end=end,