mlrun 1.9.0rc3__py3-none-any.whl → 1.9.0rc5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of mlrun might be problematic. Click here for more details.
- mlrun/__main__.py +13 -3
- mlrun/artifacts/base.py +5 -5
- mlrun/artifacts/dataset.py +1 -1
- mlrun/artifacts/model.py +1 -1
- mlrun/artifacts/plots.py +2 -2
- mlrun/common/constants.py +7 -0
- mlrun/common/runtimes/constants.py +1 -1
- mlrun/common/schemas/artifact.py +1 -1
- mlrun/common/schemas/model_monitoring/model_endpoints.py +32 -8
- mlrun/common/schemas/pipeline.py +1 -1
- mlrun/common/schemas/project.py +1 -1
- mlrun/common/schemas/runs.py +1 -1
- mlrun/config.py +5 -11
- mlrun/datastore/datastore.py +1 -1
- mlrun/datastore/datastore_profile.py +2 -2
- mlrun/datastore/sources.py +3 -3
- mlrun/datastore/targets.py +4 -4
- mlrun/datastore/utils.py +2 -2
- mlrun/db/base.py +9 -7
- mlrun/db/httpdb.py +48 -27
- mlrun/db/nopdb.py +3 -1
- mlrun/execution.py +1 -1
- mlrun/frameworks/_common/model_handler.py +2 -2
- mlrun/launcher/client.py +1 -1
- mlrun/model_monitoring/api.py +4 -4
- mlrun/model_monitoring/applications/_application_steps.py +3 -1
- mlrun/model_monitoring/applications/evidently/base.py +59 -71
- mlrun/model_monitoring/controller.py +26 -11
- mlrun/model_monitoring/db/tsdb/base.py +3 -1
- mlrun/model_monitoring/db/tsdb/tdengine/tdengine_connection.py +213 -0
- mlrun/model_monitoring/db/tsdb/tdengine/tdengine_connector.py +27 -49
- mlrun/model_monitoring/db/tsdb/v3io/v3io_connector.py +48 -35
- mlrun/model_monitoring/tracking_policy.py +1 -1
- mlrun/model_monitoring/writer.py +1 -1
- mlrun/projects/operations.py +3 -3
- mlrun/projects/project.py +37 -22
- mlrun/render.py +5 -9
- mlrun/run.py +1 -1
- mlrun/runtimes/base.py +5 -5
- mlrun/runtimes/kubejob.py +2 -2
- mlrun/runtimes/nuclio/function.py +3 -3
- mlrun/runtimes/nuclio/serving.py +4 -4
- mlrun/runtimes/utils.py +25 -8
- mlrun/utils/helpers.py +3 -2
- mlrun/utils/notifications/notification/webhook.py +18 -2
- mlrun/utils/version/version.json +2 -2
- {mlrun-1.9.0rc3.dist-info → mlrun-1.9.0rc5.dist-info}/METADATA +9 -13
- {mlrun-1.9.0rc3.dist-info → mlrun-1.9.0rc5.dist-info}/RECORD +52 -51
- {mlrun-1.9.0rc3.dist-info → mlrun-1.9.0rc5.dist-info}/WHEEL +1 -1
- {mlrun-1.9.0rc3.dist-info → mlrun-1.9.0rc5.dist-info}/entry_points.txt +0 -0
- {mlrun-1.9.0rc3.dist-info → mlrun-1.9.0rc5.dist-info}/licenses/LICENSE +0 -0
- {mlrun-1.9.0rc3.dist-info → mlrun-1.9.0rc5.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,213 @@
|
|
|
1
|
+
# Copyright 2025 Iguazio
|
|
2
|
+
#
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
#
|
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
#
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
14
|
+
|
|
15
|
+
import traceback
|
|
16
|
+
from collections.abc import Callable
|
|
17
|
+
from enum import Enum
|
|
18
|
+
from typing import Any, Final, Optional, Union
|
|
19
|
+
|
|
20
|
+
import taosws
|
|
21
|
+
from taosws import TaosStmt
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
class _StrEnum(str, Enum):
    """Enum base class whose members are also `str` instances."""
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
class TimestampPrecision(_StrEnum):
    """Supported timestamp precisions; each member's name equals its string value."""

    # milliseconds
    ms = "ms"
    # microseconds
    us = "us"
    # nanoseconds
    ns = "ns"
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
# For each precision: the multiplier applied to a `.timestamp()` value and the
# `taosws` helper that turns the resulting list of integers into a column.
_TS_PRECISION_TO_FACTOR_AND_FUNC: Final[
    dict[TimestampPrecision, tuple[int, Callable[[list[int]], taosws.PyColumnView]]]
] = {
    TimestampPrecision.ms: (1_000, taosws.millis_timestamps_to_column),
    TimestampPrecision.us: (1_000_000, taosws.micros_timestamps_to_column),
    TimestampPrecision.ns: (1_000_000_000, taosws.nanos_timestamps_to_column),
}
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
class QueryResult:
    """Plain container for the outcome of a query.

    :param data:   the rows returned by the query
    :param fields: the column descriptors of the result
    """

    def __init__(self, data, fields):
        self.data = data
        self.fields = fields

    def __eq__(self, other):
        # Comparing with an unrelated object must not raise AttributeError;
        # returning NotImplemented lets Python fall back to its default handling.
        if not isinstance(other, QueryResult):
            return NotImplemented
        return self.data == other.data and self.fields == other.fields

    def __repr__(self):
        return f"QueryResult({self.data}, {self.fields})"
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
class Field:
    """Plain descriptor of one result column: its name, type and byte size.

    The parameter names `type` and `bytes` shadow builtins but are kept as-is,
    since they are part of the constructor's interface.
    """

    def __init__(self, name, type, bytes):
        self.name = name
        self.type = type
        self.bytes = bytes

    def __eq__(self, other):
        # Comparing with an unrelated object must not raise AttributeError;
        # returning NotImplemented lets Python fall back to its default handling.
        if not isinstance(other, Field):
            return NotImplemented
        return (
            self.name == other.name
            and self.type == other.type
            and self.bytes == other.bytes
        )

    def __repr__(self):
        return f"Field({self.name}, {self.type}, {self.bytes})"
|
|
70
|
+
|
|
71
|
+
|
|
72
|
+
class TDEngineError(Exception):
    """Error type defined by this module for failed TDEngine operations."""
|
|
74
|
+
|
|
75
|
+
|
|
76
|
+
class ErrorResult:
    """Carrier for a failure: the formatted traceback (`tb`) and the exception (`err`)."""

    def __init__(self, tb, err):
        self.tb, self.err = tb, err
|
|
80
|
+
|
|
81
|
+
|
|
82
|
+
def _get_timestamp_column(
    values: list, timestamp_precision: TimestampPrecision
) -> taosws.PyColumnView:
    """Convert timestamp objects into a `taosws` timestamp column.

    :param values:              objects exposing a `.timestamp()` method
                                (presumably `datetime` instances - confirm with callers)
    :param timestamp_precision: selects the scale factor and the column builder

    :return: the column produced by the builder registered for the given precision
    """
    scale, build_column = _TS_PRECISION_TO_FACTOR_AND_FUNC[timestamp_precision]
    scaled = []
    for moment in values:
        # Scale to the target precision and round to the nearest integer
        scaled.append(round(moment.timestamp() * scale))
    return build_column(scaled)
|
|
88
|
+
|
|
89
|
+
|
|
90
|
+
def values_to_column(
    values: list,
    column_type: str,
    timestamp_precision: TimestampPrecision = TimestampPrecision.ms,
) -> taosws.PyColumnView:
    """Build a `taosws` column from `values` according to `column_type`.

    :param values:              the values to place in the column
    :param column_type:         "TIMESTAMP", "FLOAT", "INT", or any string starting with "BINARY"
    :param timestamp_precision: only used for "TIMESTAMP" columns

    :return: the resulting `taosws` column
    :raise NotImplementedError: if `column_type` is none of the supported types
    """
    # Timestamps need scaling by precision, so they take a dedicated path
    if column_type == "TIMESTAMP":
        return _get_timestamp_column(values, timestamp_precision)

    if column_type == "FLOAT":
        converter = taosws.floats_to_column
    elif column_type == "INT":
        converter = taosws.ints_to_column
    elif column_type.startswith("BINARY"):
        converter = taosws.binary_to_column
    else:
        raise NotImplementedError(f"Unsupported column type '{column_type}'")

    return converter(values)
|
|
105
|
+
|
|
106
|
+
|
|
107
|
+
class Statement:
    """A one-row insert into a subtable, executed through a prepared statement.

    :param columns:             column name -> column type string; its iteration order is the
                                order in which values are bound
    :param subtable:            name of the table the row is inserted into
    :param values:              column name -> value; must contain every key of `columns`
    :param timestamp_precision: name of a `TimestampPrecision` member
    """

    def __init__(
        self,
        columns: dict[str, str],
        subtable: str,
        values: dict[str, Any],
        timestamp_precision: str = TimestampPrecision.ms,
    ) -> None:
        self.columns, self.subtable, self.values = columns, subtable, values
        # Resolved by member name (the names and values of the enum coincide)
        self.timestamp_precision = TimestampPrecision[timestamp_precision]

    def prepare(self, statement: TaosStmt) -> TaosStmt:
        """Prepare `statement` as an INSERT, bind this row to it and add it as a batch.

        :param statement: a fresh statement object obtained from the connection

        :return: the same `statement` object, ready to be executed
        """
        # One "?" placeholder per column
        placeholders = ", ".join(["?"] * len(self.columns))
        statement.prepare(f"INSERT INTO ? VALUES ({placeholders});")
        statement.set_tbname(self.subtable)

        # Each value becomes a single-element column of the declared type
        bound_columns = [
            values_to_column(
                [self.values[name]],
                kind,
                timestamp_precision=self.timestamp_precision,
            )
            for name, kind in self.columns.items()
        ]

        statement.bind_param(bound_columns)
        statement.add_batch()
        return statement
|
|
138
|
+
|
|
139
|
+
|
|
140
|
+
def _run(connection_string, prefix_statements, q, statements, query):
    """Run statements and an optional query on a new connection and report the outcome via `q`.

    Exactly one item is put on `q`: `None` when `query` is empty, a `QueryResult` when the
    query succeeded, or an `ErrorResult` if any step raised.

    :param connection_string: passed to `taosws.connect`
    :param prefix_statements: statements executed before `statements`
    :param q:                 object with a `put` method (presumably a queue - confirm with callers)
    :param statements:        list of SQL strings and/or `Statement` objects
    :param query:             optional SQL query to run after the statements
    """
    # NOTE(review): the connection opened here is never explicitly closed in this function.
    try:
        conn = taosws.connect(connection_string)

        for item in prefix_statements + statements:
            if not isinstance(item, Statement):
                conn.execute(item)
                continue
            prepared = item.prepare(conn.statement())
            prepared.execute()

        if query:
            res = conn.query(query)

            # taosws.TaosField is not serializable
            fields = [Field(f.name(), f.type(), f.bytes()) for f in res.fields]
            outcome = QueryResult(list(res), fields)
        else:
            outcome = None

        q.put(outcome)
    except Exception as e:
        q.put(ErrorResult(traceback.format_exc(), e))
|
|
166
|
+
|
|
167
|
+
|
|
168
|
+
class TDEngineConnection:
    """Thin wrapper around a single `taosws` connection.

    The connection is opened in the constructor. `prefix_statements` is a public list of
    statements executed before the caller's statements on every `run` call.

    :param connection_string: passed to `taosws.connect`
    """

    def __init__(self, connection_string):
        self._connection_string = connection_string
        self.prefix_statements = []

        self._conn = taosws.connect(self._connection_string)

    def run(
        self,
        statements: Optional[Union[str, Statement, list[Union[str, Statement]]]] = None,
        query: Optional[str] = None,
    ) -> Optional[QueryResult]:
        """Execute the prefix statements, then `statements`, then the optional `query`.

        :param statements: a single SQL string or `Statement`, or a list of them
        :param query:      SQL query whose result should be returned

        :return: a `QueryResult` when `query` is given, otherwise `None`
        :raise TDEngineError: if `taosws` raises `taosws.Error` while running a statement
                              or the query
        """
        statements = statements or []
        if not isinstance(statements, list):
            statements = [statements]

        for statement in self.prefix_statements + statements:
            if isinstance(statement, Statement):
                try:
                    prepared_statement = statement.prepare(self._conn.statement())
                    prepared_statement.execute()
                except taosws.Error as e:
                    # Describe the statement that failed. Do not call
                    # `self._conn.statement()` here: it would create a new, unrelated
                    # statement and could itself raise, hiding the original error.
                    raise TDEngineError(
                        f"Failed to run prepared statement for subtable "
                        f"`{statement.subtable}`: {e}"
                    ) from e
            else:
                try:
                    self._conn.execute(statement)
                except taosws.Error as e:
                    raise TDEngineError(
                        f"Failed to run statement `{statement}`: {e}"
                    ) from e

        if not query:
            return None

        try:
            res = self._conn.query(query)
        except taosws.Error as e:
            raise TDEngineError(f"Failed to run query `{query}`: {e}") from e

        # Copy the field descriptors into plain `Field` objects
        fields = [
            Field(field.name(), field.type(), field.bytes()) for field in res.fields
        ]

        return QueryResult(list(res), fields)
|
|
@@ -12,17 +12,12 @@
|
|
|
12
12
|
# See the License for the specific language governing permissions and
|
|
13
13
|
# limitations under the License.
|
|
14
14
|
|
|
15
|
-
import asyncio
|
|
16
15
|
from datetime import datetime, timedelta
|
|
17
16
|
from threading import Lock
|
|
18
17
|
from typing import Callable, Final, Literal, Optional, Union
|
|
19
18
|
|
|
20
19
|
import pandas as pd
|
|
21
20
|
import taosws
|
|
22
|
-
from taoswswrap.tdengine_connection import (
|
|
23
|
-
Statement,
|
|
24
|
-
TDEngineConnection,
|
|
25
|
-
)
|
|
26
21
|
|
|
27
22
|
import mlrun.common.schemas.model_monitoring as mm_schemas
|
|
28
23
|
import mlrun.common.types
|
|
@@ -30,6 +25,10 @@ import mlrun.model_monitoring.db.tsdb.tdengine.schemas as tdengine_schemas
|
|
|
30
25
|
import mlrun.model_monitoring.db.tsdb.tdengine.stream_graph_steps
|
|
31
26
|
from mlrun.datastore.datastore_profile import DatastoreProfile
|
|
32
27
|
from mlrun.model_monitoring.db import TSDBConnector
|
|
28
|
+
from mlrun.model_monitoring.db.tsdb.tdengine.tdengine_connection import (
|
|
29
|
+
Statement,
|
|
30
|
+
TDEngineConnection,
|
|
31
|
+
)
|
|
33
32
|
from mlrun.model_monitoring.helpers import get_invocations_fqn
|
|
34
33
|
from mlrun.utils import logger
|
|
35
34
|
|
|
@@ -75,12 +74,6 @@ class TDEngineConnector(TSDBConnector):
|
|
|
75
74
|
|
|
76
75
|
self._init_super_tables()
|
|
77
76
|
|
|
78
|
-
self._run_directly = (
|
|
79
|
-
mlrun.mlconf.model_endpoint_monitoring.tdengine.run_directly
|
|
80
|
-
)
|
|
81
|
-
self._timeout = mlrun.mlconf.model_endpoint_monitoring.tdengine.timeout
|
|
82
|
-
self._retries = mlrun.mlconf.model_endpoint_monitoring.tdengine.retries
|
|
83
|
-
|
|
84
77
|
@property
|
|
85
78
|
def connection(self) -> TDEngineConnection:
|
|
86
79
|
global _connection
|
|
@@ -98,7 +91,7 @@ class TDEngineConnector(TSDBConnector):
|
|
|
98
91
|
"""Establish a connection to the TSDB server."""
|
|
99
92
|
logger.debug("Creating a new connection to TDEngine", project=self.project)
|
|
100
93
|
conn = TDEngineConnection(
|
|
101
|
-
self._tdengine_connection_profile.dsn(),
|
|
94
|
+
self._tdengine_connection_profile.dsn(),
|
|
102
95
|
)
|
|
103
96
|
conn.prefix_statements = [f"USE {self.database}"]
|
|
104
97
|
|
|
@@ -126,8 +119,6 @@ class TDEngineConnector(TSDBConnector):
|
|
|
126
119
|
self.connection.prefix_statements = []
|
|
127
120
|
self.connection.run(
|
|
128
121
|
statements=f"CREATE DATABASE IF NOT EXISTS {self.database} PRECISION '{self._timestamp_precision}'",
|
|
129
|
-
timeout=self._timeout,
|
|
130
|
-
retries=self._retries,
|
|
131
122
|
)
|
|
132
123
|
self.connection.prefix_statements = [f"USE {self.database}"]
|
|
133
124
|
logger.debug(
|
|
@@ -147,8 +138,6 @@ class TDEngineConnector(TSDBConnector):
|
|
|
147
138
|
conn = self.connection
|
|
148
139
|
conn.run(
|
|
149
140
|
statements=create_table_query,
|
|
150
|
-
timeout=self._timeout,
|
|
151
|
-
retries=self._retries,
|
|
152
141
|
)
|
|
153
142
|
|
|
154
143
|
def write_application_event(
|
|
@@ -208,8 +197,6 @@ class TDEngineConnector(TSDBConnector):
|
|
|
208
197
|
create_table_sql,
|
|
209
198
|
insert_statement,
|
|
210
199
|
],
|
|
211
|
-
timeout=self._timeout,
|
|
212
|
-
retries=self._retries,
|
|
213
200
|
)
|
|
214
201
|
|
|
215
202
|
@staticmethod
|
|
@@ -313,7 +300,8 @@ class TDEngineConnector(TSDBConnector):
|
|
|
313
300
|
)
|
|
314
301
|
|
|
315
302
|
def delete_tsdb_records(
|
|
316
|
-
self,
|
|
303
|
+
self,
|
|
304
|
+
endpoint_ids: list[str],
|
|
317
305
|
):
|
|
318
306
|
"""
|
|
319
307
|
To delete subtables within TDEngine, we first query the subtables names with the provided endpoint_ids.
|
|
@@ -334,8 +322,6 @@ class TDEngineConnector(TSDBConnector):
|
|
|
334
322
|
)
|
|
335
323
|
subtables_result = self.connection.run(
|
|
336
324
|
query=get_subtable_query,
|
|
337
|
-
timeout=self._timeout,
|
|
338
|
-
retries=self._retries,
|
|
339
325
|
)
|
|
340
326
|
subtables.extend([subtable[0] for subtable in subtables_result.data])
|
|
341
327
|
except Exception as e:
|
|
@@ -356,8 +342,6 @@ class TDEngineConnector(TSDBConnector):
|
|
|
356
342
|
try:
|
|
357
343
|
self.connection.run(
|
|
358
344
|
statements=drop_statements,
|
|
359
|
-
timeout=delete_timeout or self._timeout,
|
|
360
|
-
retries=self._retries,
|
|
361
345
|
)
|
|
362
346
|
except Exception as e:
|
|
363
347
|
logger.warning(
|
|
@@ -388,8 +372,6 @@ class TDEngineConnector(TSDBConnector):
|
|
|
388
372
|
try:
|
|
389
373
|
self.connection.run(
|
|
390
374
|
statements=drop_statements,
|
|
391
|
-
timeout=self._timeout,
|
|
392
|
-
retries=self._retries,
|
|
393
375
|
)
|
|
394
376
|
except Exception as e:
|
|
395
377
|
logger.warning(
|
|
@@ -413,8 +395,6 @@ class TDEngineConnector(TSDBConnector):
|
|
|
413
395
|
try:
|
|
414
396
|
table_name = self.connection.run(
|
|
415
397
|
query=query_random_table_name,
|
|
416
|
-
timeout=self._timeout,
|
|
417
|
-
retries=self._retries,
|
|
418
398
|
)
|
|
419
399
|
if len(table_name.data) == 0:
|
|
420
400
|
# no tables were found under the database
|
|
@@ -437,8 +417,6 @@ class TDEngineConnector(TSDBConnector):
|
|
|
437
417
|
try:
|
|
438
418
|
self.connection.run(
|
|
439
419
|
statements=drop_database_query,
|
|
440
|
-
timeout=self._timeout,
|
|
441
|
-
retries=self._retries,
|
|
442
420
|
)
|
|
443
421
|
logger.debug(
|
|
444
422
|
"The TDEngine database has been successfully dropped",
|
|
@@ -531,7 +509,7 @@ class TDEngineConnector(TSDBConnector):
|
|
|
531
509
|
logger.debug("Querying TDEngine", query=full_query)
|
|
532
510
|
try:
|
|
533
511
|
query_result = self.connection.run(
|
|
534
|
-
query=full_query,
|
|
512
|
+
query=full_query,
|
|
535
513
|
)
|
|
536
514
|
except taosws.QueryError as e:
|
|
537
515
|
raise mlrun.errors.MLRunInvalidArgumentError(
|
|
@@ -908,6 +886,7 @@ class TDEngineConnector(TSDBConnector):
|
|
|
908
886
|
model_endpoint_objects: list[mlrun.common.schemas.ModelEndpoint],
|
|
909
887
|
project: str,
|
|
910
888
|
run_in_threadpool: Callable,
|
|
889
|
+
metric_list: Optional[list[str]] = None,
|
|
911
890
|
) -> list[mlrun.common.schemas.ModelEndpoint]:
|
|
912
891
|
"""
|
|
913
892
|
Add basic metrics to the model endpoint object.
|
|
@@ -916,24 +895,28 @@ class TDEngineConnector(TSDBConnector):
|
|
|
916
895
|
be filled with the relevant basic metrics.
|
|
917
896
|
:param project: The name of the project.
|
|
918
897
|
:param run_in_threadpool: A function that runs another function in a thread pool.
|
|
898
|
+
:param metric_list: List of metrics to include from the time series DB. Defaults to all metrics.
|
|
919
899
|
|
|
920
900
|
:return: A list of `ModelEndpointMonitoringMetric` objects.
|
|
921
901
|
"""
|
|
922
902
|
|
|
923
903
|
uids = [mep.metadata.uid for mep in model_endpoint_objects]
|
|
924
|
-
|
|
925
|
-
|
|
926
|
-
|
|
927
|
-
|
|
928
|
-
|
|
929
|
-
|
|
930
|
-
|
|
931
|
-
|
|
932
|
-
|
|
933
|
-
|
|
934
|
-
|
|
935
|
-
|
|
936
|
-
|
|
904
|
+
|
|
905
|
+
metric_name_to_function = {
|
|
906
|
+
"error_count": self.get_error_count,
|
|
907
|
+
"last_request": self.get_last_request,
|
|
908
|
+
"avg_latency": self.get_avg_latency,
|
|
909
|
+
"result_status": self.get_drift_status,
|
|
910
|
+
}
|
|
911
|
+
if metric_list is not None:
|
|
912
|
+
for metric_name in list(metric_name_to_function):
|
|
913
|
+
if metric_name not in metric_list:
|
|
914
|
+
del metric_name_to_function[metric_name]
|
|
915
|
+
|
|
916
|
+
metric_name_to_df = {
|
|
917
|
+
metric_name: function(endpoint_ids=uids)
|
|
918
|
+
for metric_name, function in metric_name_to_function.items()
|
|
919
|
+
}
|
|
937
920
|
|
|
938
921
|
def add_metrics(
|
|
939
922
|
mep: mlrun.common.schemas.ModelEndpoint,
|
|
@@ -955,12 +938,7 @@ class TDEngineConnector(TSDBConnector):
|
|
|
955
938
|
map(
|
|
956
939
|
lambda mep: add_metrics(
|
|
957
940
|
mep=mep,
|
|
958
|
-
df_dictionary=
|
|
959
|
-
"error_count": error_count_df,
|
|
960
|
-
"last_request": last_request_df,
|
|
961
|
-
"avg_latency": avg_latency_df,
|
|
962
|
-
"result_status": drift_status_df,
|
|
963
|
-
},
|
|
941
|
+
df_dictionary=metric_name_to_df,
|
|
964
942
|
),
|
|
965
943
|
model_endpoint_objects,
|
|
966
944
|
)
|
|
@@ -455,12 +455,20 @@ class V3IOTSDBConnector(TSDBConnector):
|
|
|
455
455
|
# Delete all tables
|
|
456
456
|
tables = mm_schemas.V3IOTSDBTables.list()
|
|
457
457
|
for table_to_delete in tables:
|
|
458
|
-
|
|
459
|
-
|
|
460
|
-
|
|
458
|
+
if table_to_delete in self.tables:
|
|
459
|
+
try:
|
|
460
|
+
self.frames_client.delete(
|
|
461
|
+
backend=_TSDB_BE, table=self.tables[table_to_delete]
|
|
462
|
+
)
|
|
463
|
+
except v3io_frames.DeleteError as e:
|
|
464
|
+
logger.warning(
|
|
465
|
+
f"Failed to delete TSDB table '{table_to_delete}'",
|
|
466
|
+
err=mlrun.errors.err_to_str(e),
|
|
467
|
+
)
|
|
468
|
+
else:
|
|
461
469
|
logger.warning(
|
|
462
|
-
f"
|
|
463
|
-
|
|
470
|
+
f"Skipping deletion: table '{table_to_delete}' is not among the initialized tables.",
|
|
471
|
+
initialized_tables=list(self.tables.keys()),
|
|
464
472
|
)
|
|
465
473
|
|
|
466
474
|
# Final cleanup of tsdb path
|
|
@@ -470,7 +478,8 @@ class V3IOTSDBConnector(TSDBConnector):
|
|
|
470
478
|
store.rm(tsdb_path, recursive=True)
|
|
471
479
|
|
|
472
480
|
def delete_tsdb_records(
|
|
473
|
-
self,
|
|
481
|
+
self,
|
|
482
|
+
endpoint_ids: list[str],
|
|
474
483
|
):
|
|
475
484
|
logger.debug(
|
|
476
485
|
"Deleting model endpoints resources using the V3IO TSDB connector",
|
|
@@ -1085,6 +1094,7 @@ class V3IOTSDBConnector(TSDBConnector):
|
|
|
1085
1094
|
model_endpoint_objects: list[mlrun.common.schemas.ModelEndpoint],
|
|
1086
1095
|
project: str,
|
|
1087
1096
|
run_in_threadpool: Callable,
|
|
1097
|
+
metric_list: Optional[list[str]] = None,
|
|
1088
1098
|
) -> list[mlrun.common.schemas.ModelEndpoint]:
|
|
1089
1099
|
"""
|
|
1090
1100
|
Fetch basic metrics from V3IO TSDB and add them to MEP objects.
|
|
@@ -1093,6 +1103,7 @@ class V3IOTSDBConnector(TSDBConnector):
|
|
|
1093
1103
|
be filled with the relevant basic metrics.
|
|
1094
1104
|
:param project: The name of the project.
|
|
1095
1105
|
:param run_in_threadpool: A function that runs another function in a thread pool.
|
|
1106
|
+
:param metric_list: List of metrics to include from the time series DB. Defaults to all metrics.
|
|
1096
1107
|
|
|
1097
1108
|
:return: A list of `ModelEndpointMonitoringMetric` objects.
|
|
1098
1109
|
"""
|
|
@@ -1104,15 +1115,27 @@ class V3IOTSDBConnector(TSDBConnector):
|
|
|
1104
1115
|
uids.append(uid)
|
|
1105
1116
|
model_endpoint_objects_by_uid[uid] = model_endpoint_object
|
|
1106
1117
|
|
|
1107
|
-
|
|
1108
|
-
self.get_error_count,
|
|
1109
|
-
|
|
1110
|
-
|
|
1111
|
-
|
|
1112
|
-
|
|
1113
|
-
|
|
1114
|
-
|
|
1115
|
-
|
|
1118
|
+
metric_name_to_function_and_column_name = {
|
|
1119
|
+
"error_count": (self.get_error_count, "count(error_count)"),
|
|
1120
|
+
"avg_latency": (self.get_avg_latency, "avg(latency)"),
|
|
1121
|
+
"result_status": (self.get_drift_status, "max(result_status)"),
|
|
1122
|
+
}
|
|
1123
|
+
if metric_list is not None:
|
|
1124
|
+
for metric_name in list(metric_name_to_function_and_column_name):
|
|
1125
|
+
if metric_name not in metric_list:
|
|
1126
|
+
del metric_name_to_function_and_column_name[metric_name]
|
|
1127
|
+
|
|
1128
|
+
metric_name_to_result = {}
|
|
1129
|
+
|
|
1130
|
+
for metric_name, (
|
|
1131
|
+
function,
|
|
1132
|
+
_,
|
|
1133
|
+
) in metric_name_to_function_and_column_name.items():
|
|
1134
|
+
metric_name_to_result[metric_name] = await run_in_threadpool(
|
|
1135
|
+
function,
|
|
1136
|
+
endpoint_ids=uids,
|
|
1137
|
+
get_raw=True,
|
|
1138
|
+
)
|
|
1116
1139
|
|
|
1117
1140
|
def add_metric(
|
|
1118
1141
|
metric: str,
|
|
@@ -1128,26 +1151,16 @@ class V3IOTSDBConnector(TSDBConnector):
|
|
|
1128
1151
|
if mep and value is not None and not math.isnan(value):
|
|
1129
1152
|
setattr(mep.status, metric, value)
|
|
1130
1153
|
|
|
1131
|
-
|
|
1132
|
-
|
|
1133
|
-
|
|
1134
|
-
|
|
1135
|
-
|
|
1136
|
-
|
|
1137
|
-
|
|
1138
|
-
|
|
1139
|
-
|
|
1140
|
-
|
|
1141
|
-
)
|
|
1142
|
-
add_metric(
|
|
1143
|
-
"result_status",
|
|
1144
|
-
"max(result_status)",
|
|
1145
|
-
drift_status_res,
|
|
1146
|
-
)
|
|
1147
|
-
|
|
1148
|
-
self._enrich_mep_with_last_request(
|
|
1149
|
-
model_endpoint_objects_by_uid=model_endpoint_objects_by_uid
|
|
1150
|
-
)
|
|
1154
|
+
for metric_name, result in metric_name_to_result.items():
|
|
1155
|
+
add_metric(
|
|
1156
|
+
metric_name,
|
|
1157
|
+
metric_name_to_function_and_column_name[metric_name][1],
|
|
1158
|
+
result,
|
|
1159
|
+
)
|
|
1160
|
+
if metric_list is None or "last_request" in metric_list:
|
|
1161
|
+
self._enrich_mep_with_last_request(
|
|
1162
|
+
model_endpoint_objects_by_uid=model_endpoint_objects_by_uid
|
|
1163
|
+
)
|
|
1151
1164
|
|
|
1152
1165
|
return list(model_endpoint_objects_by_uid.values())
|
|
1153
1166
|
|
|
@@ -57,7 +57,7 @@ class TrackingPolicy(mlrun.model.ModelObj):
|
|
|
57
57
|
"""
|
|
58
58
|
warnings.warn(
|
|
59
59
|
"The `TrackingPolicy` class is deprecated from version 1.7.0 and is not "
|
|
60
|
-
"used anymore. It will be removed in 1.
|
|
60
|
+
"used anymore. It will be removed in 1.10.0.",
|
|
61
61
|
FutureWarning,
|
|
62
62
|
)
|
|
63
63
|
|
mlrun/model_monitoring/writer.py
CHANGED
|
@@ -129,7 +129,7 @@ class ModelMonitoringWriter(StepToDict):
|
|
|
129
129
|
)
|
|
130
130
|
kind = event.pop(WriterEvent.EVENT_KIND, WriterEventKind.RESULT)
|
|
131
131
|
result_event = _AppResultEvent(json.loads(event.pop(WriterEvent.DATA, "{}")))
|
|
132
|
-
if not result_event: # BC for < 1.7.0, can be removed in 1.
|
|
132
|
+
if not result_event: # BC for < 1.7.0, can be removed in 1.10.0
|
|
133
133
|
result_event = _AppResultEvent(event)
|
|
134
134
|
else:
|
|
135
135
|
result_event.update(_AppResultEvent(event))
|
mlrun/projects/operations.py
CHANGED
|
@@ -294,9 +294,9 @@ def build_function(
|
|
|
294
294
|
:param force_build: Force building the image, even when no changes were made
|
|
295
295
|
"""
|
|
296
296
|
if not overwrite_build_params:
|
|
297
|
-
# TODO: change overwrite_build_params default to True in 1.
|
|
297
|
+
# TODO: change overwrite_build_params default to True in 1.10.0
|
|
298
298
|
warnings.warn(
|
|
299
|
-
"The `overwrite_build_params` parameter default will change from 'False' to 'True' in 1.
|
|
299
|
+
"The `overwrite_build_params` parameter default will change from 'False' to 'True' in 1.10.0.",
|
|
300
300
|
mlrun.utils.OverwriteBuildParamsWarning,
|
|
301
301
|
)
|
|
302
302
|
|
|
@@ -325,7 +325,7 @@ def build_function(
|
|
|
325
325
|
skip_deployed=skip_deployed,
|
|
326
326
|
)
|
|
327
327
|
else:
|
|
328
|
-
# TODO: remove filter once overwrite_build_params default is changed to True in 1.
|
|
328
|
+
# TODO: remove filter once overwrite_build_params default is changed to True in 1.10.0
|
|
329
329
|
with warnings.catch_warnings():
|
|
330
330
|
warnings.simplefilter(
|
|
331
331
|
"ignore", category=mlrun.utils.OverwriteBuildParamsWarning
|