mlrun 1.10.0rc16__py3-none-any.whl → 1.10.1rc4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Note: this version of mlrun has been flagged as a potentially problematic release.
- mlrun/__init__.py +22 -2
- mlrun/artifacts/document.py +6 -1
- mlrun/artifacts/llm_prompt.py +21 -15
- mlrun/artifacts/model.py +3 -3
- mlrun/common/constants.py +9 -0
- mlrun/common/formatters/artifact.py +1 -0
- mlrun/common/model_monitoring/helpers.py +86 -0
- mlrun/common/schemas/__init__.py +2 -0
- mlrun/common/schemas/auth.py +2 -0
- mlrun/common/schemas/function.py +10 -0
- mlrun/common/schemas/hub.py +30 -18
- mlrun/common/schemas/model_monitoring/__init__.py +2 -0
- mlrun/common/schemas/model_monitoring/constants.py +30 -6
- mlrun/common/schemas/model_monitoring/functions.py +13 -4
- mlrun/common/schemas/model_monitoring/model_endpoints.py +11 -0
- mlrun/common/schemas/pipeline.py +1 -1
- mlrun/common/schemas/serving.py +3 -0
- mlrun/common/schemas/workflow.py +1 -0
- mlrun/common/secrets.py +22 -1
- mlrun/config.py +34 -21
- mlrun/datastore/__init__.py +11 -3
- mlrun/datastore/azure_blob.py +162 -47
- mlrun/datastore/base.py +265 -7
- mlrun/datastore/datastore.py +10 -5
- mlrun/datastore/datastore_profile.py +61 -5
- mlrun/datastore/model_provider/huggingface_provider.py +367 -0
- mlrun/datastore/model_provider/mock_model_provider.py +87 -0
- mlrun/datastore/model_provider/model_provider.py +211 -74
- mlrun/datastore/model_provider/openai_provider.py +243 -71
- mlrun/datastore/s3.py +24 -2
- mlrun/datastore/store_resources.py +4 -4
- mlrun/datastore/storeytargets.py +2 -3
- mlrun/datastore/utils.py +15 -3
- mlrun/db/base.py +27 -19
- mlrun/db/httpdb.py +57 -48
- mlrun/db/nopdb.py +25 -10
- mlrun/execution.py +55 -13
- mlrun/hub/__init__.py +15 -0
- mlrun/hub/module.py +181 -0
- mlrun/k8s_utils.py +105 -16
- mlrun/launcher/base.py +13 -6
- mlrun/launcher/local.py +2 -0
- mlrun/model.py +9 -3
- mlrun/model_monitoring/api.py +66 -27
- mlrun/model_monitoring/applications/__init__.py +1 -1
- mlrun/model_monitoring/applications/base.py +388 -138
- mlrun/model_monitoring/applications/context.py +2 -4
- mlrun/model_monitoring/applications/results.py +4 -7
- mlrun/model_monitoring/controller.py +239 -101
- mlrun/model_monitoring/db/_schedules.py +36 -13
- mlrun/model_monitoring/db/_stats.py +4 -3
- mlrun/model_monitoring/db/tsdb/base.py +29 -9
- mlrun/model_monitoring/db/tsdb/tdengine/schemas.py +4 -5
- mlrun/model_monitoring/db/tsdb/tdengine/tdengine_connector.py +154 -50
- mlrun/model_monitoring/db/tsdb/tdengine/writer_graph_steps.py +51 -0
- mlrun/model_monitoring/db/tsdb/v3io/stream_graph_steps.py +17 -4
- mlrun/model_monitoring/db/tsdb/v3io/v3io_connector.py +245 -51
- mlrun/model_monitoring/helpers.py +28 -5
- mlrun/model_monitoring/stream_processing.py +45 -14
- mlrun/model_monitoring/writer.py +220 -1
- mlrun/platforms/__init__.py +3 -2
- mlrun/platforms/iguazio.py +7 -3
- mlrun/projects/operations.py +16 -11
- mlrun/projects/pipelines.py +2 -2
- mlrun/projects/project.py +157 -69
- mlrun/run.py +97 -20
- mlrun/runtimes/__init__.py +18 -0
- mlrun/runtimes/base.py +14 -6
- mlrun/runtimes/daskjob.py +1 -0
- mlrun/runtimes/local.py +5 -2
- mlrun/runtimes/mounts.py +20 -2
- mlrun/runtimes/nuclio/__init__.py +1 -0
- mlrun/runtimes/nuclio/application/application.py +147 -17
- mlrun/runtimes/nuclio/function.py +72 -27
- mlrun/runtimes/nuclio/serving.py +102 -20
- mlrun/runtimes/pod.py +213 -21
- mlrun/runtimes/utils.py +49 -9
- mlrun/secrets.py +54 -13
- mlrun/serving/remote.py +79 -6
- mlrun/serving/routers.py +23 -41
- mlrun/serving/server.py +230 -40
- mlrun/serving/states.py +605 -232
- mlrun/serving/steps.py +62 -0
- mlrun/serving/system_steps.py +136 -81
- mlrun/serving/v2_serving.py +9 -10
- mlrun/utils/helpers.py +215 -83
- mlrun/utils/logger.py +3 -1
- mlrun/utils/notifications/notification/base.py +18 -0
- mlrun/utils/notifications/notification/git.py +2 -4
- mlrun/utils/notifications/notification/mail.py +38 -15
- mlrun/utils/notifications/notification/slack.py +2 -4
- mlrun/utils/notifications/notification/webhook.py +2 -5
- mlrun/utils/notifications/notification_pusher.py +1 -1
- mlrun/utils/version/version.json +2 -2
- {mlrun-1.10.0rc16.dist-info → mlrun-1.10.1rc4.dist-info}/METADATA +51 -50
- {mlrun-1.10.0rc16.dist-info → mlrun-1.10.1rc4.dist-info}/RECORD +100 -95
- mlrun/api/schemas/__init__.py +0 -259
- {mlrun-1.10.0rc16.dist-info → mlrun-1.10.1rc4.dist-info}/WHEEL +0 -0
- {mlrun-1.10.0rc16.dist-info → mlrun-1.10.1rc4.dist-info}/entry_points.txt +0 -0
- {mlrun-1.10.0rc16.dist-info → mlrun-1.10.1rc4.dist-info}/licenses/LICENSE +0 -0
- {mlrun-1.10.0rc16.dist-info → mlrun-1.10.1rc4.dist-info}/top_level.txt +0 -0
mlrun/model_monitoring/db/_schedules.py

@@ -16,7 +16,7 @@ import json
 import sys
 from abc import ABC, abstractmethod
 from contextlib import AbstractContextManager
-from datetime import datetime
+from datetime import datetime
 from types import TracebackType
 from typing import TYPE_CHECKING, Final, Optional
 
@@ -162,19 +162,29 @@ class ModelMonitoringSchedulesFileEndpoint(ModelMonitoringSchedulesFileBase):
             endpoint_id=model_endpoint.metadata.uid,
         )
 
-    def get_application_time(self, application: str) -> Optional[
+    def get_application_time(self, application: str) -> Optional[float]:
         self._check_open_schedules()
         return self._schedules.get(application)
 
-    def update_application_time(self, application: str, timestamp:
+    def update_application_time(self, application: str, timestamp: float) -> None:
         self._check_open_schedules()
-        self._schedules[application] = timestamp
+        self._schedules[application] = float(timestamp)
+
+    def delete_application_time(self, application: str) -> None:
+        self._check_open_schedules()
+        if application in self._schedules:
+            logger.debug(
+                "Deleting application time from schedules",
+                application=application,
+                endpoint_id=self._endpoint_id,
+            )
+            del self._schedules[application]
 
     def get_application_list(self) -> set[str]:
         self._check_open_schedules()
         return set(self._schedules.keys())
 
-    def get_min_timestamp(self) -> Optional[
+    def get_min_timestamp(self) -> Optional[float]:
         self._check_open_schedules()
         return min(self._schedules.values(), default=None)
 
@@ -198,7 +208,7 @@ class ModelMonitoringSchedulesFileChief(ModelMonitoringSchedulesFileBase):
             project=self._project
         )
 
-    def get_endpoint_last_request(self, endpoint_uid: str) -> Optional[
+    def get_endpoint_last_request(self, endpoint_uid: str) -> Optional[float]:
         self._check_open_schedules()
         if endpoint_uid in self._schedules:
             return self._schedules[endpoint_uid].get(
@@ -208,15 +218,19 @@ class ModelMonitoringSchedulesFileChief(ModelMonitoringSchedulesFileBase):
         return None
 
     def update_endpoint_timestamps(
-        self, endpoint_uid: str, last_request:
+        self, endpoint_uid: str, last_request: float, last_analyzed: float
     ) -> None:
         self._check_open_schedules()
         self._schedules[endpoint_uid] = {
-            schemas.model_monitoring.constants.ScheduleChiefFields.LAST_REQUEST:
-
+            schemas.model_monitoring.constants.ScheduleChiefFields.LAST_REQUEST: float(
+                last_request
+            ),
+            schemas.model_monitoring.constants.ScheduleChiefFields.LAST_ANALYZED: float(
+                last_analyzed
+            ),
         }
 
-    def get_endpoint_last_analyzed(self, endpoint_uid: str) -> Optional[
+    def get_endpoint_last_analyzed(self, endpoint_uid: str) -> Optional[float]:
         self._check_open_schedules()
         if endpoint_uid in self._schedules:
             return self._schedules[endpoint_uid].get(
@@ -267,9 +281,18 @@ class ModelMonitoringSchedulesFileApplication(ModelMonitoringSchedulesFileBase):
         self, endpoint_uid: str, last_analyzed: datetime
     ) -> None:
         self._check_open_schedules()
-        self._schedules[endpoint_uid] = last_analyzed.
-
-
+        self._schedules[endpoint_uid] = last_analyzed.isoformat()
+
+    def delete_endpoints_last_analyzed(self, endpoint_uids: list[str]) -> None:
+        self._check_open_schedules()
+        for endpoint_uid in endpoint_uids:
+            if endpoint_uid in self._schedules:
+                logger.debug(
+                    "Deleting endpoint last analyzed from schedules",
+                    endpoint_uid=endpoint_uid,
+                    application=self._application,
+                )
+                del self._schedules[endpoint_uid]
 
 
 def _delete_folder(folder: str) -> None:
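The schedules-file changes above switch per-application timestamps to plain floats (presumably epoch seconds) and add explicit deletion. A rough usage sketch; the constructor arguments and context-manager usage here are assumptions for illustration, not shown in this diff:

    from mlrun.model_monitoring.db._schedules import ModelMonitoringSchedulesFileEndpoint

    # Hypothetical arguments; the real constructor signature is not part of this diff.
    with ModelMonitoringSchedulesFileEndpoint(project="p1", endpoint_id="ep1") as schedules:
        schedules.update_application_time("drift-app", timestamp=1718000000.0)
        assert schedules.get_application_time("drift-app") == 1718000000.0
        earliest = schedules.get_min_timestamp()  # min across all applications, or None
        schedules.delete_application_time("drift-app")  # new in 1.10.1rc4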
mlrun/model_monitoring/db/_stats.py

@@ -13,6 +13,7 @@
 # limitations under the License.
 import abc
 import json
+import typing
 from abc import abstractmethod
 from datetime import datetime, timezone
 from typing import cast
@@ -73,7 +74,7 @@ class ModelMonitoringStatsFile(abc.ABC):
             path=self._item.url,
         )
 
-    def read(self) -> tuple[dict, datetime]:
+    def read(self) -> tuple[dict, typing.Optional[datetime]]:
         """
         Read the stats data and timestamp saved in file
         :return: tuple[dict, str] dictionary with stats data and timestamp saved in file
@@ -99,13 +100,13 @@ class ModelMonitoringStatsFile(abc.ABC):
         ):
             raise
 
-        logger.
+        logger.warning(
             "The Stats file was not found. It should have been created "
             "as a part of the model endpoint's creation",
            path=self._path,
            error=err,
        )
-
+        return {}, None
 
     def write(self, stats: dict, timestamp: datetime) -> None:
         """
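Since read() now returns an empty dict and a None timestamp when the stats file is missing (instead of an unhandled error), callers should guard on the timestamp. A minimal sketch, where stats_file stands in for any concrete ModelMonitoringStatsFile subclass:

    from datetime import datetime, timezone

    stats, timestamp = stats_file.read()
    if timestamp is None:
        # The stats file was missing; there is nothing to compare against yet.
        print("no stats recorded")
    else:
        age = datetime.now(timezone.utc) - timestamp  # assumes the stored timestamp is tz-aware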
mlrun/model_monitoring/db/tsdb/base.py

@@ -14,7 +14,7 @@
 
 from abc import ABC, abstractmethod
 from datetime import datetime, timedelta
-from typing import
+from typing import ClassVar, Literal, Optional, Union
 
 import pandas as pd
 import pydantic.v1
@@ -60,6 +60,16 @@ class TSDBConnector(ABC):
         """
         pass
 
+    def apply_writer_steps(self, graph, after, **kwargs) -> None:
+        """
+        Apply TSDB steps on the provided writer graph. Throughout these steps, the graph stores metrics / results.
+        This data is being used by mlrun UI and the monitoring dashboards in grafana.
+        There are 2 different key metric dictionaries that are being generated throughout these steps:
+        - metrics (user-defined metrics) - model monitoring application metrics
+        - results (user-defined results) - model monitoring application results
+        """
+        pass
+
     @abstractmethod
     def handle_model_error(self, graph, **kwargs) -> None:
         """
@@ -96,14 +106,23 @@ class TSDBConnector(ABC):
         """
 
     @abstractmethod
-    def delete_tsdb_records(
-        self,
-        endpoint_ids: list[str],
-    ) -> None:
+    def delete_tsdb_records(self, endpoint_ids: list[str]) -> None:
         """
         Delete model endpoint records from the TSDB connector.
+
         :param endpoint_ids: List of model endpoint unique identifiers.
-
+        """
+        pass
+
+    @abstractmethod
+    def delete_application_records(
+        self, application_name: str, endpoint_ids: Optional[list[str]] = None
+    ) -> None:
+        """
+        Delete application records from the TSDB for the given model endpoints or all if ``None``.
+
+        :param application_name: The name of the application to delete records for.
+        :param endpoint_ids: List of model endpoint unique identifiers.
         """
         pass
 
@@ -425,11 +444,9 @@ class TSDBConnector(ABC):
         ]
         """
 
-
+    def add_basic_metrics(
         self,
         model_endpoint_objects: list[mlrun.common.schemas.ModelEndpoint],
-        project: str,
-        run_in_threadpool: Callable,
         metric_list: Optional[list[str]] = None,
     ) -> list[mlrun.common.schemas.ModelEndpoint]:
         raise NotImplementedError()
@@ -774,3 +791,6 @@ class TSDBConnector(ABC):
             )
         )
         return mm_schemas.ModelEndpointDriftValues(values=values)
+
+    def add_pre_writer_steps(self, graph, after):
+        return None
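Taken together, add_pre_writer_steps and apply_writer_steps form a small template: the writer graph can first attach an optional per-backend pre-processing step, then the terminal storage targets. A sketch of a custom connector following that pattern (step class paths are hypothetical, and the other abstract methods are omitted):

    class MyTSDBConnector(TSDBConnector):
        def add_pre_writer_steps(self, graph, after):
            # Optional transformation before the storage targets; returning the
            # step lets the caller chain apply_writer_steps after it.
            return graph.add_step("my_pkg.PrepareEvent", name="PrepareEvent", after=after)

        def apply_writer_steps(self, graph, after, **kwargs) -> None:
            # Terminal storage step(s) for application metrics and results.
            graph.add_step("my_pkg.MyTSDBTarget", name="tsdb_metrics", after=after)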
mlrun/model_monitoring/db/tsdb/tdengine/schemas.py

@@ -122,10 +122,7 @@ class TDEngineSchema:
         )
         return f"DELETE FROM {self.database}.{subtable} WHERE {values};"
 
-    def drop_subtable_query(
-        self,
-        subtable: str,
-    ) -> str:
+    def drop_subtable_query(self, subtable: str) -> str:
         return f"DROP TABLE if EXISTS {self.database}.`{subtable}`;"
 
     def drop_supertable_query(self) -> str:
@@ -145,8 +142,10 @@ class TDEngineSchema:
         values = f" {operator} ".join(
             f"{filter_tag} LIKE '{val}'" for val in filter_values
         )
+        return self._get_tables_query_by_condition(values)
 
-
+    def _get_tables_query_by_condition(self, condition: str) -> str:
+        return f"SELECT DISTINCT TBNAME FROM {self.database}.{self.super_table} WHERE {condition};"
 
     @staticmethod
     def _get_records_query(
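For concreteness, assuming a schema whose database is mm_sys1, whose super table is app_results, and an OR operator, the refactored helpers yield SQL along these lines:

    # schema._get_subtables_query_by_tag(filter_tag="endpoint_id", filter_values=["ep1", "ep2"])
    # builds the LIKE condition and delegates to _get_tables_query_by_condition:
    #   SELECT DISTINCT TBNAME FROM mm_sys1.app_results WHERE endpoint_id LIKE 'ep1' OR endpoint_id LIKE 'ep2';

    # schema.drop_subtable_query(subtable="ep1_drift_app")
    #   DROP TABLE if EXISTS mm_sys1.`ep1_drift_app`;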
mlrun/model_monitoring/db/tsdb/tdengine/tdengine_connector.py

@@ -14,7 +14,7 @@
 
 import threading
 from datetime import datetime, timedelta
-from typing import
+from typing import Final, Literal, Optional, Union
 
 import pandas as pd
 import taosws
@@ -22,7 +22,7 @@ import taosws
 import mlrun.common.schemas.model_monitoring as mm_schemas
 import mlrun.common.types
 import mlrun.model_monitoring.db.tsdb.tdengine.schemas as tdengine_schemas
-
+from mlrun.config import config
 from mlrun.datastore.datastore_profile import DatastoreProfile
 from mlrun.model_monitoring.db import TSDBConnector
 from mlrun.model_monitoring.db.tsdb.tdengine.tdengine_connection import (
@@ -55,14 +55,12 @@ class TDEngineConnector(TSDBConnector):
     """
 
     type: str = mm_schemas.TSDBTarget.TDEngine
-    database = f"{tdengine_schemas._MODEL_MONITORING_DATABASE}_{mlrun.mlconf.system_id}"
 
     def __init__(
         self,
         project: str,
         profile: DatastoreProfile,
         timestamp_precision: TDEngineTimestampPrecision = TDEngineTimestampPrecision.MICROSECOND,
-        **kwargs,
     ):
         super().__init__(project=project)
 
@@ -72,6 +70,15 @@ class TDEngineConnector(TSDBConnector):
             timestamp_precision
         )
 
+        if not mlrun.mlconf.system_id:
+            raise mlrun.errors.MLRunInvalidArgumentError(
+                "system_id is not set in mlrun.mlconf. "
+                "TDEngineConnector requires system_id to be configured for database name construction. "
+                "Please ensure MLRun configuration is properly loaded before creating TDEngineConnector."
+            )
+        self.database = (
+            f"{tdengine_schemas._MODEL_MONITORING_DATABASE}_{mlrun.mlconf.system_id}"
+        )
         self._init_super_tables()
 
     @property
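Moving the database name off the class body is more than a tidy-up: a class attribute is evaluated once at import time, before mlrun.mlconf.system_id is necessarily populated, which could silently bake an empty suffix into the database name. Computed per instance, a missing system_id now fails fast. A sketch of the guarded behavior (profile construction omitted):

    import mlrun

    mlrun.mlconf.system_id = ""  # configuration not loaded yet
    TDEngineConnector(project="p1", profile=profile)
    # raises MLRunInvalidArgumentError: "system_id is not set in mlrun.mlconf. ..."

    mlrun.mlconf.system_id = "sys1"
    connector = TDEngineConnector(project="p1", profile=profile)
    # connector.database == "<model-monitoring database prefix>_sys1"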
@@ -205,7 +212,7 @@ class TDEngineConnector(TSDBConnector):
     @staticmethod
     def _generate_filter_query(
         filter_column: str, filter_values: Union[str, list[Union[str, int]]]
-    ) ->
+    ) -> str:
         """
         Generate a filter query for TDEngine based on the provided column and values.
 
@@ -213,15 +220,14 @@ class TDEngineConnector(TSDBConnector):
         :param filter_values: A single value or a list of values to filter by.
 
         :return: A string representing the filter query.
-        :raise:
+        :raise: ``MLRunValueError`` if the filter values are not of type string or list.
         """
-
         if isinstance(filter_values, str):
             return f"{filter_column}='{filter_values}'"
         elif isinstance(filter_values, list):
             return f"{filter_column} IN ({', '.join(repr(v) for v in filter_values)}) "
         else:
-            raise mlrun.errors.
+            raise mlrun.errors.MLRunValueError(
                 f"Invalid filter values {filter_values}: must be a string or a list, "
                 f"got {type(filter_values).__name__}; filter values: {filter_values}"
             )
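The filter helper's behavior, by example:

    TDEngineConnector._generate_filter_query("endpoint_id", "ep1")
    # -> "endpoint_id='ep1'"
    TDEngineConnector._generate_filter_query("endpoint_id", ["ep1", "ep2"])
    # -> "endpoint_id IN ('ep1', 'ep2') "  (note the trailing space)
    TDEngineConnector._generate_filter_query("endpoint_id", 42)
    # raises mlrun.errors.MLRunValueError (the exception class is now spelled out)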
@@ -279,6 +285,65 @@ class TDEngineConnector(TSDBConnector):
             after="ProcessBeforeTDEngine",
         )
 
+    def add_pre_writer_steps(self, graph, after):
+        return graph.add_step(
+            "mlrun.model_monitoring.db.tsdb.tdengine.writer_graph_steps.ProcessBeforeTDEngine",
+            name="ProcessBeforeTDEngine",
+            after=after,
+        )
+
+    def apply_writer_steps(self, graph, after, **kwargs) -> None:
+        graph.add_step(
+            "mlrun.datastore.storeytargets.TDEngineStoreyTarget",
+            name="tsdb_metrics",
+            after=after,
+            url=f"ds://{self._tdengine_connection_profile.name}",
+            supertable=self.tables[mm_schemas.TDEngineSuperTables.METRICS].super_table,
+            table_col=mm_schemas.EventFieldType.TABLE_COLUMN,
+            time_col=mm_schemas.WriterEvent.END_INFER_TIME,
+            database=self.database,
+            graph_shape="cylinder",
+            columns=[
+                mm_schemas.WriterEvent.START_INFER_TIME,
+                mm_schemas.MetricData.METRIC_VALUE,
+            ],
+            tag_cols=[
+                mm_schemas.WriterEvent.ENDPOINT_ID,
+                mm_schemas.WriterEvent.APPLICATION_NAME,
+                mm_schemas.MetricData.METRIC_NAME,
+            ],
+            max_events=config.model_endpoint_monitoring.writer_graph.max_events,
+            flush_after_seconds=config.model_endpoint_monitoring.writer_graph.flush_after_seconds,
+        )
+
+        graph.add_step(
+            "mlrun.datastore.storeytargets.TDEngineStoreyTarget",
+            name="tsdb_app_results",
+            after=after,
+            url=f"ds://{self._tdengine_connection_profile.name}",
+            supertable=self.tables[
+                mm_schemas.TDEngineSuperTables.APP_RESULTS
+            ].super_table,
+            table_col=mm_schemas.EventFieldType.TABLE_COLUMN,
+            time_col=mm_schemas.WriterEvent.END_INFER_TIME,
+            database=self.database,
+            graph_shape="cylinder",
+            columns=[
+                mm_schemas.WriterEvent.START_INFER_TIME,
+                mm_schemas.ResultData.RESULT_VALUE,
+                mm_schemas.ResultData.RESULT_STATUS,
+                mm_schemas.ResultData.RESULT_EXTRA_DATA,
+            ],
+            tag_cols=[
+                mm_schemas.WriterEvent.ENDPOINT_ID,
+                mm_schemas.WriterEvent.APPLICATION_NAME,
+                mm_schemas.ResultData.RESULT_NAME,
+                mm_schemas.ResultData.RESULT_KIND,
+            ],
+            max_events=config.model_endpoint_monitoring.writer_graph.max_events,
+            flush_after_seconds=config.model_endpoint_monitoring.writer_graph.flush_after_seconds,
+        )
+
     def handle_model_error(
         self,
         graph,
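Both storey targets batch their writes through the new writer-graph settings (config.model_endpoint_monitoring.writer_graph). If tuning is needed, something along these lines should work; the values are illustrative and the defaults are not shown in this diff:

    import mlrun

    mlrun.mlconf.model_endpoint_monitoring.writer_graph.max_events = 1000
    mlrun.mlconf.model_endpoint_monitoring.writer_graph.flush_after_seconds = 10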
@@ -311,10 +376,7 @@ class TDEngineConnector(TSDBConnector):
             flush_after_seconds=tsdb_batching_timeout_secs,
         )
 
-    def delete_tsdb_records(
-        self,
-        endpoint_ids: list[str],
-    ):
+    def delete_tsdb_records(self, endpoint_ids: list[str]) -> None:
         """
         To delete subtables within TDEngine, we first query the subtables names with the provided endpoint_ids.
         Then, we drop each subtable.
@@ -332,9 +394,7 @@ class TDEngineConnector(TSDBConnector):
                 get_subtable_query = self.tables[table]._get_subtables_query_by_tag(
                     filter_tag="endpoint_id", filter_values=endpoint_ids
                 )
-                subtables_result = self.connection.run(
-                    query=get_subtable_query,
-                )
+                subtables_result = self.connection.run(query=get_subtable_query)
                 subtables.extend([subtable[0] for subtable in subtables_result.data])
             except Exception as e:
                 logger.warning(
@@ -346,15 +406,13 @@ class TDEngineConnector(TSDBConnector):
         )
 
         # Prepare the drop statements
-        drop_statements = [
-
-
-
-        )
+        drop_statements = [
+            self.tables[table].drop_subtable_query(subtable=subtable)
+            for subtable in subtables
+        ]
         try:
-
-
-            )
+            logger.debug("Dropping subtables", drop_statements=drop_statements)
+            self.connection.run(statements=drop_statements)
         except Exception as e:
             logger.warning(
                 "Failed to delete model endpoint resources. You may need to delete them manually. "
@@ -369,6 +427,48 @@ class TDEngineConnector(TSDBConnector):
             number_of_endpoints_to_delete=len(endpoint_ids),
         )
 
+    def delete_application_records(
+        self, application_name: str, endpoint_ids: Optional[list[str]] = None
+    ) -> None:
+        """
+        Delete application records from the TSDB for the given model endpoints or all if ``endpoint_ids`` is ``None``.
+        """
+        logger.debug(
+            "Deleting application records",
+            project=self.project,
+            application_name=application_name,
+            endpoint_ids=endpoint_ids,
+        )
+        tables = [
+            self.tables[mm_schemas.TDEngineSuperTables.APP_RESULTS],
+            self.tables[mm_schemas.TDEngineSuperTables.METRICS],
+        ]
+
+        filter_query = self._generate_filter_query(
+            filter_column=mm_schemas.ApplicationEvent.APPLICATION_NAME,
+            filter_values=application_name,
+        )
+        if endpoint_ids:
+            endpoint_ids_filter = self._generate_filter_query(
+                filter_column=mm_schemas.EventFieldType.ENDPOINT_ID,
+                filter_values=endpoint_ids,
+            )
+            filter_query += f" AND {endpoint_ids_filter}"
+
+        drop_statements: list[str] = []
+        for table in tables:
+            get_subtable_query = table._get_tables_query_by_condition(filter_query)
+            subtables_result = self.connection.run(query=get_subtable_query)
+            drop_statements.extend(
+                [
+                    table.drop_subtable_query(subtable=subtable[0])
+                    for subtable in subtables_result.data
+                ]
+            )
+
+        logger.debug("Dropping application records", drop_statements=drop_statements)
+        self.connection.run(statements=drop_statements)
+
     def delete_tsdb_resources(self):
         """
         Delete all project resources in the TSDB connector, such as model endpoints data and drift results.
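A sketch of the new deletion API on an already-constructed connector: passing endpoint_ids narrows the deletion, while omitting it drops the application's records for every endpoint in the project.

    # Drop "drift-app" records for two specific endpoints:
    connector.delete_application_records("drift-app", endpoint_ids=["ep1", "ep2"])

    # Drop "drift-app" records across all endpoints:
    connector.delete_application_records("drift-app")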
@@ -688,7 +788,9 @@ class TDEngineConnector(TSDBConnector):
         endpoint_ids: Union[str, list[str]],
         start: Optional[datetime] = None,
         end: Optional[datetime] = None,
-    ) -> pd.DataFrame:
+    ) -> Union[pd.DataFrame, dict[str, float]]:
+        if not endpoint_ids:
+            return {}
         filter_query = self._generate_filter_query(
             filter_column=mm_schemas.EventFieldType.ENDPOINT_ID,
             filter_values=endpoint_ids,
@@ -823,7 +925,7 @@ class TDEngineConnector(TSDBConnector):
         # Convert DataFrame to a dictionary
         return {
             (
-                row[mm_schemas.WriterEvent.APPLICATION_NAME],
+                row[mm_schemas.WriterEvent.APPLICATION_NAME].lower(),
                 row[mm_schemas.ResultData.RESULT_STATUS],
             ): row["count(result_value)"]
             for _, row in df.iterrows()
@@ -908,26 +1010,34 @@ class TDEngineConnector(TSDBConnector):
                 mm_schemas.WriterEvent.END_INFER_TIME,
                 mm_schemas.WriterEvent.APPLICATION_NAME,
             ]
+            agg_columns = [mm_schemas.WriterEvent.END_INFER_TIME]
+            group_by_columns = [mm_schemas.WriterEvent.APPLICATION_NAME]
             if record_type == "results":
                 table = self.tables[
                     mm_schemas.TDEngineSuperTables.APP_RESULTS
                 ].super_table
                 columns += [
                     mm_schemas.ResultData.RESULT_NAME,
+                    mm_schemas.ResultData.RESULT_KIND,
+                    mm_schemas.ResultData.RESULT_STATUS,
+                    mm_schemas.ResultData.RESULT_VALUE,
+                ]
+                agg_columns += [
                     mm_schemas.ResultData.RESULT_VALUE,
                     mm_schemas.ResultData.RESULT_STATUS,
                     mm_schemas.ResultData.RESULT_KIND,
                 ]
-
+                group_by_columns += [mm_schemas.ResultData.RESULT_NAME]
             else:
                 table = self.tables[mm_schemas.TDEngineSuperTables.METRICS].super_table
                 columns += [
                     mm_schemas.MetricData.METRIC_NAME,
                     mm_schemas.MetricData.METRIC_VALUE,
                 ]
-
+                agg_columns += [mm_schemas.MetricData.METRIC_VALUE]
+                group_by_columns += [mm_schemas.MetricData.METRIC_NAME]
 
-
+            df = self._get_records(
                 table=table,
                 start=start,
                 end=end,
@@ -935,10 +1045,17 @@ class TDEngineConnector(TSDBConnector):
                 filter_query=filter_query,
                 timestamp_column=mm_schemas.WriterEvent.END_INFER_TIME,
                 # Aggregate per application/metric pair regardless of timestamp
-                group_by=
-                preform_agg_columns=
+                group_by=group_by_columns,
+                preform_agg_columns=agg_columns,
                 agg_funcs=["last"],
             )
+            if not df.empty:
+                for column in agg_columns:
+                    df.rename(
+                        columns={f"last({column})": column},
+                        inplace=True,
+                    )
+            return df
 
         df_results = get_latest_metrics_records(record_type="results")
         df_metrics = get_latest_metrics_records(record_type="metrics")
@@ -955,19 +1072,14 @@ class TDEngineConnector(TSDBConnector):
             ]
         ):
             metric_objects = []
-
             if not df_results.empty:
-                df_results.rename(
-                    columns={
-                        f"last({mm_schemas.ResultData.RESULT_VALUE})": mm_schemas.ResultData.RESULT_VALUE,
-                    },
-                    inplace=True,
-                )
                 for _, row in df_results.iterrows():
                     metric_objects.append(
                         mm_schemas.ApplicationResultRecord(
                             time=datetime.fromisoformat(
-                                row[mm_schemas.WriterEvent.END_INFER_TIME]
+                                row[mm_schemas.WriterEvent.END_INFER_TIME].replace(
+                                    " +", "+"
+                                )
                             ),
                             result_name=row[mm_schemas.ResultData.RESULT_NAME],
                             kind=row[mm_schemas.ResultData.RESULT_KIND],
@@ -977,17 +1089,13 @@ class TDEngineConnector(TSDBConnector):
             )
 
             if not df_metrics.empty:
-                df_metrics.rename(
-                    columns={
-                        f"last({mm_schemas.MetricData.METRIC_VALUE})": mm_schemas.MetricData.METRIC_VALUE,
-                    },
-                    inplace=True,
-                )
                 for _, row in df_metrics.iterrows():
                     metric_objects.append(
                         mm_schemas.ApplicationMetricRecord(
                             time=datetime.fromisoformat(
-                                row[mm_schemas.WriterEvent.END_INFER_TIME]
+                                row[mm_schemas.WriterEvent.END_INFER_TIME].replace(
+                                    " +", "+"
+                                )
                             ),
                             metric_name=row[mm_schemas.MetricData.METRIC_NAME],
                             value=row[mm_schemas.MetricData.METRIC_VALUE],
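The .replace(" +", "+") calls suggest TDEngine can return timestamp strings with a space before the UTC offset, a form datetime.fromisoformat rejects:

    from datetime import datetime

    ts = "2025-06-10 12:00:00.000 +00:00"  # illustrative TDEngine-style timestamp
    # datetime.fromisoformat(ts)           # ValueError: space before the offset
    datetime.fromisoformat(ts.replace(" +", "+"))  # parses cleanly

Note that the replacement only covers positive offsets; a string with a negative offset (" -05:00") would still fail to parse.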
@@ -1146,11 +1254,9 @@ class TDEngineConnector(TSDBConnector):
         df.dropna(inplace=True)
         return df
 
-
+    def add_basic_metrics(
         self,
         model_endpoint_objects: list[mlrun.common.schemas.ModelEndpoint],
-        project: str,
-        run_in_threadpool: Callable,
         metric_list: Optional[list[str]] = None,
     ) -> list[mlrun.common.schemas.ModelEndpoint]:
         """
@@ -1158,8 +1264,6 @@ class TDEngineConnector(TSDBConnector):
 
         :param model_endpoint_objects: A list of `ModelEndpoint` objects that will
                                        be filled with the relevant basic metrics.
-        :param project: The name of the project.
-        :param run_in_threadpool: A function that runs another function in a thread pool.
         :param metric_list: List of metrics to include from the time series DB. Defaults to all metrics.
 
         :return: A list of `ModelEndpointMonitoringMetric` objects.
mlrun/model_monitoring/db/tsdb/tdengine/writer_graph_steps.py (new file)

@@ -0,0 +1,51 @@
+# Copyright 2025 Iguazio
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from datetime import datetime
+
+import mlrun.common.schemas.model_monitoring as mm_schemas
+import mlrun.feature_store.steps
+from mlrun.utils import logger
+
+
+class ProcessBeforeTDEngine(mlrun.feature_store.steps.MapClass):
+    def __init__(self, **kwargs):
+        """
+        Process the data before writing to TDEngine. This step create the table name.
+
+        :returns: Event as a dictionary which will be written into the TDEngine Metrics/Results tables.
+        """
+        super().__init__(**kwargs)
+
+    def do(self, event):
+        logger.info("Process event before writing to TDEngine", event=event)
+        kind = event.get("kind")
+        table_name = (
+            f"{event[mm_schemas.WriterEvent.ENDPOINT_ID]}_"
+            f"{event[mm_schemas.WriterEvent.APPLICATION_NAME]}"
+        )
+        if kind == mm_schemas.WriterEventKind.RESULT:
+            # Write a new result
+            event[mm_schemas.EventFieldType.TABLE_COLUMN] = (
+                f"{table_name}_{event[mm_schemas.ResultData.RESULT_NAME]}"
+            ).replace("-", "_")
+        elif kind == mm_schemas.WriterEventKind.METRIC:
+            # Write a new metric
+            event[mm_schemas.EventFieldType.TABLE_COLUMN] = (
+                f"{table_name}_{event[mm_schemas.MetricData.METRIC_NAME]}"
+            ).replace("-", "_")
+        event[mm_schemas.WriterEvent.START_INFER_TIME] = datetime.fromisoformat(
+            event[mm_schemas.WriterEvent.START_INFER_TIME]
+        )
+        return event
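To make the new step concrete, here is the transformation it applies to a result event, assuming the mm_schemas constants carry the plain string values shown (e.g. EventFieldType.TABLE_COLUMN == "table_column"):

    event_in = {
        "endpoint_id": "ep1",
        "application_name": "drift-app",
        "kind": "result",
        "result_name": "data-drift",
        "start_infer_time": "2025-06-10T12:00:00+00:00",
    }
    # After ProcessBeforeTDEngine().do(event_in):
    #   event_in["table_column"] == "ep1_drift_app_data_drift"  (dashes become underscores)
    #   event_in["start_infer_time"] is a datetime object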