mlrun 1.10.0rc13__py3-none-any.whl → 1.10.0rc42__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of mlrun might be problematic.
- mlrun/__init__.py +22 -2
- mlrun/artifacts/base.py +0 -31
- mlrun/artifacts/document.py +6 -1
- mlrun/artifacts/llm_prompt.py +123 -25
- mlrun/artifacts/manager.py +0 -5
- mlrun/artifacts/model.py +3 -3
- mlrun/common/constants.py +10 -1
- mlrun/common/formatters/artifact.py +1 -0
- mlrun/common/model_monitoring/helpers.py +86 -0
- mlrun/common/schemas/__init__.py +3 -0
- mlrun/common/schemas/auth.py +2 -0
- mlrun/common/schemas/function.py +10 -0
- mlrun/common/schemas/hub.py +30 -18
- mlrun/common/schemas/model_monitoring/__init__.py +3 -0
- mlrun/common/schemas/model_monitoring/constants.py +30 -6
- mlrun/common/schemas/model_monitoring/functions.py +14 -5
- mlrun/common/schemas/model_monitoring/model_endpoints.py +21 -0
- mlrun/common/schemas/pipeline.py +1 -1
- mlrun/common/schemas/serving.py +3 -0
- mlrun/common/schemas/workflow.py +3 -1
- mlrun/common/secrets.py +22 -1
- mlrun/config.py +33 -11
- mlrun/datastore/__init__.py +11 -3
- mlrun/datastore/azure_blob.py +162 -47
- mlrun/datastore/datastore.py +9 -4
- mlrun/datastore/datastore_profile.py +61 -5
- mlrun/datastore/model_provider/huggingface_provider.py +363 -0
- mlrun/datastore/model_provider/mock_model_provider.py +87 -0
- mlrun/datastore/model_provider/model_provider.py +230 -65
- mlrun/datastore/model_provider/openai_provider.py +295 -42
- mlrun/datastore/s3.py +24 -2
- mlrun/datastore/storeytargets.py +2 -3
- mlrun/datastore/utils.py +15 -3
- mlrun/db/base.py +47 -19
- mlrun/db/httpdb.py +120 -56
- mlrun/db/nopdb.py +38 -10
- mlrun/execution.py +70 -19
- mlrun/hub/__init__.py +15 -0
- mlrun/hub/module.py +181 -0
- mlrun/k8s_utils.py +105 -16
- mlrun/launcher/base.py +13 -6
- mlrun/launcher/local.py +15 -0
- mlrun/model.py +24 -3
- mlrun/model_monitoring/__init__.py +1 -0
- mlrun/model_monitoring/api.py +66 -27
- mlrun/model_monitoring/applications/__init__.py +1 -1
- mlrun/model_monitoring/applications/base.py +509 -117
- mlrun/model_monitoring/applications/context.py +2 -4
- mlrun/model_monitoring/applications/results.py +4 -7
- mlrun/model_monitoring/controller.py +239 -101
- mlrun/model_monitoring/db/_schedules.py +116 -33
- mlrun/model_monitoring/db/_stats.py +4 -3
- mlrun/model_monitoring/db/tsdb/base.py +100 -9
- mlrun/model_monitoring/db/tsdb/tdengine/schemas.py +11 -6
- mlrun/model_monitoring/db/tsdb/tdengine/tdengine_connector.py +191 -50
- mlrun/model_monitoring/db/tsdb/tdengine/writer_graph_steps.py +51 -0
- mlrun/model_monitoring/db/tsdb/v3io/stream_graph_steps.py +17 -4
- mlrun/model_monitoring/db/tsdb/v3io/v3io_connector.py +259 -40
- mlrun/model_monitoring/helpers.py +54 -9
- mlrun/model_monitoring/stream_processing.py +45 -14
- mlrun/model_monitoring/writer.py +220 -1
- mlrun/platforms/__init__.py +3 -2
- mlrun/platforms/iguazio.py +7 -3
- mlrun/projects/operations.py +6 -1
- mlrun/projects/pipelines.py +46 -26
- mlrun/projects/project.py +166 -58
- mlrun/run.py +94 -17
- mlrun/runtimes/__init__.py +18 -0
- mlrun/runtimes/base.py +14 -6
- mlrun/runtimes/daskjob.py +7 -0
- mlrun/runtimes/local.py +5 -2
- mlrun/runtimes/mounts.py +20 -2
- mlrun/runtimes/mpijob/abstract.py +6 -0
- mlrun/runtimes/mpijob/v1.py +6 -0
- mlrun/runtimes/nuclio/__init__.py +1 -0
- mlrun/runtimes/nuclio/application/application.py +149 -17
- mlrun/runtimes/nuclio/function.py +76 -27
- mlrun/runtimes/nuclio/serving.py +97 -15
- mlrun/runtimes/pod.py +234 -21
- mlrun/runtimes/remotesparkjob.py +6 -0
- mlrun/runtimes/sparkjob/spark3job.py +6 -0
- mlrun/runtimes/utils.py +49 -11
- mlrun/secrets.py +54 -13
- mlrun/serving/__init__.py +2 -0
- mlrun/serving/remote.py +79 -6
- mlrun/serving/routers.py +23 -41
- mlrun/serving/server.py +320 -80
- mlrun/serving/states.py +725 -157
- mlrun/serving/steps.py +62 -0
- mlrun/serving/system_steps.py +200 -119
- mlrun/serving/v2_serving.py +9 -10
- mlrun/utils/helpers.py +288 -88
- mlrun/utils/logger.py +3 -1
- mlrun/utils/notifications/notification/base.py +18 -0
- mlrun/utils/notifications/notification/git.py +2 -4
- mlrun/utils/notifications/notification/slack.py +2 -4
- mlrun/utils/notifications/notification/webhook.py +2 -5
- mlrun/utils/notifications/notification_pusher.py +1 -1
- mlrun/utils/retryer.py +15 -2
- mlrun/utils/version/version.json +2 -2
- {mlrun-1.10.0rc13.dist-info → mlrun-1.10.0rc42.dist-info}/METADATA +45 -51
- {mlrun-1.10.0rc13.dist-info → mlrun-1.10.0rc42.dist-info}/RECORD +106 -101
- mlrun/api/schemas/__init__.py +0 -259
- {mlrun-1.10.0rc13.dist-info → mlrun-1.10.0rc42.dist-info}/WHEEL +0 -0
- {mlrun-1.10.0rc13.dist-info → mlrun-1.10.0rc42.dist-info}/entry_points.txt +0 -0
- {mlrun-1.10.0rc13.dist-info → mlrun-1.10.0rc42.dist-info}/licenses/LICENSE +0 -0
- {mlrun-1.10.0rc13.dist-info → mlrun-1.10.0rc42.dist-info}/top_level.txt +0 -0
mlrun/model_monitoring/db/_schedules.py

@@ -13,25 +13,36 @@
 # limitations under the License.
 
 import json
+import sys
 from abc import ABC, abstractmethod
 from contextlib import AbstractContextManager
+from datetime import datetime
 from types import TracebackType
-from typing import Final, Optional
+from typing import TYPE_CHECKING, Final, Optional
 
 import botocore.exceptions
 
+import mlrun
 import mlrun.common.schemas as schemas
 import mlrun.errors
 import mlrun.model_monitoring.helpers
+import mlrun.utils.helpers
 from mlrun.utils import logger
 
+if TYPE_CHECKING:
+    if sys.version_info >= (3, 11):
+        from typing import Self
+    else:
+        from typing_extensions import Self
+
 
 class ModelMonitoringSchedulesFileBase(AbstractContextManager, ABC):
     DEFAULT_SCHEDULES: Final = {}
     INITIAL_CONTENT = json.dumps(DEFAULT_SCHEDULES)
     ENCODING = "utf-8"
 
-    def __init__(self):
+    def __init__(self) -> None:
+        # `self._item` is the persistent version of the monitoring schedules.
         self._item = self.get_data_item_object()
         if self._item:
             self._path = self._item.url
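For context, the new `TYPE_CHECKING` block above backs the quoted `Self` annotation used on `__enter__` later in this file. A minimal standalone sketch of the pattern (the class name here is hypothetical, for illustration only):

import sys
from typing import TYPE_CHECKING

if TYPE_CHECKING:
    if sys.version_info >= (3, 11):
        from typing import Self
    else:
        from typing_extensions import Self


class SchedulesFile:
    def __enter__(self) -> "Self":
        # The quoted "Self" keeps subclass return types precise while
        # avoiding a runtime dependency on typing_extensions.
        return self

    def __exit__(self, *exc) -> None:
        pass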
@@ -43,9 +54,16 @@ class ModelMonitoringSchedulesFileBase(AbstractContextManager, ABC):
         self._open_schedules = False
 
     @abstractmethod
-    def get_data_item_object(self) -> mlrun.DataItem:
+    def get_data_item_object(self) -> "mlrun.DataItem":
         pass
 
+    def _exists(self) -> bool:
+        """Return whether the file exists or not"""
+        return (
+            self._fs is None  # In-memory store
+            or self._fs.exists(self._path)
+        )
+
     def create(self) -> None:
         """Create a schedules file with initial content - an empty dictionary"""
         logger.debug("Creating model monitoring schedules file", path=self._item.url)
@@ -53,10 +71,7 @@ class ModelMonitoringSchedulesFileBase(AbstractContextManager, ABC):
 
     def delete(self) -> None:
         """Delete schedules file if it exists"""
-        if (
-            self._fs is None  # In-memory store
-            or self._fs.exists(self._path)
-        ):
+        if self._exists():
             logger.debug(
                 "Deleting model monitoring schedules file", path=self._item.url
             )
@@ -100,7 +115,7 @@ class ModelMonitoringSchedulesFileBase(AbstractContextManager, ABC):
         self._schedules = self.DEFAULT_SCHEDULES
         self._open_schedules = False
 
-    def __enter__(self) -> "
+    def __enter__(self) -> "Self":
         self._open()
         return super().__enter__()
 
@@ -129,12 +144,11 @@ class ModelMonitoringSchedulesFileEndpoint(ModelMonitoringSchedulesFileBase):
         :param project: The project name.
         :param endpoint_id: The endpoint ID.
         """
-        # `self._item` is the persistent version of the monitoring schedules.
         self._project = project
         self._endpoint_id = endpoint_id
         super().__init__()
 
-    def get_data_item_object(self) -> mlrun.DataItem:
+    def get_data_item_object(self) -> "mlrun.DataItem":
         return mlrun.model_monitoring.helpers.get_monitoring_schedules_endpoint_data(
             project=self._project, endpoint_id=self._endpoint_id
         )
@@ -148,19 +162,29 @@ class ModelMonitoringSchedulesFileEndpoint(ModelMonitoringSchedulesFileBase):
             endpoint_id=model_endpoint.metadata.uid,
         )
 
-    def get_application_time(self, application: str) -> Optional[
+    def get_application_time(self, application: str) -> Optional[float]:
         self._check_open_schedules()
         return self._schedules.get(application)
 
-    def update_application_time(self, application: str, timestamp:
+    def update_application_time(self, application: str, timestamp: float) -> None:
         self._check_open_schedules()
-        self._schedules[application] = timestamp
+        self._schedules[application] = float(timestamp)
+
+    def delete_application_time(self, application: str) -> None:
+        self._check_open_schedules()
+        if application in self._schedules:
+            logger.debug(
+                "Deleting application time from schedules",
+                application=application,
+                endpoint_id=self._endpoint_id,
+            )
+            del self._schedules[application]
 
     def get_application_list(self) -> set[str]:
         self._check_open_schedules()
         return set(self._schedules.keys())
 
-    def get_min_timestamp(self) -> Optional[
+    def get_min_timestamp(self) -> Optional[float]:
         self._check_open_schedules()
         return min(self._schedules.values(), default=None)
 
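The endpoint schedules file now stores per-application timestamps as floats and gains a delete operation. A hedged usage sketch based on the methods shown in this hunk (the project name and endpoint ID are hypothetical placeholders; the import targets the internal module this diff modifies):

import time

from mlrun.model_monitoring.db._schedules import (
    ModelMonitoringSchedulesFileEndpoint,
)

with ModelMonitoringSchedulesFileEndpoint(
    project="my-project", endpoint_id="ep-abc123"
) as schedules:
    # Timestamps are coerced to float (epoch seconds) on update.
    schedules.update_application_time("histogram-data-drift", time.time())
    last_run = schedules.get_application_time("histogram-data-drift")
    schedules.delete_application_time("histogram-data-drift")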
@@ -179,12 +203,12 @@ class ModelMonitoringSchedulesFileChief(ModelMonitoringSchedulesFileBase):
         self._project = project
         super().__init__()
 
-    def get_data_item_object(self) -> mlrun.DataItem:
+    def get_data_item_object(self) -> "mlrun.DataItem":
         return mlrun.model_monitoring.helpers.get_monitoring_schedules_chief_data(
             project=self._project
         )
 
-    def get_endpoint_last_request(self, endpoint_uid: str) -> Optional[
+    def get_endpoint_last_request(self, endpoint_uid: str) -> Optional[float]:
         self._check_open_schedules()
         if endpoint_uid in self._schedules:
             return self._schedules[endpoint_uid].get(
@@ -194,15 +218,19 @@ class ModelMonitoringSchedulesFileChief(ModelMonitoringSchedulesFileBase):
         return None
 
     def update_endpoint_timestamps(
-        self, endpoint_uid: str, last_request:
+        self, endpoint_uid: str, last_request: float, last_analyzed: float
     ) -> None:
         self._check_open_schedules()
         self._schedules[endpoint_uid] = {
-            schemas.model_monitoring.constants.ScheduleChiefFields.LAST_REQUEST:
-
+            schemas.model_monitoring.constants.ScheduleChiefFields.LAST_REQUEST: float(
+                last_request
+            ),
+            schemas.model_monitoring.constants.ScheduleChiefFields.LAST_ANALYZED: float(
+                last_analyzed
+            ),
         }
 
-    def get_endpoint_last_analyzed(self, endpoint_uid: str) -> Optional[
+    def get_endpoint_last_analyzed(self, endpoint_uid: str) -> Optional[float]:
         self._check_open_schedules()
         if endpoint_uid in self._schedules:
             return self._schedules[endpoint_uid].get(
@@ -216,22 +244,58 @@ class ModelMonitoringSchedulesFileChief(ModelMonitoringSchedulesFileBase):
         return set(self._schedules.keys())
 
     def get_or_create(self) -> None:
-        try:
-            self._open()
-        except (
-            mlrun.errors.MLRunNotFoundError,
-            # Different errors are raised for S3 or local storage, see ML-8042
-            botocore.exceptions.ClientError,
-            FileNotFoundError,
-        ):
+        if not self._exists():
             self.create()
 
 
-
-
-
-
-
+class ModelMonitoringSchedulesFileApplication(ModelMonitoringSchedulesFileBase):
+    def __init__(self, out_path: str, application: str) -> None:
+        self._out_path = out_path
+        self._application = application
+        super().__init__()
+
+    def get_data_item_object(self) -> "mlrun.DataItem":
+        return mlrun.model_monitoring.helpers.get_monitoring_schedules_user_application_data(
+            out_path=self._out_path, application=self._application
+        )
+
+    def _open(self) -> None:
+        if not self._exists():
+            # Create the file when it is needed the first time
+            logger.info(
+                "Creating the application schedules file",
+                application=self._application,
+                path=self._path,
+            )
+            self.create()
+        super()._open()
+
+    def get_endpoint_last_analyzed(self, endpoint_uid: str) -> Optional[datetime]:
+        self._check_open_schedules()
+        if endpoint_uid in self._schedules:
+            return datetime.fromisoformat(self._schedules[endpoint_uid])
+        else:
+            return None
+
+    def update_endpoint_last_analyzed(
+        self, endpoint_uid: str, last_analyzed: datetime
+    ) -> None:
+        self._check_open_schedules()
+        self._schedules[endpoint_uid] = last_analyzed.isoformat()
+
+    def delete_endpoints_last_analyzed(self, endpoint_uids: list[str]) -> None:
+        self._check_open_schedules()
+        for endpoint_uid in endpoint_uids:
+            if endpoint_uid in self._schedules:
+                logger.debug(
+                    "Deleting endpoint last analyzed from schedules",
+                    endpoint_uid=endpoint_uid,
+                    application=self._application,
+                )
+                del self._schedules[endpoint_uid]
+
+
 def _delete_folder(folder: str) -> None:
     fs = mlrun.datastore.store_manager.object(folder).store.filesystem
     if fs and fs.exists(folder):
         logger.debug("Deleting model monitoring schedules folder", folder=folder)
@@ -240,3 +304,22 @@ def delete_model_monitoring_schedules_folder(project: str) -> None:
         raise mlrun.errors.MLRunValueError(
             "Cannot delete a folder without a file-system"
         )
+
+
+def delete_model_monitoring_schedules_folder(project: str) -> None:
+    """Delete the model monitoring schedules folder of the project"""
+    folder = mlrun.model_monitoring.helpers._get_monitoring_schedules_folder_path(
+        project
+    )
+    _delete_folder(folder)
+
+
+def delete_model_monitoring_schedules_user_folder(project: str) -> None:
+    """Delete the user created schedules folder (created through `app.evaluate`)"""
+    out_path = mlrun.utils.helpers.template_artifact_path(
+        mlrun.mlconf.artifact_path, project=project
+    )
+    folder = mlrun.model_monitoring.helpers._get_monitoring_schedules_user_folder_path(
+        out_path
+    )
+    _delete_folder(folder)
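The new `ModelMonitoringSchedulesFileApplication` class stores per-endpoint `last_analyzed` values as ISO-format strings and creates its backing file lazily on first open. A hedged usage sketch (the `out_path` value and endpoint UID are hypothetical placeholders):

from datetime import datetime, timezone

from mlrun.model_monitoring.db._schedules import (
    ModelMonitoringSchedulesFileApplication,
)

with ModelMonitoringSchedulesFileApplication(
    out_path="v3io:///projects/my-project/artifacts", application="my-app"
) as schedules:
    # Values round-trip through datetime.isoformat() / fromisoformat().
    schedules.update_endpoint_last_analyzed(
        "ep-abc123", datetime.now(tz=timezone.utc)
    )
    last_analyzed = schedules.get_endpoint_last_analyzed("ep-abc123")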
mlrun/model_monitoring/db/_stats.py

@@ -13,6 +13,7 @@
 # limitations under the License.
 import abc
 import json
+import typing
 from abc import abstractmethod
 from datetime import datetime, timezone
 from typing import cast
@@ -73,7 +74,7 @@ class ModelMonitoringStatsFile(abc.ABC):
             path=self._item.url,
         )
 
-    def read(self) -> tuple[dict, datetime]:
+    def read(self) -> tuple[dict, typing.Optional[datetime]]:
         """
         Read the stats data and timestamp saved in file
         :return: tuple[dict, str] dictionary with stats data and timestamp saved in file
@@ -99,13 +100,13 @@ class ModelMonitoringStatsFile(abc.ABC):
             ):
                 raise
 
-            logger.
+            logger.warning(
                 "The Stats file was not found. It should have been created "
                 "as a part of the model endpoint's creation",
                 path=self._path,
                 error=err,
            )
-
+            return {}, None
 
     def write(self, stats: dict, timestamp: datetime) -> None:
         """
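With this change, `read()` returns `({}, None)` when the stats file is missing instead of propagating the error, so callers must treat the timestamp as optional. A hedged caller-side sketch (`stats_file` stands in for any `ModelMonitoringStatsFile` instance):

from datetime import datetime, timezone
from typing import Optional


def stats_age_seconds(stats_file) -> Optional[float]:
    # The timestamp may now be None when the file was never written.
    _stats, timestamp = stats_file.read()
    if timestamp is None:
        return None  # no stats were ever written
    # Assumes the stored timestamp is timezone-aware, as the module's
    # use of `timezone` suggests.
    return (datetime.now(tz=timezone.utc) - timestamp).total_seconds()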
mlrun/model_monitoring/db/tsdb/base.py

@@ -13,8 +13,8 @@
 # limitations under the License.
 
 from abc import ABC, abstractmethod
-from datetime import datetime
-from typing import
+from datetime import datetime, timedelta
+from typing import ClassVar, Literal, Optional, Union
 
 import pandas as pd
 import pydantic.v1
@@ -60,6 +60,16 @@ class TSDBConnector(ABC):
         """
         pass
 
+    def apply_writer_steps(self, graph, after, **kwargs) -> None:
+        """
+        Apply TSDB steps on the provided writer graph. Throughout these steps, the graph stores metrics / results.
+        This data is being used by mlrun UI and the monitoring dashboards in grafana.
+        There are 2 different key metric dictionaries that are being generated throughout these steps:
+        - metrics (user-defined metrics) - model monitoring application metrics
+        - results (user-defined results) - model monitoring application results
+        """
+        pass
+
     @abstractmethod
     def handle_model_error(self, graph, **kwargs) -> None:
         """
@@ -81,14 +91,38 @@
         """
 
     @abstractmethod
-    def
+    def get_drift_data(
         self,
-
-
+        start: datetime,
+        end: datetime,
+    ) -> mm_schemas.ModelEndpointDriftValues:
+        """
+        Fetches drift counts per interval in the specified time range.
+
+        :param start: The start time of the query.
+        :param end: The end time of the query.
+
+        :return: A ModelEndpointDriftValues object containing drift data.
+        """
+
+    @abstractmethod
+    def delete_tsdb_records(self, endpoint_ids: list[str]) -> None:
         """
         Delete model endpoint records from the TSDB connector.
+
         :param endpoint_ids: List of model endpoint unique identifiers.
-
+        """
+        pass
+
+    @abstractmethod
+    def delete_application_records(
+        self, application_name: str, endpoint_ids: Optional[list[str]] = None
+    ) -> None:
+        """
+        Delete application records from the TSDB for the given model endpoints or all if ``None``.
+
+        :param application_name: The name of the application to delete records for.
+        :param endpoint_ids: List of model endpoint unique identifiers.
         """
         pass
 
@@ -410,11 +444,9 @@
         ]
         """
 
-
+    def add_basic_metrics(
         self,
         model_endpoint_objects: list[mlrun.common.schemas.ModelEndpoint],
-        project: str,
-        run_in_threadpool: Callable,
         metric_list: Optional[list[str]] = None,
     ) -> list[mlrun.common.schemas.ModelEndpoint]:
         raise NotImplementedError()
@@ -703,3 +735,62 @@
             )
         )
         return {dict_key: metrics}
+
+    @staticmethod
+    def _prepare_aligned_start_end(
+        start: datetime, end: datetime
+    ) -> tuple[datetime, datetime, str]:
+        delta = end - start
+        if delta <= timedelta(hours=6):
+            interval = "10m"
+            start = start.replace(
+                minute=start.minute // 10 * 10, second=0, microsecond=0
+            )
+        elif delta <= timedelta(hours=72):
+            interval = "1h"
+            start = start.replace(minute=0, second=0, microsecond=0)
+        else:
+            interval = "1d"
+            start = start.replace(hour=0, minute=0, second=0, microsecond=0)
+
+        interval_map = {
+            "10m": timedelta(minutes=10),
+            "1h": timedelta(hours=1),
+            "1d": timedelta(days=1),
+        }
+        delta = end - start
+        interval_td = interval_map[interval]
+        end = start + (delta // interval_td) * interval_td
+        return start, end, interval
+
+    @staticmethod
+    def _df_to_drift_data(df: pd.DataFrame) -> mm_schemas.ModelEndpointDriftValues:
+        suspected_val = mm_schemas.constants.ResultStatusApp.potential_detection.value
+        detected_val = mm_schemas.constants.ResultStatusApp.detected.value
+        aggregated_df = (
+            df.groupby(["_wstart", f"max({mm_schemas.ResultData.RESULT_STATUS})"])
+            .size()  # add size column for each interval x result-status combination
+            .unstack()  # create a size column for each result-status
+            .reindex(
+                columns=[suspected_val, detected_val], fill_value=0
+            )  # ensure both columns exists
+            .fillna(0)
+            .astype(int)
+            .rename(
+                columns={
+                    suspected_val: "count_suspected",
+                    detected_val: "count_detected",
+                }
+            )
+        )
+        values = list(
+            zip(
+                aggregated_df.index,
+                aggregated_df["count_suspected"],
+                aggregated_df["count_detected"],
+            )
+        )
+        return mm_schemas.ModelEndpointDriftValues(values=values)
+
+    def add_pre_writer_steps(self, graph, after):
+        return None
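A quick worked example of the alignment rules in `_prepare_aligned_start_end` above, reproduced inline rather than imported from mlrun (the dates are arbitrary):

from datetime import datetime, timedelta

start = datetime(2024, 5, 1, 10, 17, 42)
end = datetime(2024, 5, 1, 13, 2, 0)

# end - start is about 2h44m (<= 6h), so the interval is "10m" and start
# floors to the 10-minute mark: 10:10:00.
start = start.replace(minute=start.minute // 10 * 10, second=0, microsecond=0)

# end then snaps to the last full interval: 10:10 + (2h52m // 10m) * 10m.
interval_td = timedelta(minutes=10)
end = start + ((end - start) // interval_td) * interval_td

print(start, end)  # 2024-05-01 10:10:00  2024-05-01 13:00:00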
mlrun/model_monitoring/db/tsdb/tdengine/schemas.py

@@ -122,10 +122,7 @@ class TDEngineSchema:
         )
         return f"DELETE FROM {self.database}.{subtable} WHERE {values};"
 
-    def drop_subtable_query(
-        self,
-        subtable: str,
-    ) -> str:
+    def drop_subtable_query(self, subtable: str) -> str:
         return f"DROP TABLE if EXISTS {self.database}.`{subtable}`;"
 
     def drop_supertable_query(self) -> str:
@@ -145,8 +142,10 @@ class TDEngineSchema:
         values = f" {operator} ".join(
             f"{filter_tag} LIKE '{val}'" for val in filter_values
         )
+        return self._get_tables_query_by_condition(values)
 
-
+    def _get_tables_query_by_condition(self, condition: str) -> str:
+        return f"SELECT DISTINCT TBNAME FROM {self.database}.{self.super_table} WHERE {condition};"
 
     @staticmethod
     def _get_records_query(
@@ -165,6 +164,7 @@ class TDEngineSchema:
         preform_agg_funcs_columns: Optional[list[str]] = None,
         order_by: Optional[str] = None,
         desc: Optional[bool] = None,
+        partition_by: Optional[str] = None,
     ) -> str:
         if agg_funcs and not columns_to_filter:
             raise mlrun.errors.MLRunInvalidArgumentError(
@@ -176,7 +176,10 @@ class TDEngineSchema:
             raise mlrun.errors.MLRunInvalidArgumentError(
                 "`agg_funcs` must be provided when using interval"
             )
-
+        if partition_by and not agg_funcs:
+            raise mlrun.errors.MLRunInvalidArgumentError(
+                "`agg_funcs` must be provided when using partition by"
+            )
         if sliding_window_step and not interval:
             raise mlrun.errors.MLRunInvalidArgumentError(
                 "`interval` must be provided when using sliding window"
@@ -232,6 +235,8 @@ class TDEngineSchema:
             if isinstance(group_by, list):
                 group_by = ", ".join(group_by)
             query.write(f" GROUP BY {group_by}")
+        if partition_by:
+            query.write(f" PARTITION BY {partition_by}")
         if order_by:
             desc = " DESC" if desc else ""
             query.write(f" ORDER BY {order_by}{desc}")