mlrun 1.10.0rc13__py3-none-any.whl → 1.10.0rc15__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of mlrun might be problematic. Click here for more details.
- mlrun/artifacts/base.py +0 -31
- mlrun/artifacts/llm_prompt.py +106 -20
- mlrun/artifacts/manager.py +0 -5
- mlrun/common/constants.py +0 -1
- mlrun/common/schemas/__init__.py +1 -0
- mlrun/common/schemas/model_monitoring/__init__.py +1 -0
- mlrun/common/schemas/model_monitoring/functions.py +1 -1
- mlrun/common/schemas/model_monitoring/model_endpoints.py +10 -0
- mlrun/common/schemas/workflow.py +0 -1
- mlrun/config.py +1 -1
- mlrun/datastore/model_provider/model_provider.py +42 -14
- mlrun/datastore/model_provider/openai_provider.py +96 -15
- mlrun/db/base.py +14 -0
- mlrun/db/httpdb.py +42 -9
- mlrun/db/nopdb.py +8 -0
- mlrun/execution.py +16 -7
- mlrun/model.py +15 -0
- mlrun/model_monitoring/__init__.py +1 -0
- mlrun/model_monitoring/applications/base.py +176 -20
- mlrun/model_monitoring/db/_schedules.py +84 -24
- mlrun/model_monitoring/db/tsdb/base.py +72 -1
- mlrun/model_monitoring/db/tsdb/tdengine/schemas.py +7 -1
- mlrun/model_monitoring/db/tsdb/tdengine/tdengine_connector.py +37 -0
- mlrun/model_monitoring/db/tsdb/v3io/v3io_connector.py +25 -0
- mlrun/model_monitoring/helpers.py +26 -4
- mlrun/projects/project.py +38 -12
- mlrun/runtimes/daskjob.py +6 -0
- mlrun/runtimes/mpijob/abstract.py +6 -0
- mlrun/runtimes/mpijob/v1.py +6 -0
- mlrun/runtimes/nuclio/application/application.py +2 -0
- mlrun/runtimes/nuclio/function.py +6 -0
- mlrun/runtimes/nuclio/serving.py +12 -11
- mlrun/runtimes/pod.py +21 -0
- mlrun/runtimes/remotesparkjob.py +6 -0
- mlrun/runtimes/sparkjob/spark3job.py +6 -0
- mlrun/serving/__init__.py +2 -0
- mlrun/serving/server.py +95 -26
- mlrun/serving/states.py +130 -10
- mlrun/utils/helpers.py +36 -12
- mlrun/utils/retryer.py +15 -2
- mlrun/utils/version/version.json +2 -2
- {mlrun-1.10.0rc13.dist-info → mlrun-1.10.0rc15.dist-info}/METADATA +3 -8
- {mlrun-1.10.0rc13.dist-info → mlrun-1.10.0rc15.dist-info}/RECORD +47 -47
- {mlrun-1.10.0rc13.dist-info → mlrun-1.10.0rc15.dist-info}/WHEEL +0 -0
- {mlrun-1.10.0rc13.dist-info → mlrun-1.10.0rc15.dist-info}/entry_points.txt +0 -0
- {mlrun-1.10.0rc13.dist-info → mlrun-1.10.0rc15.dist-info}/licenses/LICENSE +0 -0
- {mlrun-1.10.0rc13.dist-info → mlrun-1.10.0rc15.dist-info}/top_level.txt +0 -0
|
@@ -13,25 +13,36 @@
|
|
|
13
13
|
# limitations under the License.
|
|
14
14
|
|
|
15
15
|
import json
|
|
16
|
+
import sys
|
|
16
17
|
from abc import ABC, abstractmethod
|
|
17
18
|
from contextlib import AbstractContextManager
|
|
19
|
+
from datetime import datetime, timezone
|
|
18
20
|
from types import TracebackType
|
|
19
|
-
from typing import Final, Optional
|
|
21
|
+
from typing import TYPE_CHECKING, Final, Optional
|
|
20
22
|
|
|
21
23
|
import botocore.exceptions
|
|
22
24
|
|
|
25
|
+
import mlrun
|
|
23
26
|
import mlrun.common.schemas as schemas
|
|
24
27
|
import mlrun.errors
|
|
25
28
|
import mlrun.model_monitoring.helpers
|
|
29
|
+
import mlrun.utils.helpers
|
|
26
30
|
from mlrun.utils import logger
|
|
27
31
|
|
|
32
|
+
if TYPE_CHECKING:
|
|
33
|
+
if sys.version_info >= (3, 11):
|
|
34
|
+
from typing import Self
|
|
35
|
+
else:
|
|
36
|
+
from typing_extensions import Self
|
|
37
|
+
|
|
28
38
|
|
|
29
39
|
class ModelMonitoringSchedulesFileBase(AbstractContextManager, ABC):
|
|
30
40
|
DEFAULT_SCHEDULES: Final = {}
|
|
31
41
|
INITIAL_CONTENT = json.dumps(DEFAULT_SCHEDULES)
|
|
32
42
|
ENCODING = "utf-8"
|
|
33
43
|
|
|
34
|
-
def __init__(self):
|
|
44
|
+
def __init__(self) -> None:
|
|
45
|
+
# `self._item` is the persistent version of the monitoring schedules.
|
|
35
46
|
self._item = self.get_data_item_object()
|
|
36
47
|
if self._item:
|
|
37
48
|
self._path = self._item.url
|
|
@@ -43,9 +54,16 @@ class ModelMonitoringSchedulesFileBase(AbstractContextManager, ABC):
|
|
|
43
54
|
self._open_schedules = False
|
|
44
55
|
|
|
45
56
|
@abstractmethod
|
|
46
|
-
def get_data_item_object(self) -> mlrun.DataItem:
|
|
57
|
+
def get_data_item_object(self) -> "mlrun.DataItem":
|
|
47
58
|
pass
|
|
48
59
|
|
|
60
|
+
def _exists(self) -> bool:
|
|
61
|
+
"""Return whether the file exists or not"""
|
|
62
|
+
return (
|
|
63
|
+
self._fs is None # In-memory store
|
|
64
|
+
or self._fs.exists(self._path)
|
|
65
|
+
)
|
|
66
|
+
|
|
49
67
|
def create(self) -> None:
|
|
50
68
|
"""Create a schedules file with initial content - an empty dictionary"""
|
|
51
69
|
logger.debug("Creating model monitoring schedules file", path=self._item.url)
|
|
@@ -53,10 +71,7 @@ class ModelMonitoringSchedulesFileBase(AbstractContextManager, ABC):
|
|
|
53
71
|
|
|
54
72
|
def delete(self) -> None:
|
|
55
73
|
"""Delete schedules file if it exists"""
|
|
56
|
-
if (
|
|
57
|
-
self._fs is None # In-memory store
|
|
58
|
-
or self._fs.exists(self._path)
|
|
59
|
-
):
|
|
74
|
+
if self._exists():
|
|
60
75
|
logger.debug(
|
|
61
76
|
"Deleting model monitoring schedules file", path=self._item.url
|
|
62
77
|
)
|
|
@@ -100,7 +115,7 @@ class ModelMonitoringSchedulesFileBase(AbstractContextManager, ABC):
|
|
|
100
115
|
self._schedules = self.DEFAULT_SCHEDULES
|
|
101
116
|
self._open_schedules = False
|
|
102
117
|
|
|
103
|
-
def __enter__(self) -> "
|
|
118
|
+
def __enter__(self) -> "Self":
|
|
104
119
|
self._open()
|
|
105
120
|
return super().__enter__()
|
|
106
121
|
|
|
@@ -129,12 +144,11 @@ class ModelMonitoringSchedulesFileEndpoint(ModelMonitoringSchedulesFileBase):
|
|
|
129
144
|
:param project: The project name.
|
|
130
145
|
:param endpoint_id: The endpoint ID.
|
|
131
146
|
"""
|
|
132
|
-
# `self._item` is the persistent version of the monitoring schedules.
|
|
133
147
|
self._project = project
|
|
134
148
|
self._endpoint_id = endpoint_id
|
|
135
149
|
super().__init__()
|
|
136
150
|
|
|
137
|
-
def get_data_item_object(self) -> mlrun.DataItem:
|
|
151
|
+
def get_data_item_object(self) -> "mlrun.DataItem":
|
|
138
152
|
return mlrun.model_monitoring.helpers.get_monitoring_schedules_endpoint_data(
|
|
139
153
|
project=self._project, endpoint_id=self._endpoint_id
|
|
140
154
|
)
|
|
@@ -179,7 +193,7 @@ class ModelMonitoringSchedulesFileChief(ModelMonitoringSchedulesFileBase):
|
|
|
179
193
|
self._project = project
|
|
180
194
|
super().__init__()
|
|
181
195
|
|
|
182
|
-
def get_data_item_object(self) -> mlrun.DataItem:
|
|
196
|
+
def get_data_item_object(self) -> "mlrun.DataItem":
|
|
183
197
|
return mlrun.model_monitoring.helpers.get_monitoring_schedules_chief_data(
|
|
184
198
|
project=self._project
|
|
185
199
|
)
|
|
@@ -216,22 +230,49 @@ class ModelMonitoringSchedulesFileChief(ModelMonitoringSchedulesFileBase):
|
|
|
216
230
|
return set(self._schedules.keys())
|
|
217
231
|
|
|
218
232
|
def get_or_create(self) -> None:
|
|
219
|
-
|
|
220
|
-
self._open()
|
|
221
|
-
except (
|
|
222
|
-
mlrun.errors.MLRunNotFoundError,
|
|
223
|
-
# Different errors are raised for S3 or local storage, see ML-8042
|
|
224
|
-
botocore.exceptions.ClientError,
|
|
225
|
-
FileNotFoundError,
|
|
226
|
-
):
|
|
233
|
+
if not self._exists():
|
|
227
234
|
self.create()
|
|
228
235
|
|
|
229
236
|
|
|
230
|
-
|
|
231
|
-
|
|
232
|
-
|
|
233
|
-
|
|
234
|
-
|
|
237
|
+
class ModelMonitoringSchedulesFileApplication(ModelMonitoringSchedulesFileBase):
|
|
238
|
+
def __init__(self, out_path: str, application: str) -> None:
|
|
239
|
+
self._out_path = out_path
|
|
240
|
+
self._application = application
|
|
241
|
+
super().__init__()
|
|
242
|
+
|
|
243
|
+
def get_data_item_object(self) -> "mlrun.DataItem":
|
|
244
|
+
return mlrun.model_monitoring.helpers.get_monitoring_schedules_user_application_data(
|
|
245
|
+
out_path=self._out_path, application=self._application
|
|
246
|
+
)
|
|
247
|
+
|
|
248
|
+
def _open(self) -> None:
|
|
249
|
+
if not self._exists():
|
|
250
|
+
# Create the file when it is needed the first time
|
|
251
|
+
logger.info(
|
|
252
|
+
"Creating the application schedules file",
|
|
253
|
+
application=self._application,
|
|
254
|
+
path=self._path,
|
|
255
|
+
)
|
|
256
|
+
self.create()
|
|
257
|
+
super()._open()
|
|
258
|
+
|
|
259
|
+
def get_endpoint_last_analyzed(self, endpoint_uid: str) -> Optional[datetime]:
|
|
260
|
+
self._check_open_schedules()
|
|
261
|
+
if endpoint_uid in self._schedules:
|
|
262
|
+
return datetime.fromisoformat(self._schedules[endpoint_uid])
|
|
263
|
+
else:
|
|
264
|
+
return None
|
|
265
|
+
|
|
266
|
+
def update_endpoint_last_analyzed(
|
|
267
|
+
self, endpoint_uid: str, last_analyzed: datetime
|
|
268
|
+
) -> None:
|
|
269
|
+
self._check_open_schedules()
|
|
270
|
+
self._schedules[endpoint_uid] = last_analyzed.astimezone(
|
|
271
|
+
timezone.utc
|
|
272
|
+
).isoformat()
|
|
273
|
+
|
|
274
|
+
|
|
275
|
+
def _delete_folder(folder: str) -> None:
|
|
235
276
|
fs = mlrun.datastore.store_manager.object(folder).store.filesystem
|
|
236
277
|
if fs and fs.exists(folder):
|
|
237
278
|
logger.debug("Deleting model monitoring schedules folder", folder=folder)
|
|
@@ -240,3 +281,22 @@ def delete_model_monitoring_schedules_folder(project: str) -> None:
|
|
|
240
281
|
raise mlrun.errors.MLRunValueError(
|
|
241
282
|
"Cannot delete a folder without a file-system"
|
|
242
283
|
)
|
|
284
|
+
|
|
285
|
+
|
|
286
|
+
def delete_model_monitoring_schedules_folder(project: str) -> None:
|
|
287
|
+
"""Delete the model monitoring schedules folder of the project"""
|
|
288
|
+
folder = mlrun.model_monitoring.helpers._get_monitoring_schedules_folder_path(
|
|
289
|
+
project
|
|
290
|
+
)
|
|
291
|
+
_delete_folder(folder)
|
|
292
|
+
|
|
293
|
+
|
|
294
|
+
def delete_model_monitoring_schedules_user_folder(project: str) -> None:
|
|
295
|
+
"""Delete the user created schedules folder (created through `app.evaluate`)"""
|
|
296
|
+
out_path = mlrun.utils.helpers.template_artifact_path(
|
|
297
|
+
mlrun.mlconf.artifact_path, project=project
|
|
298
|
+
)
|
|
299
|
+
folder = mlrun.model_monitoring.helpers._get_monitoring_schedules_user_folder_path(
|
|
300
|
+
out_path
|
|
301
|
+
)
|
|
302
|
+
_delete_folder(folder)
|
|
@@ -13,7 +13,7 @@
|
|
|
13
13
|
# limitations under the License.
|
|
14
14
|
|
|
15
15
|
from abc import ABC, abstractmethod
|
|
16
|
-
from datetime import datetime
|
|
16
|
+
from datetime import datetime, timedelta
|
|
17
17
|
from typing import Callable, ClassVar, Literal, Optional, Union
|
|
18
18
|
|
|
19
19
|
import pandas as pd
|
|
@@ -80,6 +80,21 @@ class TSDBConnector(ABC):
|
|
|
80
80
|
:raise mlrun.errors.MLRunRuntimeError: If an error occurred while writing the event.
|
|
81
81
|
"""
|
|
82
82
|
|
|
83
|
+
@abstractmethod
|
|
84
|
+
def get_drift_data(
|
|
85
|
+
self,
|
|
86
|
+
start: datetime,
|
|
87
|
+
end: datetime,
|
|
88
|
+
) -> mm_schemas.ModelEndpointDriftValues:
|
|
89
|
+
"""
|
|
90
|
+
Fetches drift counts per interval in the specified time range.
|
|
91
|
+
|
|
92
|
+
:param start: The start time of the query.
|
|
93
|
+
:param end: The end time of the query.
|
|
94
|
+
|
|
95
|
+
:return: A ModelEndpointDriftValues object containing drift data.
|
|
96
|
+
"""
|
|
97
|
+
|
|
83
98
|
@abstractmethod
|
|
84
99
|
def delete_tsdb_records(
|
|
85
100
|
self,
|
|
@@ -703,3 +718,59 @@ class TSDBConnector(ABC):
|
|
|
703
718
|
)
|
|
704
719
|
)
|
|
705
720
|
return {dict_key: metrics}
|
|
721
|
+
|
|
722
|
+
@staticmethod
|
|
723
|
+
def _prepare_aligned_start_end(
|
|
724
|
+
start: datetime, end: datetime
|
|
725
|
+
) -> tuple[datetime, datetime, str]:
|
|
726
|
+
delta = end - start
|
|
727
|
+
if delta <= timedelta(hours=6):
|
|
728
|
+
interval = "10m"
|
|
729
|
+
start = start.replace(
|
|
730
|
+
minute=start.minute // 10 * 10, second=0, microsecond=0
|
|
731
|
+
)
|
|
732
|
+
elif delta <= timedelta(hours=72):
|
|
733
|
+
interval = "1h"
|
|
734
|
+
start = start.replace(minute=0, second=0, microsecond=0)
|
|
735
|
+
else:
|
|
736
|
+
interval = "1d"
|
|
737
|
+
start = start.replace(hour=0, minute=0, second=0, microsecond=0)
|
|
738
|
+
|
|
739
|
+
interval_map = {
|
|
740
|
+
"10m": timedelta(minutes=10),
|
|
741
|
+
"1h": timedelta(hours=1),
|
|
742
|
+
"1d": timedelta(days=1),
|
|
743
|
+
}
|
|
744
|
+
delta = end - start
|
|
745
|
+
interval_td = interval_map[interval]
|
|
746
|
+
end = start + (delta // interval_td) * interval_td
|
|
747
|
+
return start, end, interval
|
|
748
|
+
|
|
749
|
+
@staticmethod
|
|
750
|
+
def _df_to_drift_data(df: pd.DataFrame) -> mm_schemas.ModelEndpointDriftValues:
|
|
751
|
+
suspected_val = mm_schemas.constants.ResultStatusApp.potential_detection.value
|
|
752
|
+
detected_val = mm_schemas.constants.ResultStatusApp.detected.value
|
|
753
|
+
aggregated_df = (
|
|
754
|
+
df.groupby(["_wstart", f"max({mm_schemas.ResultData.RESULT_STATUS})"])
|
|
755
|
+
.size() # add size column for each interval x result-status combination
|
|
756
|
+
.unstack() # create a size column for each result-status
|
|
757
|
+
.reindex(
|
|
758
|
+
columns=[suspected_val, detected_val], fill_value=0
|
|
759
|
+
) # ensure both columns exist
|
|
760
|
+
.fillna(0)
|
|
761
|
+
.astype(int)
|
|
762
|
+
.rename(
|
|
763
|
+
columns={
|
|
764
|
+
suspected_val: "count_suspected",
|
|
765
|
+
detected_val: "count_detected",
|
|
766
|
+
}
|
|
767
|
+
)
|
|
768
|
+
)
|
|
769
|
+
values = list(
|
|
770
|
+
zip(
|
|
771
|
+
aggregated_df.index,
|
|
772
|
+
aggregated_df["count_suspected"],
|
|
773
|
+
aggregated_df["count_detected"],
|
|
774
|
+
)
|
|
775
|
+
)
|
|
776
|
+
return mm_schemas.ModelEndpointDriftValues(values=values)
|
|
@@ -165,6 +165,7 @@ class TDEngineSchema:
|
|
|
165
165
|
preform_agg_funcs_columns: Optional[list[str]] = None,
|
|
166
166
|
order_by: Optional[str] = None,
|
|
167
167
|
desc: Optional[bool] = None,
|
|
168
|
+
partition_by: Optional[str] = None,
|
|
168
169
|
) -> str:
|
|
169
170
|
if agg_funcs and not columns_to_filter:
|
|
170
171
|
raise mlrun.errors.MLRunInvalidArgumentError(
|
|
@@ -176,7 +177,10 @@ class TDEngineSchema:
|
|
|
176
177
|
raise mlrun.errors.MLRunInvalidArgumentError(
|
|
177
178
|
"`agg_funcs` must be provided when using interval"
|
|
178
179
|
)
|
|
179
|
-
|
|
180
|
+
if partition_by and not agg_funcs:
|
|
181
|
+
raise mlrun.errors.MLRunInvalidArgumentError(
|
|
182
|
+
"`agg_funcs` must be provided when using partition by"
|
|
183
|
+
)
|
|
180
184
|
if sliding_window_step and not interval:
|
|
181
185
|
raise mlrun.errors.MLRunInvalidArgumentError(
|
|
182
186
|
"`interval` must be provided when using sliding window"
|
|
@@ -232,6 +236,8 @@ class TDEngineSchema:
|
|
|
232
236
|
if isinstance(group_by, list):
|
|
233
237
|
group_by = ", ".join(group_by)
|
|
234
238
|
query.write(f" GROUP BY {group_by}")
|
|
239
|
+
if partition_by:
|
|
240
|
+
query.write(f" PARTITION BY {partition_by}")
|
|
235
241
|
if order_by:
|
|
236
242
|
desc = " DESC" if desc else ""
|
|
237
243
|
query.write(f" ORDER BY {order_by}{desc}")
|
|
@@ -469,6 +469,7 @@ class TDEngineConnector(TSDBConnector):
|
|
|
469
469
|
preform_agg_columns: Optional[list] = None,
|
|
470
470
|
order_by: Optional[str] = None,
|
|
471
471
|
desc: Optional[bool] = None,
|
|
472
|
+
partition_by: Optional[str] = None,
|
|
472
473
|
) -> pd.DataFrame:
|
|
473
474
|
"""
|
|
474
475
|
Getting records from TSDB data collection.
|
|
@@ -496,6 +497,8 @@ class TDEngineConnector(TSDBConnector):
|
|
|
496
497
|
if an empty list was provided The aggregation won't be performed.
|
|
497
498
|
:param order_by: The column or alias to perform ordering on the query.
|
|
498
499
|
:param desc: Whether or not to sort the results in descending order.
|
|
500
|
+
:param partition_by: The column to partition the results by. Note that if `partition_by` is provided,
|
|
501
|
+
`agg_funcs` must be provided as well.
|
|
499
502
|
|
|
500
503
|
:return: DataFrame with the provided attributes from the data collection.
|
|
501
504
|
:raise: MLRunInvalidArgumentError if querying the provided table failed.
|
|
@@ -517,6 +520,7 @@ class TDEngineConnector(TSDBConnector):
|
|
|
517
520
|
preform_agg_funcs_columns=preform_agg_columns,
|
|
518
521
|
order_by=order_by,
|
|
519
522
|
desc=desc,
|
|
523
|
+
partition_by=partition_by,
|
|
520
524
|
)
|
|
521
525
|
logger.debug("Querying TDEngine", query=full_query)
|
|
522
526
|
try:
|
|
@@ -1205,6 +1209,39 @@ class TDEngineConnector(TSDBConnector):
|
|
|
1205
1209
|
)
|
|
1206
1210
|
)
|
|
1207
1211
|
|
|
1212
|
+
def get_drift_data(
|
|
1213
|
+
self,
|
|
1214
|
+
start: datetime,
|
|
1215
|
+
end: datetime,
|
|
1216
|
+
) -> mm_schemas.ModelEndpointDriftValues:
|
|
1217
|
+
filter_query = self._generate_filter_query(
|
|
1218
|
+
filter_column=mm_schemas.ResultData.RESULT_STATUS,
|
|
1219
|
+
filter_values=[
|
|
1220
|
+
mm_schemas.ResultStatusApp.potential_detection.value,
|
|
1221
|
+
mm_schemas.ResultStatusApp.detected.value,
|
|
1222
|
+
],
|
|
1223
|
+
)
|
|
1224
|
+
table = self.tables[mm_schemas.TDEngineSuperTables.APP_RESULTS].super_table
|
|
1225
|
+
start, end, interval = self._prepare_aligned_start_end(start, end)
|
|
1226
|
+
|
|
1227
|
+
# get per time-interval x endpoint_id combination the max result status
|
|
1228
|
+
df = self._get_records(
|
|
1229
|
+
table=table,
|
|
1230
|
+
start=start,
|
|
1231
|
+
end=end,
|
|
1232
|
+
interval=interval,
|
|
1233
|
+
columns=[mm_schemas.ResultData.RESULT_STATUS],
|
|
1234
|
+
filter_query=filter_query,
|
|
1235
|
+
timestamp_column=mm_schemas.WriterEvent.END_INFER_TIME,
|
|
1236
|
+
agg_funcs=["max"],
|
|
1237
|
+
partition_by=mm_schemas.WriterEvent.ENDPOINT_ID,
|
|
1238
|
+
)
|
|
1239
|
+
if df.empty:
|
|
1240
|
+
return mm_schemas.ModelEndpointDriftValues(values=[])
|
|
1241
|
+
|
|
1242
|
+
df["_wstart"] = pd.to_datetime(df["_wstart"])
|
|
1243
|
+
return self._df_to_drift_data(df)
|
|
1244
|
+
|
|
1208
1245
|
# Note: this function serves as a reference for checking the TSDB for the existence of a metric.
|
|
1209
1246
|
#
|
|
1210
1247
|
# def read_prediction_metric_for_endpoint_if_exists(
|
|
@@ -1450,3 +1450,28 @@ class V3IOTSDBConnector(TSDBConnector):
|
|
|
1450
1450
|
return metric_objects
|
|
1451
1451
|
|
|
1452
1452
|
return build_metric_objects()
|
|
1453
|
+
|
|
1454
|
+
def get_drift_data(
|
|
1455
|
+
self,
|
|
1456
|
+
start: datetime,
|
|
1457
|
+
end: datetime,
|
|
1458
|
+
) -> mm_schemas.ModelEndpointDriftValues:
|
|
1459
|
+
table = mm_schemas.V3IOTSDBTables.APP_RESULTS
|
|
1460
|
+
start, end, interval = self._prepare_aligned_start_end(start, end)
|
|
1461
|
+
|
|
1462
|
+
# get per time-interval x endpoint_id combination the max result status
|
|
1463
|
+
df = self._get_records(
|
|
1464
|
+
table=table,
|
|
1465
|
+
start=start,
|
|
1466
|
+
end=end,
|
|
1467
|
+
interval=interval,
|
|
1468
|
+
sliding_window_step=interval,
|
|
1469
|
+
columns=[mm_schemas.ResultData.RESULT_STATUS],
|
|
1470
|
+
agg_funcs=["max"],
|
|
1471
|
+
group_by=mm_schemas.WriterEvent.ENDPOINT_ID,
|
|
1472
|
+
)
|
|
1473
|
+
if df.empty:
|
|
1474
|
+
return mm_schemas.ModelEndpointDriftValues(values=[])
|
|
1475
|
+
df = df[df[f"max({mm_schemas.ResultData.RESULT_STATUS})"] >= 1]
|
|
1476
|
+
df = df.reset_index(names="_wstart")
|
|
1477
|
+
return self._df_to_drift_data(df)
|
|
@@ -549,6 +549,10 @@ def _get_monitoring_schedules_folder_path(project: str) -> str:
|
|
|
549
549
|
)
|
|
550
550
|
|
|
551
551
|
|
|
552
|
+
def _get_monitoring_schedules_user_folder_path(out_path: str) -> str:
|
|
553
|
+
return os.path.join(out_path, mm_constants.FileTargetKind.MONITORING_SCHEDULES)
|
|
554
|
+
|
|
555
|
+
|
|
552
556
|
def _get_monitoring_schedules_file_endpoint_path(
|
|
553
557
|
*, project: str, endpoint_id: str
|
|
554
558
|
) -> str:
|
|
@@ -570,10 +574,7 @@ def get_monitoring_schedules_endpoint_data(
|
|
|
570
574
|
)
|
|
571
575
|
|
|
572
576
|
|
|
573
|
-
def get_monitoring_schedules_chief_data(
|
|
574
|
-
*,
|
|
575
|
-
project: str,
|
|
576
|
-
) -> "DataItem":
|
|
577
|
+
def get_monitoring_schedules_chief_data(*, project: str) -> "DataItem":
|
|
577
578
|
"""
|
|
578
579
|
Get the model monitoring schedules' data item of the project's model endpoint.
|
|
579
580
|
"""
|
|
@@ -582,6 +583,19 @@ def get_monitoring_schedules_chief_data(
|
|
|
582
583
|
)
|
|
583
584
|
|
|
584
585
|
|
|
586
|
+
def get_monitoring_schedules_user_application_data(
|
|
587
|
+
*, out_path: str, application: str
|
|
588
|
+
) -> "DataItem":
|
|
589
|
+
"""
|
|
590
|
+
Get the model monitoring schedules' data item of user application runs.
|
|
591
|
+
"""
|
|
592
|
+
return mlrun.datastore.store_manager.object(
|
|
593
|
+
_get_monitoring_schedules_file_user_application_path(
|
|
594
|
+
out_path=out_path, application=application
|
|
595
|
+
)
|
|
596
|
+
)
|
|
597
|
+
|
|
598
|
+
|
|
585
599
|
def _get_monitoring_schedules_file_chief_path(
|
|
586
600
|
*,
|
|
587
601
|
project: str,
|
|
@@ -591,6 +605,14 @@ def _get_monitoring_schedules_file_chief_path(
|
|
|
591
605
|
)
|
|
592
606
|
|
|
593
607
|
|
|
608
|
+
def _get_monitoring_schedules_file_user_application_path(
|
|
609
|
+
*, out_path: str, application: str
|
|
610
|
+
) -> str:
|
|
611
|
+
return os.path.join(
|
|
612
|
+
_get_monitoring_schedules_user_folder_path(out_path), f"{application}.json"
|
|
613
|
+
)
|
|
614
|
+
|
|
615
|
+
|
|
594
616
|
def get_start_end(
|
|
595
617
|
start: Union[datetime.datetime, None],
|
|
596
618
|
end: Union[datetime.datetime, None],
|
mlrun/projects/project.py
CHANGED
|
@@ -1042,12 +1042,7 @@ class ProjectSpec(ModelObj):
|
|
|
1042
1042
|
artifact = artifact.to_dict()
|
|
1043
1043
|
else: # artifact is a dict
|
|
1044
1044
|
# imported/legacy artifacts don't have metadata,spec,status fields
|
|
1045
|
-
key_field = (
|
|
1046
|
-
"key"
|
|
1047
|
-
if _is_imported_artifact(artifact)
|
|
1048
|
-
or mlrun.utils.is_legacy_artifact(artifact)
|
|
1049
|
-
else "metadata.key"
|
|
1050
|
-
)
|
|
1045
|
+
key_field = "key" if _is_imported_artifact(artifact) else "metadata.key"
|
|
1051
1046
|
key = mlrun.utils.get_in(artifact, key_field, "")
|
|
1052
1047
|
if not key:
|
|
1053
1048
|
raise ValueError(f'artifacts "{key_field}" must be specified')
|
|
@@ -1889,7 +1884,7 @@ class MlrunProject(ModelObj):
|
|
|
1889
1884
|
def log_llm_prompt(
|
|
1890
1885
|
self,
|
|
1891
1886
|
key,
|
|
1892
|
-
|
|
1887
|
+
prompt_template: Optional[list[dict]] = None,
|
|
1893
1888
|
prompt_path: Optional[str] = None,
|
|
1894
1889
|
prompt_legend: Optional[dict] = None,
|
|
1895
1890
|
model_artifact: Union[ModelArtifact, str] = None,
|
|
@@ -1923,10 +1918,16 @@ class MlrunProject(ModelObj):
|
|
|
1923
1918
|
)
|
|
1924
1919
|
|
|
1925
1920
|
:param key: Unique key for the prompt artifact.
|
|
1926
|
-
:param
|
|
1921
|
+
:param prompt_template: Raw prompt list of dicts -
|
|
1922
|
+
[{"role": "system", "content": "You are a {profession} advisor"},
|
|
1923
|
+
{"role": "user", "content": "I need your help with {profession}"}]. Only "role" and "content" keys are allowed, in any
|
|
1924
|
+
str format (upper/lower case), keys will be modified to lower case.
|
|
1925
|
+
Cannot be used with `prompt_path`.
|
|
1927
1926
|
:param prompt_path: Path to a file containing the prompt. Mutually exclusive with `prompt_template`.
|
|
1928
1927
|
:param prompt_legend: A dictionary where each key is a placeholder in the prompt (e.g., ``{user_name}``)
|
|
1929
|
-
and the value is a description
|
|
1928
|
+
and the value is a dictionary holding two keys, "field", "description". "field" points to the field in
|
|
1929
|
+
the event that holds the value of the place-holder; if it is None or does not exist, the value will be replaced
|
|
1930
|
+
with the place-holder name. "description" will point to explanation of what that placeholder represents.
|
|
1930
1931
|
Useful for documenting and clarifying dynamic parts of the prompt.
|
|
1931
1932
|
:param model_artifact: Reference to the parent model (either `ModelArtifact` or model URI string).
|
|
1932
1933
|
:param model_configuration: Configuration dictionary for model generation parameters
|
|
@@ -1942,15 +1943,15 @@ class MlrunProject(ModelObj):
|
|
|
1942
1943
|
:returns: The logged `LLMPromptArtifact` object.
|
|
1943
1944
|
"""
|
|
1944
1945
|
|
|
1945
|
-
if not
|
|
1946
|
+
if not prompt_template and not prompt_path:
|
|
1946
1947
|
raise mlrun.errors.MLRunInvalidArgumentError(
|
|
1947
|
-
"Either '
|
|
1948
|
+
"Either 'prompt_template' or 'prompt_path' must be provided"
|
|
1948
1949
|
)
|
|
1949
1950
|
|
|
1950
1951
|
llm_prompt = LLMPromptArtifact(
|
|
1951
1952
|
key=key,
|
|
1952
1953
|
project=self.name,
|
|
1953
|
-
|
|
1954
|
+
prompt_template=prompt_template,
|
|
1954
1955
|
prompt_path=prompt_path,
|
|
1955
1956
|
prompt_legend=prompt_legend,
|
|
1956
1957
|
model_artifact=model_artifact,
|
|
@@ -5551,6 +5552,31 @@ class MlrunProject(ModelObj):
|
|
|
5551
5552
|
**kwargs,
|
|
5552
5553
|
)
|
|
5553
5554
|
|
|
5555
|
+
def get_drift_over_time(
|
|
5556
|
+
self,
|
|
5557
|
+
start: Optional[datetime.datetime] = None,
|
|
5558
|
+
end: Optional[datetime.datetime] = None,
|
|
5559
|
+
) -> mlrun.common.schemas.model_monitoring.ModelEndpointDriftValues:
|
|
5560
|
+
"""
|
|
5561
|
+
Get drift counts over time for the project.
|
|
5562
|
+
|
|
5563
|
+
This method returns a list of tuples, each representing a time-interval (in a granularity set by the
|
|
5564
|
+
duration of the given time range) and the number of suspected drifts and detected drifts in that interval.
|
|
5565
|
+
For a range of 6 hours or less, the granularity is 10 minutes, for a range of 6 hours to 72 hours, the
|
|
5566
|
+
granularity is 1 hour, and for a range of more than 72 hours, the granularity is 24 hours.
|
|
5567
|
+
|
|
5568
|
+
:param start: Start time of the range to retrieve drift counts from.
|
|
5569
|
+
:param end: End time of the range to retrieve drift counts from.
|
|
5570
|
+
|
|
5571
|
+
:return: A ModelEndpointDriftValues object containing the drift counts over time.
|
|
5572
|
+
"""
|
|
5573
|
+
db = mlrun.db.get_run_db(secrets=self._secrets)
|
|
5574
|
+
return db.get_drift_over_time(
|
|
5575
|
+
project=self.metadata.name,
|
|
5576
|
+
start=start,
|
|
5577
|
+
end=end,
|
|
5578
|
+
)
|
|
5579
|
+
|
|
5554
5580
|
def _run_authenticated_git_action(
|
|
5555
5581
|
self,
|
|
5556
5582
|
action: Callable,
|
mlrun/runtimes/daskjob.py
CHANGED
|
@@ -93,6 +93,9 @@ class DaskSpec(KubeResourceSpec):
|
|
|
93
93
|
security_context=None,
|
|
94
94
|
state_thresholds=None,
|
|
95
95
|
serving_spec=None,
|
|
96
|
+
graph=None,
|
|
97
|
+
parameters=None,
|
|
98
|
+
track_models=None,
|
|
96
99
|
):
|
|
97
100
|
super().__init__(
|
|
98
101
|
command=command,
|
|
@@ -123,6 +126,9 @@ class DaskSpec(KubeResourceSpec):
|
|
|
123
126
|
security_context=security_context,
|
|
124
127
|
state_thresholds=state_thresholds,
|
|
125
128
|
serving_spec=serving_spec,
|
|
129
|
+
graph=graph,
|
|
130
|
+
parameters=parameters,
|
|
131
|
+
track_models=track_models,
|
|
126
132
|
)
|
|
127
133
|
self.args = args
|
|
128
134
|
|
|
@@ -55,6 +55,9 @@ class MPIResourceSpec(KubeResourceSpec):
|
|
|
55
55
|
security_context=None,
|
|
56
56
|
state_thresholds=None,
|
|
57
57
|
serving_spec=None,
|
|
58
|
+
graph=None,
|
|
59
|
+
parameters=None,
|
|
60
|
+
track_models=None,
|
|
58
61
|
):
|
|
59
62
|
super().__init__(
|
|
60
63
|
command=command,
|
|
@@ -85,6 +88,9 @@ class MPIResourceSpec(KubeResourceSpec):
|
|
|
85
88
|
security_context=security_context,
|
|
86
89
|
state_thresholds=state_thresholds,
|
|
87
90
|
serving_spec=serving_spec,
|
|
91
|
+
graph=graph,
|
|
92
|
+
parameters=parameters,
|
|
93
|
+
track_models=track_models,
|
|
88
94
|
)
|
|
89
95
|
self.mpi_args = mpi_args or [
|
|
90
96
|
"-x",
|
mlrun/runtimes/mpijob/v1.py
CHANGED
|
@@ -50,6 +50,9 @@ class MPIV1ResourceSpec(MPIResourceSpec):
|
|
|
50
50
|
security_context=None,
|
|
51
51
|
state_thresholds=None,
|
|
52
52
|
serving_spec=None,
|
|
53
|
+
graph=None,
|
|
54
|
+
parameters=None,
|
|
55
|
+
track_models=None,
|
|
53
56
|
):
|
|
54
57
|
super().__init__(
|
|
55
58
|
command=command,
|
|
@@ -81,6 +84,9 @@ class MPIV1ResourceSpec(MPIResourceSpec):
|
|
|
81
84
|
security_context=security_context,
|
|
82
85
|
state_thresholds=state_thresholds,
|
|
83
86
|
serving_spec=serving_spec,
|
|
87
|
+
graph=graph,
|
|
88
|
+
parameters=parameters,
|
|
89
|
+
track_models=track_models,
|
|
84
90
|
)
|
|
85
91
|
self.clean_pod_policy = clean_pod_policy or MPIJobV1CleanPodPolicies.default()
|
|
86
92
|
|
|
@@ -400,8 +400,10 @@ class ApplicationRuntime(RemoteRuntime):
|
|
|
400
400
|
# nuclio implementation detail - when providing the image and emptying out the source code and build source,
|
|
401
401
|
# nuclio skips rebuilding the image and simply takes the prebuilt image
|
|
402
402
|
self.spec.build.functionSourceCode = ""
|
|
403
|
+
self.spec.config.pop("spec.build.functionSourceCode", None)
|
|
403
404
|
self.status.application_source = self.spec.build.source
|
|
404
405
|
self.spec.build.source = ""
|
|
406
|
+
self.spec.config.pop("spec.build.source", None)
|
|
405
407
|
|
|
406
408
|
# save the image in the status, so we won't repopulate the function source code
|
|
407
409
|
self.status.container_image = image
|
|
@@ -155,6 +155,9 @@ class NuclioSpec(KubeResourceSpec):
|
|
|
155
155
|
state_thresholds=None,
|
|
156
156
|
disable_default_http_trigger=None,
|
|
157
157
|
serving_spec=None,
|
|
158
|
+
graph=None,
|
|
159
|
+
parameters=None,
|
|
160
|
+
track_models=None,
|
|
158
161
|
):
|
|
159
162
|
super().__init__(
|
|
160
163
|
command=command,
|
|
@@ -185,6 +188,9 @@ class NuclioSpec(KubeResourceSpec):
|
|
|
185
188
|
security_context=security_context,
|
|
186
189
|
state_thresholds=state_thresholds,
|
|
187
190
|
serving_spec=serving_spec,
|
|
191
|
+
graph=graph,
|
|
192
|
+
parameters=parameters,
|
|
193
|
+
track_models=track_models,
|
|
188
194
|
)
|
|
189
195
|
|
|
190
196
|
self.base_spec = base_spec or {}
|