mlrun 1.7.0rc6__py3-none-any.whl → 1.7.0rc8__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of mlrun might be problematic.
- mlrun/__main__.py +2 -0
- mlrun/common/constants.py +6 -0
- mlrun/common/schemas/__init__.py +3 -0
- mlrun/common/schemas/api_gateway.py +8 -1
- mlrun/common/schemas/model_monitoring/__init__.py +4 -0
- mlrun/common/schemas/model_monitoring/constants.py +35 -18
- mlrun/common/schemas/project.py +1 -0
- mlrun/common/types.py +7 -1
- mlrun/config.py +34 -10
- mlrun/data_types/data_types.py +4 -0
- mlrun/datastore/alibaba_oss.py +130 -0
- mlrun/datastore/azure_blob.py +4 -5
- mlrun/datastore/base.py +22 -16
- mlrun/datastore/datastore.py +4 -0
- mlrun/datastore/datastore_profile.py +7 -0
- mlrun/datastore/google_cloud_storage.py +1 -1
- mlrun/datastore/sources.py +2 -3
- mlrun/datastore/targets.py +6 -1
- mlrun/db/base.py +14 -6
- mlrun/db/httpdb.py +61 -56
- mlrun/db/nopdb.py +3 -0
- mlrun/frameworks/tf_keras/callbacks/logging_callback.py +6 -1
- mlrun/frameworks/tf_keras/mlrun_interface.py +20 -8
- mlrun/kfpops.py +2 -5
- mlrun/model.py +1 -0
- mlrun/model_monitoring/__init__.py +1 -1
- mlrun/model_monitoring/api.py +104 -295
- mlrun/model_monitoring/controller.py +25 -25
- mlrun/model_monitoring/db/__init__.py +16 -0
- mlrun/model_monitoring/{stores → db/stores}/__init__.py +43 -34
- mlrun/model_monitoring/db/stores/base/__init__.py +15 -0
- mlrun/model_monitoring/{stores/model_endpoint_store.py → db/stores/base/store.py} +47 -6
- mlrun/model_monitoring/db/stores/sqldb/__init__.py +13 -0
- mlrun/model_monitoring/db/stores/sqldb/models/__init__.py +49 -0
- mlrun/model_monitoring/{stores → db/stores/sqldb}/models/base.py +76 -3
- mlrun/model_monitoring/db/stores/sqldb/models/mysql.py +68 -0
- mlrun/model_monitoring/{stores → db/stores/sqldb}/models/sqlite.py +13 -1
- mlrun/model_monitoring/db/stores/sqldb/sql_store.py +662 -0
- mlrun/model_monitoring/db/stores/v3io_kv/__init__.py +13 -0
- mlrun/model_monitoring/{stores/kv_model_endpoint_store.py → db/stores/v3io_kv/kv_store.py} +134 -3
- mlrun/model_monitoring/helpers.py +3 -3
- mlrun/model_monitoring/stream_processing.py +41 -9
- mlrun/model_monitoring/tracking_policy.py +7 -1
- mlrun/model_monitoring/writer.py +4 -36
- mlrun/projects/pipelines.py +14 -2
- mlrun/projects/project.py +118 -103
- mlrun/run.py +5 -1
- mlrun/runtimes/base.py +6 -0
- mlrun/runtimes/nuclio/api_gateway.py +218 -65
- mlrun/runtimes/nuclio/function.py +3 -0
- mlrun/runtimes/nuclio/serving.py +28 -32
- mlrun/runtimes/pod.py +26 -0
- mlrun/serving/routers.py +4 -3
- mlrun/serving/server.py +4 -6
- mlrun/serving/states.py +34 -14
- mlrun/serving/v2_serving.py +4 -3
- mlrun/utils/helpers.py +34 -0
- mlrun/utils/http.py +1 -1
- mlrun/utils/retryer.py +1 -0
- mlrun/utils/version/version.json +2 -2
- {mlrun-1.7.0rc6.dist-info → mlrun-1.7.0rc8.dist-info}/METADATA +25 -16
- {mlrun-1.7.0rc6.dist-info → mlrun-1.7.0rc8.dist-info}/RECORD +66 -62
- mlrun/model_monitoring/batch.py +0 -933
- mlrun/model_monitoring/stores/models/__init__.py +0 -27
- mlrun/model_monitoring/stores/models/mysql.py +0 -34
- mlrun/model_monitoring/stores/sql_model_endpoint_store.py +0 -382
- {mlrun-1.7.0rc6.dist-info → mlrun-1.7.0rc8.dist-info}/LICENSE +0 -0
- {mlrun-1.7.0rc6.dist-info → mlrun-1.7.0rc8.dist-info}/WHEEL +0 -0
- {mlrun-1.7.0rc6.dist-info → mlrun-1.7.0rc8.dist-info}/entry_points.txt +0 -0
- {mlrun-1.7.0rc6.dist-info → mlrun-1.7.0rc8.dist-info}/top_level.txt +0 -0
mlrun/model_monitoring/{stores/kv_model_endpoint_store.py → db/stores/v3io_kv/kv_store.py}
CHANGED
@@ -16,17 +16,18 @@
 import json
 import os
 import typing
+from http import HTTPStatus

 import v3io.dataplane
+import v3io.dataplane.response
 import v3io_frames

 import mlrun.common.model_monitoring.helpers
 import mlrun.common.schemas.model_monitoring
+import mlrun.model_monitoring.db
 import mlrun.utils.v3io_clients
 from mlrun.utils import logger

-from .model_endpoint_store import ModelEndpointStore
-
 # Fields to encode before storing in the KV table or to decode after retrieving
 fields_to_encode_decode = [
     mlrun.common.schemas.model_monitoring.EventFieldType.FEATURE_STATS,
@@ -34,7 +35,7 @@ fields_to_encode_decode = [
 ]


-class KVModelEndpointStore(ModelEndpointStore):
+class KVStoreBase(mlrun.model_monitoring.db.StoreBase):
     """
     Handles the DB operations when the DB target is from type KV. For the KV operations, we use an instance of V3IO
     client and usually the KV table can be found under v3io:///users/pipelines/project-name/model-endpoints/endpoints/.
@@ -394,6 +395,128 @@ class KVModelEndpointStore(ModelEndpointStore):

         return metrics_mapping

+    def write_application_result(self, event: dict[str, typing.Any]):
+        """
+        Write a new application result event in the target table.
+
+        :param event: An event dictionary that represents the application result, should be corresponded to the
+                      schema defined in the :py:class:`~mlrun.common.schemas.model_monitoring.constants.WriterEvent`
+                      object.
+        """
+        endpoint_id = event.pop(
+            mlrun.common.schemas.model_monitoring.WriterEvent.ENDPOINT_ID
+        )
+        app_name = event.pop(
+            mlrun.common.schemas.model_monitoring.WriterEvent.APPLICATION_NAME
+        )
+        metric_name = event.pop(
+            mlrun.common.schemas.model_monitoring.WriterEvent.RESULT_NAME
+        )
+        attributes = {metric_name: json.dumps(event)}
+
+        v3io_monitoring_apps_container = self.get_v3io_monitoring_apps_container(
+            project_name=self.project
+        )
+
+        self.client.kv.update(
+            container=v3io_monitoring_apps_container,
+            table_path=endpoint_id,
+            key=app_name,
+            attributes=attributes,
+        )
+
+        schema_file = self.client.kv.new_cursor(
+            container=v3io_monitoring_apps_container,
+            table_path=endpoint_id,
+            filter_expression='__name==".#schema"',
+        )
+
+        if not schema_file.all():
+            logger.info(
+                "Generate a new V3IO KV schema file",
+                container=v3io_monitoring_apps_container,
+                endpoint_id=endpoint_id,
+            )
+            self._generate_kv_schema(endpoint_id, v3io_monitoring_apps_container)
+        logger.info("Updated V3IO KV successfully", key=app_name)
+
+    def _generate_kv_schema(
+        self, endpoint_id: str, v3io_monitoring_apps_container: str
+    ):
+        """Generate V3IO KV schema file which will be used by the model monitoring applications dashboard in Grafana."""
+        fields = [
+            {
+                "name": mlrun.common.schemas.model_monitoring.WriterEvent.RESULT_NAME,
+                "type": "string",
+                "nullable": False,
+            }
+        ]
+        res = self.client.kv.create_schema(
+            container=v3io_monitoring_apps_container,
+            table_path=endpoint_id,
+            key=mlrun.common.schemas.model_monitoring.WriterEvent.APPLICATION_NAME,
+            fields=fields,
+        )
+        if res.status_code != HTTPStatus.OK:
+            raise mlrun.errors.MLRunBadRequestError(
+                f"Couldn't infer schema for endpoint {endpoint_id} which is required for Grafana dashboards"
+            )
+        else:
+            logger.info(
+                "Generated V3IO KV schema successfully", endpoint_id=endpoint_id
+            )
+
+    def get_last_analyzed(self, endpoint_id: str, application_name: str) -> int:
+        """
+        Get the last analyzed time for the provided model endpoint and application.
+
+        :param endpoint_id:      The unique id of the model endpoint.
+        :param application_name: Registered application name.
+
+        :return: Timestamp as a Unix time.
+        :raise:  MLRunNotFoundError if last analyzed value is not found.
+
+        """
+        try:
+            data = self.client.kv.get(
+                container=self._get_monitoring_schedules_container(
+                    project_name=self.project
+                ),
+                table_path=endpoint_id,
+                key=application_name,
+            )
+            return data.output.item[
+                mlrun.common.schemas.model_monitoring.SchedulingKeys.LAST_ANALYZED
+            ]
+        except v3io.dataplane.response.HttpResponseError as err:
+            logger.debug("Error while getting last analyzed time", err=err)
+            raise mlrun.errors.MLRunNotFoundError(
+                f"No last analyzed value has been found for {application_name} "
+                f"that processes model endpoint {endpoint_id}",
+            )
+
+    def update_last_analyzed(
+        self, endpoint_id: str, application_name: str, last_analyzed: int
+    ):
+        """
+        Update the last analyzed time for the provided model endpoint and application.
+
+        :param endpoint_id:      The unique id of the model endpoint.
+        :param application_name: Registered application name.
+        :param last_analyzed:    Timestamp as a Unix time that represents the last analyzed time of a certain
+                                 application and model endpoint.
+        """
+        self.client.kv.put(
+            container=self._get_monitoring_schedules_container(
+                project_name=self.project
+            ),
+            table_path=endpoint_id,
+            key=application_name,
+            attributes={
+                mlrun.common.schemas.model_monitoring.SchedulingKeys.LAST_ANALYZED: last_analyzed
+            },
+        )
+
     def _generate_tsdb_paths(self) -> tuple[str, str]:
         """Generate a short path to the TSDB resources and a filtered path for the frames object
         :return: A tuple of:
@@ -572,3 +695,11 @@ class KVModelEndpointStore(ModelEndpointStore):
         if isinstance(field, bytes):
             return field.decode()
         return field
+
+    @staticmethod
+    def get_v3io_monitoring_apps_container(project_name: str) -> str:
+        return f"users/pipelines/{project_name}/monitoring-apps"
+
+    @staticmethod
+    def _get_monitoring_schedules_container(project_name: str) -> str:
+        return f"users/pipelines/{project_name}/monitoring-schedules/functions"
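Usage sketch (not taken from the diff): the `last_analyzed` methods added above give each monitoring application a per-endpoint bookmark in V3IO KV. The sketch assumes an Iguazio/V3IO-backed project and that `mlrun.model_monitoring.get_store_object` (referenced later in this diff) returns this `KVStoreBase`; the project name, endpoint id, and application name are placeholders.

import time

import mlrun.errors
import mlrun.model_monitoring

store = mlrun.model_monitoring.get_store_object(project="my-project")  # placeholder project

endpoint_id = "endpoint-uid-placeholder"
app_name = "my-monitoring-app"

try:
    last = store.get_last_analyzed(endpoint_id=endpoint_id, application_name=app_name)
except mlrun.errors.MLRunNotFoundError:
    last = 0  # first run for this endpoint/application pair

# ... run the monitoring application over the window (last, now] ...

store.update_last_analyzed(
    endpoint_id=endpoint_id,
    application_name=app_name,
    last_analyzed=int(time.time()),
)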
mlrun/model_monitoring/helpers.py
CHANGED
@@ -42,7 +42,7 @@ class _BatchDict(typing.TypedDict):
 def get_stream_path(
     project: str = None,
     function_name: str = mm_constants.MonitoringFunctionNames.STREAM,
-):
+) -> str:
     """
     Get stream path from the project secret. If wasn't set, take it from the system configurations

@@ -54,8 +54,6 @@ def get_stream_path(

     stream_uri = mlrun.get_secret_or_env(
         mlrun.common.schemas.model_monitoring.ProjectSecretKeys.STREAM_PATH
-        if function_name is mm_constants.MonitoringFunctionNames.STREAM
-        else ""
     ) or mlrun.mlconf.get_model_monitoring_file_target_path(
         project=project,
         kind=mlrun.common.schemas.model_monitoring.FileTargetKind.STREAM,
@@ -63,6 +61,8 @@ def get_stream_path(
         function_name=function_name,
     )

+    if isinstance(stream_uri, list):  # ML-6043 - user side gets only the new stream uri
+        stream_uri = stream_uri[1]  # get new stream path, under projects
     return mlrun.common.model_monitoring.helpers.parse_monitoring_stream_path(
         stream_uri=stream_uri, project=project, function_name=function_name
     )
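The removed condition means the STREAM_PATH secret is now resolved for every function name, and the new guard handles the case where the resolved value is a list. A standalone restatement of that guard (illustration only, not mlrun code):

# Stand-in for the guard added above; "stream_uri" is whatever value mlrun
# resolved from the project secret or the system configuration.
def _pick_stream_uri(stream_uri):
    if isinstance(stream_uri, list):  # ML-6043 - user side gets only the new stream uri
        return stream_uri[1]  # the new stream path, under projects
    return stream_uri


assert _pick_stream_uri("v3io:///projects/demo/stream-v1") == "v3io:///projects/demo/stream-v1"
assert _pick_stream_uri(["old-stream-uri", "new-stream-uri"]) == "new-stream-uri"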
mlrun/model_monitoring/stream_processing.py
CHANGED
@@ -24,7 +24,9 @@ import mlrun
 import mlrun.common.model_monitoring.helpers
 import mlrun.config
 import mlrun.datastore.targets
+import mlrun.feature_store as fstore
 import mlrun.feature_store.steps
+import mlrun.model_monitoring.db
 import mlrun.model_monitoring.prometheus
 import mlrun.serving.states
 import mlrun.utils
@@ -36,6 +38,7 @@ from mlrun.common.schemas.model_monitoring.constants import (
     FileTargetKind,
     ModelEndpointTarget,
     ProjectSecretKeys,
+    PrometheusEndpoints,
 )
 from mlrun.utils import logger

@@ -183,11 +186,11 @@ class EventStreamProcessor:
         # Step 2 - Filter out events with '-' in the path basename from going forward
         # through the next steps of the stream graph
         def apply_storey_filter_stream_events():
-            #
+            # Filter events with Prometheus endpoints path
             graph.add_step(
                 "storey.Filter",
                 "filter_stream_event",
-                _fn="(
+                _fn=f"(event.path not in {PrometheusEndpoints.list()})",
                 full_event=True,
             )

@@ -933,6 +936,8 @@ class MapFeatureNames(mlrun.feature_store.steps.MapClass):
     def do(self, event: dict):
         endpoint_id = event[EventFieldType.ENDPOINT_ID]

+        feature_values = event[EventFieldType.FEATURES]
+        label_values = event[EventFieldType.PREDICTION]
         # Get feature names and label columns
         if endpoint_id not in self.feature_names:
             endpoint_record = get_endpoint_record(
@@ -968,6 +973,12 @@ class MapFeatureNames(mlrun.feature_store.steps.MapClass):
                 },
             )

+            update_monitoring_feature_set(
+                endpoint_record=endpoint_record,
+                feature_names=feature_names,
+                feature_values=feature_values,
+            )
+
             # Similar process with label columns
             if not label_columns and self._infer_columns_from_data:
                 label_columns = self._infer_label_columns_from_data(event)
@@ -986,6 +997,11 @@ class MapFeatureNames(mlrun.feature_store.steps.MapClass):
                 endpoint_id=endpoint_id,
                 attributes={EventFieldType.LABEL_NAMES: json.dumps(label_columns)},
             )
+            update_monitoring_feature_set(
+                endpoint_record=endpoint_record,
+                feature_names=label_columns,
+                feature_values=label_values,
+            )

             self.label_columns[endpoint_id] = label_columns
             self.feature_names[endpoint_id] = feature_names
@@ -1003,7 +1019,6 @@ class MapFeatureNames(mlrun.feature_store.steps.MapClass):

         # Add feature_name:value pairs along with a mapping dictionary of all of these pairs
         feature_names = self.feature_names[endpoint_id]
-        feature_values = event[EventFieldType.FEATURES]
         self._map_dictionary_values(
             event=event,
             named_iters=feature_names,
@@ -1013,7 +1028,6 @@ class MapFeatureNames(mlrun.feature_store.steps.MapClass):

         # Add label_name:value pairs along with a mapping dictionary of all of these pairs
         label_names = self.label_columns[endpoint_id]
-        label_values = event[EventFieldType.PREDICTION]
         self._map_dictionary_values(
             event=event,
             named_iters=label_names,
@@ -1139,10 +1153,10 @@ class EventRouting(mlrun.feature_store.steps.MapClass):
         self.project: str = project

     def do(self, event):
-        if event.path ==
+        if event.path == PrometheusEndpoints.MODEL_MONITORING_METRICS:
             # Return a parsed Prometheus registry file
             event.body = mlrun.model_monitoring.prometheus.get_registry()
-        elif event.path ==
+        elif event.path == PrometheusEndpoints.MONITORING_BATCH_METRICS:
             # Update statistical metrics
             for event_metric in event.body:
                 mlrun.model_monitoring.prometheus.write_drift_metrics(
@@ -1151,7 +1165,7 @@ class EventRouting(mlrun.feature_store.steps.MapClass):
                     metric=event_metric[EventFieldType.METRIC],
                     value=event_metric[EventFieldType.VALUE],
                 )
-        elif event.path ==
+        elif event.path == PrometheusEndpoints.MONITORING_DRIFT_STATUS:
             # Update drift status
             mlrun.model_monitoring.prometheus.write_drift_status(
                 project=self.project,
@@ -1211,7 +1225,7 @@ def update_endpoint_record(
     endpoint_id: str,
     attributes: dict,
 ):
-    model_endpoint_store = mlrun.model_monitoring.
+    model_endpoint_store = mlrun.model_monitoring.get_store_object(
         project=project,
     )

@@ -1221,7 +1235,25 @@ def update_endpoint_record(


 def get_endpoint_record(project: str, endpoint_id: str):
-    model_endpoint_store = mlrun.model_monitoring.
+    model_endpoint_store = mlrun.model_monitoring.get_store_object(
         project=project,
     )
     return model_endpoint_store.get_model_endpoint(endpoint_id=endpoint_id)
+
+
+def update_monitoring_feature_set(
+    endpoint_record: dict[str, typing.Any],
+    feature_names: list[str],
+    feature_values: list[typing.Any],
+):
+    monitoring_feature_set = fstore.get_feature_set(
+        endpoint_record[
+            mlrun.common.schemas.model_monitoring.EventFieldType.FEATURE_SET_URI
+        ]
+    )
+    for name, val in zip(feature_names, feature_values):
+        monitoring_feature_set.add_feature(
+            fstore.Feature(name=name, value_type=type(val))
+        )
+
+    monitoring_feature_set.save()
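For context on the filter expression above: `PrometheusEndpoints` (added to the model-monitoring constants in this release) is used both to keep Prometheus scrape requests out of the regular event stream and to route them in `EventRouting`. The sketch below uses a stand-in enum with made-up path values, since the real values are not shown in this diff; it only illustrates how the `storey.Filter` expression string is produced.

import enum


class PrometheusEndpoints(str, enum.Enum):  # stand-in, not the real mlrun class
    MODEL_MONITORING_METRICS = "/monitoring/metrics"        # placeholder value
    MONITORING_BATCH_METRICS = "/monitoring/batch-metrics"  # placeholder value
    MONITORING_DRIFT_STATUS = "/monitoring/drift-status"    # placeholder value

    @classmethod
    def list(cls):
        return [member.value for member in cls]


# Mirrors the _fn argument passed to storey.Filter in the hunk above.
filter_expression = f"(event.path not in {PrometheusEndpoints.list()})"
print(filter_expression)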
mlrun/model_monitoring/tracking_policy.py
CHANGED
@@ -11,8 +11,8 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-#

+import warnings
 from typing import Union

 import mlrun.common.schemas.schedule
@@ -55,6 +55,12 @@ class TrackingPolicy(mlrun.model.ModelObj):
         writer function, which is a real time nuclio functino, will be deployed
         with the same image. By default, the image is mlrun/mlrun.
         """
+        warnings.warn(
+            "The `TrackingPolicy` class is deprecated from version 1.7.0 and is not "
+            "used anymore. It will be removed in 1.9.0.",
+            FutureWarning,
+        )
+
         if isinstance(default_batch_intervals, str):
             default_batch_intervals = (
                 mlrun.common.schemas.schedule.ScheduleCronTrigger.from_crontab(
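As shown above, constructing `TrackingPolicy` now emits a `FutureWarning`. A quick way to observe it, assuming the class can still be imported from `mlrun.model_monitoring.tracking_policy` and constructed with default arguments:

import warnings

from mlrun.model_monitoring.tracking_policy import TrackingPolicy

with warnings.catch_warnings(record=True) as caught:
    warnings.simplefilter("always")
    TrackingPolicy()  # assumed to be constructible with defaults

assert any(issubclass(w.category, FutureWarning) for w in caught)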
mlrun/model_monitoring/writer.py
CHANGED
@@ -14,7 +14,6 @@

 import datetime
 import json
-from http import HTTPStatus
 from typing import Any, NewType

 import pandas as pd
@@ -25,6 +24,7 @@ from v3io_frames.frames_pb2 import IGNORE

 import mlrun.common.model_monitoring
 import mlrun.model_monitoring
+import mlrun.model_monitoring.db.stores
 import mlrun.utils.v3io_clients
 from mlrun.common.schemas.model_monitoring.constants import ResultStatusApp, WriterEvent
 from mlrun.common.schemas.notification import NotificationKind, NotificationSeverity
@@ -106,13 +106,11 @@ class ModelMonitoringWriter(StepToDict):
         self.project = project
         self.name = project  # required for the deployment process
         self._v3io_container = self.get_v3io_container(self.name)
-        self._kv_client = self._get_v3io_client().kv
         self._tsdb_client = self._get_v3io_frames_client(self._v3io_container)
         self._custom_notifier = CustomNotificationPusher(
             notification_types=[NotificationKind.slack]
         )
         self._create_tsdb_table()
-        self._kv_schemas = []

     @staticmethod
     def get_v3io_container(project_name: str) -> str:
@@ -141,40 +139,10 @@ class ModelMonitoringWriter(StepToDict):

     def _update_kv_db(self, event: _AppResultEvent) -> None:
         event = _AppResultEvent(event.copy())
-
-
-        metric_name = event.pop(WriterEvent.RESULT_NAME)
-        attributes = {metric_name: json.dumps(event)}
-        self._kv_client.update(
-            container=self._v3io_container,
-            table_path=endpoint_id,
-            key=app_name,
-            attributes=attributes,
+        application_result_store = mlrun.model_monitoring.get_store_object(
+            project=self.project
         )
-
-        self._generate_kv_schema(endpoint_id)
-        logger.info("Updated V3IO KV successfully", key=app_name)
-
-    def _generate_kv_schema(self, endpoint_id: str):
-        """Generate V3IO KV schema file which will be used by the model monitoring applications dashboard in Grafana."""
-        fields = [
-            {"name": WriterEvent.RESULT_NAME, "type": "string", "nullable": False}
-        ]
-        res = self._kv_client.create_schema(
-            container=self._v3io_container,
-            table_path=endpoint_id,
-            key=WriterEvent.APPLICATION_NAME,
-            fields=fields,
-        )
-        if res.status_code != HTTPStatus.OK.value:
-            raise mlrun.errors.MLRunBadRequestError(
-                f"Couldn't infer schema for endpoint {endpoint_id} which is required for Grafana dashboards"
-            )
-        else:
-            logger.info(
-                "Generated V3IO KV schema successfully", endpoint_id=endpoint_id
-            )
-        self._kv_schemas.append(endpoint_id)
+        application_result_store.write_application_result(event=event)

     def _update_tsdb(self, event: _AppResultEvent) -> None:
         event = _AppResultEvent(event.copy())
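Net effect of the writer change: `_update_kv_db` no longer talks to V3IO KV directly but hands the event to whatever store `mlrun.model_monitoring.get_store_object` returns for the project. A hedged sketch of the equivalent call path, using placeholder values and only the `WriterEvent` fields that appear in this diff (real writer events carry additional fields):

import mlrun.model_monitoring
from mlrun.common.schemas.model_monitoring.constants import WriterEvent

# Placeholder application-result event.
event = {
    WriterEvent.ENDPOINT_ID: "endpoint-uid-placeholder",
    WriterEvent.APPLICATION_NAME: "my-monitoring-app",
    WriterEvent.RESULT_NAME: "data_drift",
}

store = mlrun.model_monitoring.get_store_object(project="my-project")  # placeholder project
store.write_application_result(event=event)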
mlrun/projects/pipelines.py
CHANGED
@@ -412,6 +412,11 @@ def enrich_function_object(
     if decorator:
         decorator(f)

+    if project.spec.default_function_node_selector:
+        f.enrich_runtime_spec(
+            project.spec.default_function_node_selector,
+        )
+
     if try_auto_mount:
         if (
             decorator and AutoMountType.is_auto_modifier(decorator)
@@ -608,6 +613,7 @@ class _KFPRunner(_PipelineRunner):
             namespace=namespace,
             artifact_path=artifact_path,
             cleanup_ttl=workflow_spec.cleanup_ttl,
+            timeout=int(mlrun.mlconf.workflows.timeouts.kfp),
         )

         # The user provided workflow code might have made changes to function specs that require cleanup
@@ -865,15 +871,21 @@ class _RemoteRunner(_PipelineRunner):
             )
             return

+        get_workflow_id_timeout = max(
+            int(mlrun.mlconf.workflows.timeouts.remote),
+            int(getattr(mlrun.mlconf.workflows.timeouts, inner_engine.engine)),
+        )
+
         logger.debug(
             "Workflow submitted, waiting for pipeline run to start",
             workflow_name=workflow_response.name,
+            get_workflow_id_timeout=get_workflow_id_timeout,
         )

         # Getting workflow id from run:
         response = retry_until_successful(
             1,
-
+            get_workflow_id_timeout,
             logger,
             False,
             run_db.get_workflow_id,
@@ -1059,7 +1071,7 @@ def load_and_run(
         )

     except Exception as exc:
-        logger.error("Failed to send slack notification", exc=exc)
+        logger.error("Failed to send slack notification", exc=err_to_str(exc))

         raise error

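The new `get_workflow_id_timeout` is driven by the `mlrun.mlconf.workflows.timeouts` configuration node (also touched in `mlrun/config.py` in this release). A sketch of how the remote runner's wait time is derived; the numeric values are illustrative only, not the shipped defaults, and overriding them via attribute assignment is an assumption about the config object:

import mlrun

# Illustrative overrides; the real defaults live in mlrun/config.py.
mlrun.mlconf.workflows.timeouts.kfp = 600     # seconds to wait for a KFP workflow id
mlrun.mlconf.workflows.timeouts.remote = 300  # baseline for the remote runner

engine = "kfp"  # stands in for inner_engine.engine in the hunk above
get_workflow_id_timeout = max(
    int(mlrun.mlconf.workflows.timeouts.remote),
    int(getattr(mlrun.mlconf.workflows.timeouts, engine)),
)
print(get_workflow_id_timeout)  # 600 with the values above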