mlrun 1.7.0rc26__py3-none-any.whl → 1.7.0rc31__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of mlrun might be problematic. Click here for more details.
- mlrun/__main__.py +7 -7
- mlrun/alerts/alert.py +13 -1
- mlrun/artifacts/manager.py +5 -0
- mlrun/common/constants.py +3 -3
- mlrun/common/formatters/artifact.py +1 -0
- mlrun/common/formatters/base.py +9 -9
- mlrun/common/schemas/alert.py +4 -8
- mlrun/common/schemas/api_gateway.py +7 -0
- mlrun/common/schemas/constants.py +3 -0
- mlrun/common/schemas/model_monitoring/__init__.py +1 -0
- mlrun/common/schemas/model_monitoring/constants.py +32 -13
- mlrun/common/schemas/model_monitoring/model_endpoints.py +0 -12
- mlrun/common/schemas/project.py +10 -9
- mlrun/common/schemas/schedule.py +1 -1
- mlrun/config.py +37 -11
- mlrun/data_types/spark.py +2 -2
- mlrun/data_types/to_pandas.py +48 -16
- mlrun/datastore/__init__.py +1 -0
- mlrun/datastore/azure_blob.py +2 -1
- mlrun/datastore/base.py +21 -13
- mlrun/datastore/datastore.py +7 -5
- mlrun/datastore/datastore_profile.py +1 -1
- mlrun/datastore/google_cloud_storage.py +1 -0
- mlrun/datastore/inmem.py +4 -1
- mlrun/datastore/s3.py +2 -0
- mlrun/datastore/snowflake_utils.py +3 -1
- mlrun/datastore/sources.py +40 -11
- mlrun/datastore/store_resources.py +2 -0
- mlrun/datastore/targets.py +71 -26
- mlrun/db/base.py +11 -0
- mlrun/db/httpdb.py +50 -31
- mlrun/db/nopdb.py +11 -1
- mlrun/errors.py +4 -0
- mlrun/execution.py +18 -10
- mlrun/feature_store/retrieval/spark_merger.py +4 -32
- mlrun/launcher/local.py +2 -2
- mlrun/model.py +27 -1
- mlrun/model_monitoring/api.py +9 -55
- mlrun/model_monitoring/applications/histogram_data_drift.py +4 -1
- mlrun/model_monitoring/controller.py +57 -73
- mlrun/model_monitoring/db/stores/__init__.py +21 -9
- mlrun/model_monitoring/db/stores/base/store.py +39 -1
- mlrun/model_monitoring/db/stores/sqldb/models/base.py +9 -7
- mlrun/model_monitoring/db/stores/sqldb/models/mysql.py +4 -2
- mlrun/model_monitoring/db/stores/sqldb/sql_store.py +41 -80
- mlrun/model_monitoring/db/stores/v3io_kv/kv_store.py +22 -27
- mlrun/model_monitoring/db/tsdb/__init__.py +19 -14
- mlrun/model_monitoring/db/tsdb/v3io/v3io_connector.py +4 -2
- mlrun/model_monitoring/helpers.py +15 -17
- mlrun/model_monitoring/writer.py +2 -7
- mlrun/projects/operations.py +1 -0
- mlrun/projects/project.py +87 -75
- mlrun/render.py +10 -5
- mlrun/run.py +7 -7
- mlrun/runtimes/base.py +1 -1
- mlrun/runtimes/daskjob.py +7 -1
- mlrun/runtimes/local.py +24 -7
- mlrun/runtimes/nuclio/function.py +20 -0
- mlrun/runtimes/pod.py +5 -29
- mlrun/serving/routers.py +75 -59
- mlrun/serving/server.py +1 -0
- mlrun/serving/v2_serving.py +8 -1
- mlrun/utils/helpers.py +46 -2
- mlrun/utils/logger.py +36 -2
- mlrun/utils/notifications/notification/base.py +4 -0
- mlrun/utils/notifications/notification/git.py +21 -0
- mlrun/utils/notifications/notification/slack.py +8 -0
- mlrun/utils/notifications/notification/webhook.py +41 -1
- mlrun/utils/notifications/notification_pusher.py +2 -2
- mlrun/utils/version/version.json +2 -2
- {mlrun-1.7.0rc26.dist-info → mlrun-1.7.0rc31.dist-info}/METADATA +13 -8
- {mlrun-1.7.0rc26.dist-info → mlrun-1.7.0rc31.dist-info}/RECORD +76 -78
- {mlrun-1.7.0rc26.dist-info → mlrun-1.7.0rc31.dist-info}/WHEEL +1 -1
- mlrun/feature_store/retrieval/conversion.py +0 -271
- mlrun/model_monitoring/controller_handler.py +0 -37
- {mlrun-1.7.0rc26.dist-info → mlrun-1.7.0rc31.dist-info}/LICENSE +0 -0
- {mlrun-1.7.0rc26.dist-info → mlrun-1.7.0rc31.dist-info}/entry_points.txt +0 -0
- {mlrun-1.7.0rc26.dist-info → mlrun-1.7.0rc31.dist-info}/top_level.txt +0 -0
mlrun/db/nopdb.py
CHANGED
|
@@ -162,6 +162,7 @@ class NopDB(RunDBInterface):
|
|
|
162
162
|
mlrun.common.schemas.artifact.ArtifactsDeletionStrategies.metadata_only
|
|
163
163
|
),
|
|
164
164
|
secrets: dict = None,
|
|
165
|
+
iter=None,
|
|
165
166
|
):
|
|
166
167
|
pass
|
|
167
168
|
|
|
@@ -708,6 +709,7 @@ class NopDB(RunDBInterface):
|
|
|
708
709
|
image: str = "mlrun/mlrun",
|
|
709
710
|
deploy_histogram_data_drift_app: bool = True,
|
|
710
711
|
rebuild_images: bool = False,
|
|
712
|
+
fetch_credentials_from_sys_config: bool = False,
|
|
711
713
|
) -> None:
|
|
712
714
|
pass
|
|
713
715
|
|
|
@@ -730,7 +732,15 @@ class NopDB(RunDBInterface):
|
|
|
730
732
|
def deploy_histogram_data_drift_app(
|
|
731
733
|
self, project: str, image: str = "mlrun/mlrun"
|
|
732
734
|
) -> None:
|
|
733
|
-
|
|
735
|
+
pass
|
|
736
|
+
|
|
737
|
+
def set_model_monitoring_credentials(
|
|
738
|
+
self,
|
|
739
|
+
project: str,
|
|
740
|
+
credentials: dict[str, str],
|
|
741
|
+
replace_creds: bool,
|
|
742
|
+
) -> None:
|
|
743
|
+
pass
|
|
734
744
|
|
|
735
745
|
def generate_event(
|
|
736
746
|
self, name: str, event_data: Union[dict, mlrun.common.schemas.Event], project=""
|
mlrun/errors.py
CHANGED
|
@@ -205,6 +205,10 @@ class MLRunTimeoutError(MLRunHTTPStatusError, TimeoutError):
|
|
|
205
205
|
error_status_code = HTTPStatus.GATEWAY_TIMEOUT.value
|
|
206
206
|
|
|
207
207
|
|
|
208
|
+
class MLRunInvalidMMStoreType(MLRunHTTPStatusError, ValueError):
|
|
209
|
+
error_status_code = HTTPStatus.BAD_REQUEST.value
|
|
210
|
+
|
|
211
|
+
|
|
208
212
|
class MLRunRetryExhaustedError(Exception):
|
|
209
213
|
pass
|
|
210
214
|
|
mlrun/execution.py
CHANGED
|
@@ -34,13 +34,13 @@ from .features import Feature
|
|
|
34
34
|
from .model import HyperParamOptions
|
|
35
35
|
from .secrets import SecretsStore
|
|
36
36
|
from .utils import (
|
|
37
|
+
RunKeys,
|
|
37
38
|
dict_to_json,
|
|
38
39
|
dict_to_yaml,
|
|
39
40
|
get_in,
|
|
40
41
|
is_relative_path,
|
|
41
42
|
logger,
|
|
42
43
|
now_date,
|
|
43
|
-
run_keys,
|
|
44
44
|
to_date_str,
|
|
45
45
|
update_in,
|
|
46
46
|
)
|
|
@@ -85,6 +85,7 @@ class MLClientCtx:
|
|
|
85
85
|
|
|
86
86
|
self._labels = {}
|
|
87
87
|
self._annotations = {}
|
|
88
|
+
self._node_selector = {}
|
|
88
89
|
|
|
89
90
|
self._function = ""
|
|
90
91
|
self._parameters = {}
|
|
@@ -207,6 +208,11 @@ class MLClientCtx:
|
|
|
207
208
|
"""Dictionary with labels (read-only)"""
|
|
208
209
|
return deepcopy(self._labels)
|
|
209
210
|
|
|
211
|
+
@property
|
|
212
|
+
def node_selector(self):
|
|
213
|
+
"""Dictionary with node selectors (read-only)"""
|
|
214
|
+
return deepcopy(self._node_selector)
|
|
215
|
+
|
|
210
216
|
@property
|
|
211
217
|
def annotations(self):
|
|
212
218
|
"""Dictionary with annotations (read-only)"""
|
|
@@ -365,7 +371,7 @@ class MLClientCtx:
|
|
|
365
371
|
self._labels = meta.get("labels", self._labels)
|
|
366
372
|
spec = attrs.get("spec")
|
|
367
373
|
if spec:
|
|
368
|
-
self._secrets_manager = SecretsStore.from_list(spec.get(
|
|
374
|
+
self._secrets_manager = SecretsStore.from_list(spec.get(RunKeys.secrets))
|
|
369
375
|
self._log_level = spec.get("log_level", self._log_level)
|
|
370
376
|
self._function = spec.get("function", self._function)
|
|
371
377
|
self._parameters = spec.get("parameters", self._parameters)
|
|
@@ -383,13 +389,14 @@ class MLClientCtx:
|
|
|
383
389
|
self._allow_empty_resources = spec.get(
|
|
384
390
|
"allow_empty_resources", self._allow_empty_resources
|
|
385
391
|
)
|
|
386
|
-
self.artifact_path = spec.get(
|
|
387
|
-
self._in_path = spec.get(
|
|
388
|
-
inputs = spec.get(
|
|
392
|
+
self.artifact_path = spec.get(RunKeys.output_path, self.artifact_path)
|
|
393
|
+
self._in_path = spec.get(RunKeys.input_path, self._in_path)
|
|
394
|
+
inputs = spec.get(RunKeys.inputs)
|
|
389
395
|
self._notifications = spec.get("notifications", self._notifications)
|
|
390
396
|
self._state_thresholds = spec.get(
|
|
391
397
|
"state_thresholds", self._state_thresholds
|
|
392
398
|
)
|
|
399
|
+
self._node_selector = spec.get("node_selector", self._node_selector)
|
|
393
400
|
self._reset_on_run = spec.get("reset_on_run", self._reset_on_run)
|
|
394
401
|
|
|
395
402
|
self._init_dbs(rundb)
|
|
@@ -567,7 +574,7 @@ class MLClientCtx:
|
|
|
567
574
|
self._results["best_iteration"] = best
|
|
568
575
|
for k, v in get_in(task, ["status", "results"], {}).items():
|
|
569
576
|
self._results[k] = v
|
|
570
|
-
for artifact in get_in(task, ["status",
|
|
577
|
+
for artifact in get_in(task, ["status", RunKeys.artifacts], []):
|
|
571
578
|
self._artifacts_manager.artifacts[artifact["metadata"]["key"]] = (
|
|
572
579
|
artifact
|
|
573
580
|
)
|
|
@@ -939,10 +946,11 @@ class MLClientCtx:
|
|
|
939
946
|
"parameters": self._parameters,
|
|
940
947
|
"handler": self._handler,
|
|
941
948
|
"outputs": self._outputs,
|
|
942
|
-
|
|
943
|
-
|
|
949
|
+
RunKeys.output_path: self.artifact_path,
|
|
950
|
+
RunKeys.inputs: self._inputs,
|
|
944
951
|
"notifications": self._notifications,
|
|
945
952
|
"state_thresholds": self._state_thresholds,
|
|
953
|
+
"node_selector": self._node_selector,
|
|
946
954
|
},
|
|
947
955
|
"status": {
|
|
948
956
|
"results": self._results,
|
|
@@ -964,7 +972,7 @@ class MLClientCtx:
|
|
|
964
972
|
set_if_not_none(struct["status"], "commit", self._commit)
|
|
965
973
|
set_if_not_none(struct["status"], "iterations", self._iteration_results)
|
|
966
974
|
|
|
967
|
-
struct["status"][
|
|
975
|
+
struct["status"][RunKeys.artifacts] = self._artifacts_manager.artifact_list()
|
|
968
976
|
self._data_stores.to_dict(struct["spec"])
|
|
969
977
|
return struct
|
|
970
978
|
|
|
@@ -1058,7 +1066,7 @@ class MLClientCtx:
|
|
|
1058
1066
|
set_if_not_none(struct, "status.commit", self._commit)
|
|
1059
1067
|
set_if_not_none(struct, "status.iterations", self._iteration_results)
|
|
1060
1068
|
|
|
1061
|
-
struct[f"status.{
|
|
1069
|
+
struct[f"status.{RunKeys.artifacts}"] = self._artifacts_manager.artifact_list()
|
|
1062
1070
|
return struct
|
|
1063
1071
|
|
|
1064
1072
|
def _init_dbs(self, rundb):
|
|
@@ -13,44 +13,16 @@
|
|
|
13
13
|
# limitations under the License.
|
|
14
14
|
#
|
|
15
15
|
|
|
16
|
-
import pandas as pd
|
|
17
|
-
import semver
|
|
18
16
|
|
|
19
17
|
import mlrun
|
|
18
|
+
from mlrun.data_types.to_pandas import spark_df_to_pandas
|
|
20
19
|
from mlrun.datastore.sources import ParquetSource
|
|
21
20
|
from mlrun.datastore.targets import get_offline_target
|
|
21
|
+
from mlrun.runtimes import RemoteSparkRuntime
|
|
22
|
+
from mlrun.runtimes.sparkjob import Spark3Runtime
|
|
22
23
|
from mlrun.utils.helpers import additional_filters_warning
|
|
23
24
|
|
|
24
|
-
from ...runtimes import RemoteSparkRuntime
|
|
25
|
-
from ...runtimes.sparkjob import Spark3Runtime
|
|
26
25
|
from .base import BaseMerger
|
|
27
|
-
from .conversion import PandasConversionMixin
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
def spark_df_to_pandas(spark_df):
|
|
31
|
-
# as of pyspark 3.2.3, toPandas fails to convert timestamps unless we work around the issue
|
|
32
|
-
# when we upgrade pyspark, we should check whether this workaround is still necessary
|
|
33
|
-
# see https://stackoverflow.com/questions/76389694/transforming-pyspark-to-pandas-dataframe
|
|
34
|
-
if semver.parse(pd.__version__)["major"] >= 2:
|
|
35
|
-
import pyspark.sql.functions as pyspark_functions
|
|
36
|
-
|
|
37
|
-
type_conversion_dict = {}
|
|
38
|
-
for field in spark_df.schema.fields:
|
|
39
|
-
if str(field.dataType) == "TimestampType":
|
|
40
|
-
spark_df = spark_df.withColumn(
|
|
41
|
-
field.name,
|
|
42
|
-
pyspark_functions.date_format(
|
|
43
|
-
pyspark_functions.to_timestamp(field.name),
|
|
44
|
-
"yyyy-MM-dd'T'HH:mm:ss.SSSSSSSSS",
|
|
45
|
-
),
|
|
46
|
-
)
|
|
47
|
-
type_conversion_dict[field.name] = "datetime64[ns]"
|
|
48
|
-
df = PandasConversionMixin.toPandas(spark_df)
|
|
49
|
-
if type_conversion_dict:
|
|
50
|
-
df = df.astype(type_conversion_dict)
|
|
51
|
-
return df
|
|
52
|
-
else:
|
|
53
|
-
return PandasConversionMixin.toPandas(spark_df)
|
|
54
26
|
|
|
55
27
|
|
|
56
28
|
class SparkFeatureMerger(BaseMerger):
|
|
@@ -252,7 +224,7 @@ class SparkFeatureMerger(BaseMerger):
|
|
|
252
224
|
)
|
|
253
225
|
source_kind = target.kind
|
|
254
226
|
source_path = target.get_target_path()
|
|
255
|
-
|
|
227
|
+
source_kwargs = target.source_spark_attributes
|
|
256
228
|
# handling case where there are multiple feature sets and user creates vector where
|
|
257
229
|
# entity_timestamp_column is from a specific feature set (can't be entity timestamp)
|
|
258
230
|
source_driver = mlrun.datastore.sources.source_kind_to_driver[source_kind]
|
mlrun/launcher/local.py
CHANGED
|
@@ -72,9 +72,9 @@ class ClientLocalLauncher(launcher.ClientBaseLauncher):
|
|
|
72
72
|
reset_on_run: Optional[bool] = None,
|
|
73
73
|
) -> "mlrun.run.RunObject":
|
|
74
74
|
# do not allow local function to be scheduled
|
|
75
|
-
if
|
|
75
|
+
if schedule is not None:
|
|
76
76
|
raise mlrun.errors.MLRunInvalidArgumentError(
|
|
77
|
-
"
|
|
77
|
+
f"Unexpected {schedule=} parameter for local function execution"
|
|
78
78
|
)
|
|
79
79
|
|
|
80
80
|
self.enrich_runtime(runtime, project)
|
mlrun/model.py
CHANGED
|
@@ -732,6 +732,30 @@ class Notification(ModelObj):
|
|
|
732
732
|
"Notification params size exceeds max size of 1 MB"
|
|
733
733
|
)
|
|
734
734
|
|
|
735
|
+
def validate_notification_params(self):
|
|
736
|
+
notification_class = mlrun.utils.notifications.NotificationTypes(
|
|
737
|
+
self.kind
|
|
738
|
+
).get_notification()
|
|
739
|
+
|
|
740
|
+
secret_params = self.secret_params or {}
|
|
741
|
+
params = self.params or {}
|
|
742
|
+
|
|
743
|
+
# if the secret_params are already masked - no need to validate
|
|
744
|
+
params_secret = secret_params.get("secret", "")
|
|
745
|
+
if params_secret:
|
|
746
|
+
if len(secret_params) > 1:
|
|
747
|
+
raise mlrun.errors.MLRunInvalidArgumentError(
|
|
748
|
+
"When the 'secret' key is present, 'secret_params' should not contain any other keys."
|
|
749
|
+
)
|
|
750
|
+
return
|
|
751
|
+
|
|
752
|
+
if not secret_params and not params:
|
|
753
|
+
raise mlrun.errors.MLRunInvalidArgumentError(
|
|
754
|
+
"Both 'secret_params' and 'params' are empty, at least one must be defined."
|
|
755
|
+
)
|
|
756
|
+
|
|
757
|
+
notification_class.validate_params(secret_params or params)
|
|
758
|
+
|
|
735
759
|
@staticmethod
|
|
736
760
|
def validate_notification_uniqueness(notifications: list["Notification"]):
|
|
737
761
|
"""Validate that all notifications in the list are unique by name"""
|
|
@@ -873,6 +897,7 @@ class RunSpec(ModelObj):
|
|
|
873
897
|
notifications=None,
|
|
874
898
|
state_thresholds=None,
|
|
875
899
|
reset_on_run=None,
|
|
900
|
+
node_selector=None,
|
|
876
901
|
):
|
|
877
902
|
# A dictionary of parsing configurations that will be read from the inputs the user set. The keys are the inputs
|
|
878
903
|
# keys (parameter names) and the values are the type hint given in the input keys after the colon.
|
|
@@ -910,6 +935,7 @@ class RunSpec(ModelObj):
|
|
|
910
935
|
self._notifications = notifications or []
|
|
911
936
|
self.state_thresholds = state_thresholds or {}
|
|
912
937
|
self.reset_on_run = reset_on_run
|
|
938
|
+
self.node_selector = node_selector or {}
|
|
913
939
|
|
|
914
940
|
def _serialize_field(
|
|
915
941
|
self, struct: dict, field_name: str = None, strip: bool = False
|
|
@@ -1285,7 +1311,7 @@ class RunTemplate(ModelObj):
|
|
|
1285
1311
|
|
|
1286
1312
|
task.with_input("data", "/file-dir/path/to/file")
|
|
1287
1313
|
task.with_input("data", "s3://<bucket>/path/to/file")
|
|
1288
|
-
task.with_input("data", "v3io
|
|
1314
|
+
task.with_input("data", "v3io://<data-container>/path/to/file")
|
|
1289
1315
|
"""
|
|
1290
1316
|
if not self.spec.inputs:
|
|
1291
1317
|
self.spec.inputs = {}
|
mlrun/model_monitoring/api.py
CHANGED
|
@@ -47,8 +47,8 @@ def get_or_create_model_endpoint(
|
|
|
47
47
|
function_name: str = "",
|
|
48
48
|
context: mlrun.MLClientCtx = None,
|
|
49
49
|
sample_set_statistics: dict[str, typing.Any] = None,
|
|
50
|
-
drift_threshold: float = None,
|
|
51
|
-
possible_drift_threshold: float = None,
|
|
50
|
+
drift_threshold: typing.Optional[float] = None,
|
|
51
|
+
possible_drift_threshold: typing.Optional[float] = None,
|
|
52
52
|
monitoring_mode: mm_constants.ModelMonitoringMode = mm_constants.ModelMonitoringMode.disabled,
|
|
53
53
|
db_session=None,
|
|
54
54
|
) -> ModelEndpoint:
|
|
@@ -69,14 +69,14 @@ def get_or_create_model_endpoint(
|
|
|
69
69
|
full function hash.
|
|
70
70
|
:param sample_set_statistics: Dictionary of sample set statistics that will be used as a reference data for
|
|
71
71
|
the new model endpoint (applicable only to new endpoint_id).
|
|
72
|
-
:param drift_threshold: The threshold of which to mark drifts (applicable only to new
|
|
73
|
-
|
|
72
|
+
:param drift_threshold: (deprecated) The threshold of which to mark drifts (applicable only to new
|
|
73
|
+
endpoint_id).
|
|
74
|
+
:param possible_drift_threshold: (deprecated) The threshold of which to mark possible drifts (applicable only to new
|
|
74
75
|
endpoint_id).
|
|
75
76
|
:param monitoring_mode: If enabled, apply model monitoring features on the provided endpoint id
|
|
76
77
|
(applicable only to new endpoint_id).
|
|
77
78
|
:param db_session: A runtime session that manages the current dialog with the database.
|
|
78
79
|
|
|
79
|
-
|
|
80
80
|
:return: A ModelEndpoint object
|
|
81
81
|
"""
|
|
82
82
|
|
|
@@ -98,8 +98,6 @@ def get_or_create_model_endpoint(
|
|
|
98
98
|
model_endpoint=model_endpoint,
|
|
99
99
|
model_path=model_path,
|
|
100
100
|
sample_set_statistics=sample_set_statistics,
|
|
101
|
-
drift_threshold=drift_threshold,
|
|
102
|
-
possible_drift_threshold=possible_drift_threshold,
|
|
103
101
|
)
|
|
104
102
|
|
|
105
103
|
except mlrun.errors.MLRunNotFoundError:
|
|
@@ -113,8 +111,6 @@ def get_or_create_model_endpoint(
|
|
|
113
111
|
function_name=function_name,
|
|
114
112
|
context=context,
|
|
115
113
|
sample_set_statistics=sample_set_statistics,
|
|
116
|
-
drift_threshold=drift_threshold,
|
|
117
|
-
possible_drift_threshold=possible_drift_threshold,
|
|
118
114
|
monitoring_mode=monitoring_mode,
|
|
119
115
|
)
|
|
120
116
|
return model_endpoint
|
|
@@ -241,9 +237,7 @@ def _model_endpoint_validations(
|
|
|
241
237
|
model_endpoint: ModelEndpoint,
|
|
242
238
|
model_path: str = "",
|
|
243
239
|
sample_set_statistics: dict[str, typing.Any] = None,
|
|
244
|
-
|
|
245
|
-
possible_drift_threshold: float = None,
|
|
246
|
-
):
|
|
240
|
+
) -> None:
|
|
247
241
|
"""
|
|
248
242
|
Validate that provided model endpoint configurations match the stored fields of the provided `ModelEndpoint`
|
|
249
243
|
object. Usually, this method is called by `get_or_create_model_endpoint()` in cases that the model endpoint
|
|
@@ -257,11 +251,6 @@ def _model_endpoint_validations(
|
|
|
257
251
|
is forbidden to provide a different reference data to that model endpoint.
|
|
258
252
|
In case of discrepancy between the provided `sample_set_statistics` and the
|
|
259
253
|
`model_endpoints.spec.feature_stats`, a warning will be presented to the user.
|
|
260
|
-
:param drift_threshold: The threshold of which to mark drifts. Should be similar to the drift threshold
|
|
261
|
-
that has already assigned to the current model endpoint.
|
|
262
|
-
:param possible_drift_threshold: The threshold of which to mark possible drifts. Should be similar to the possible
|
|
263
|
-
drift threshold that has already assigned to the current model endpoint.
|
|
264
|
-
|
|
265
254
|
"""
|
|
266
255
|
# Model path
|
|
267
256
|
if model_path and model_endpoint.spec.model_uri != model_path:
|
|
@@ -280,28 +269,6 @@ def _model_endpoint_validations(
|
|
|
280
269
|
"Provided sample set statistics is different from the registered statistics. "
|
|
281
270
|
"If new sample set statistics is to be used, new model endpoint should be created"
|
|
282
271
|
)
|
|
283
|
-
# drift and possible drift thresholds
|
|
284
|
-
if drift_threshold:
|
|
285
|
-
current_drift_threshold = model_endpoint.spec.monitor_configuration.get(
|
|
286
|
-
mm_constants.EventFieldType.DRIFT_DETECTED_THRESHOLD,
|
|
287
|
-
mlrun.mlconf.model_endpoint_monitoring.drift_thresholds.default.drift_detected,
|
|
288
|
-
)
|
|
289
|
-
if current_drift_threshold != drift_threshold:
|
|
290
|
-
raise mlrun.errors.MLRunInvalidArgumentError(
|
|
291
|
-
f"Cannot change existing drift threshold. Expected {current_drift_threshold}, got {drift_threshold} "
|
|
292
|
-
f"Please update drift threshold or generate a new model endpoint record"
|
|
293
|
-
)
|
|
294
|
-
|
|
295
|
-
if possible_drift_threshold:
|
|
296
|
-
current_possible_drift_threshold = model_endpoint.spec.monitor_configuration.get(
|
|
297
|
-
mm_constants.EventFieldType.POSSIBLE_DRIFT_THRESHOLD,
|
|
298
|
-
mlrun.mlconf.model_endpoint_monitoring.drift_thresholds.default.possible_drift,
|
|
299
|
-
)
|
|
300
|
-
if current_possible_drift_threshold != possible_drift_threshold:
|
|
301
|
-
raise mlrun.errors.MLRunInvalidArgumentError(
|
|
302
|
-
f"Cannot change existing possible drift threshold. Expected {current_possible_drift_threshold}, "
|
|
303
|
-
f"got {possible_drift_threshold}. Please update drift threshold or generate a new model endpoint record"
|
|
304
|
-
)
|
|
305
272
|
|
|
306
273
|
|
|
307
274
|
def write_monitoring_df(
|
|
@@ -354,8 +321,6 @@ def _generate_model_endpoint(
|
|
|
354
321
|
function_name: str,
|
|
355
322
|
context: mlrun.MLClientCtx,
|
|
356
323
|
sample_set_statistics: dict[str, typing.Any],
|
|
357
|
-
drift_threshold: float,
|
|
358
|
-
possible_drift_threshold: float,
|
|
359
324
|
monitoring_mode: mm_constants.ModelMonitoringMode = mm_constants.ModelMonitoringMode.disabled,
|
|
360
325
|
) -> ModelEndpoint:
|
|
361
326
|
"""
|
|
@@ -374,8 +339,6 @@ def _generate_model_endpoint(
|
|
|
374
339
|
:param sample_set_statistics: Dictionary of sample set statistics that will be used as a reference data for
|
|
375
340
|
the current model endpoint. Will be stored under
|
|
376
341
|
`model_endpoint.status.feature_stats`.
|
|
377
|
-
:param drift_threshold: The threshold of which to mark drifts.
|
|
378
|
-
:param possible_drift_threshold: The threshold of which to mark possible drifts.
|
|
379
342
|
|
|
380
343
|
:return `mlrun.model_monitoring.model_endpoint.ModelEndpoint` object.
|
|
381
344
|
"""
|
|
@@ -393,15 +356,6 @@ def _generate_model_endpoint(
|
|
|
393
356
|
model_endpoint.spec.model_uri = model_path
|
|
394
357
|
model_endpoint.spec.model = model_endpoint_name
|
|
395
358
|
model_endpoint.spec.model_class = "drift-analysis"
|
|
396
|
-
if drift_threshold:
|
|
397
|
-
model_endpoint.spec.monitor_configuration[
|
|
398
|
-
mm_constants.EventFieldType.DRIFT_DETECTED_THRESHOLD
|
|
399
|
-
] = drift_threshold
|
|
400
|
-
if possible_drift_threshold:
|
|
401
|
-
model_endpoint.spec.monitor_configuration[
|
|
402
|
-
mm_constants.EventFieldType.POSSIBLE_DRIFT_THRESHOLD
|
|
403
|
-
] = possible_drift_threshold
|
|
404
|
-
|
|
405
359
|
model_endpoint.spec.monitoring_mode = monitoring_mode
|
|
406
360
|
model_endpoint.status.first_request = model_endpoint.status.last_request = (
|
|
407
361
|
datetime_now().isoformat()
|
|
@@ -615,10 +569,10 @@ def _create_model_monitoring_function_base(
|
|
|
615
569
|
"please use `ModelMonitoringApplicationBaseV2`. It will be removed in 1.9.0.",
|
|
616
570
|
FutureWarning,
|
|
617
571
|
)
|
|
618
|
-
if name in mm_constants.
|
|
572
|
+
if name in mm_constants._RESERVED_FUNCTION_NAMES:
|
|
619
573
|
raise mlrun.errors.MLRunInvalidArgumentError(
|
|
620
|
-
|
|
621
|
-
f"{mm_constants.
|
|
574
|
+
"An application cannot have the following names: "
|
|
575
|
+
f"{mm_constants._RESERVED_FUNCTION_NAMES}"
|
|
622
576
|
)
|
|
623
577
|
if func is None:
|
|
624
578
|
func = ""
|
|
@@ -195,7 +195,10 @@ class HistogramDataDriftApplication(ModelMonitoringApplicationBaseV2):
|
|
|
195
195
|
EventFieldType.CURRENT_STATS: json.dumps(
|
|
196
196
|
monitoring_context.sample_df_stats
|
|
197
197
|
),
|
|
198
|
-
EventFieldType.DRIFT_MEASURES:
|
|
198
|
+
EventFieldType.DRIFT_MEASURES: json.dumps(
|
|
199
|
+
metrics_per_feature.T.to_dict()
|
|
200
|
+
| {metric.name: metric.value for metric in metrics}
|
|
201
|
+
),
|
|
199
202
|
EventFieldType.DRIFT_STATUS: status.value,
|
|
200
203
|
},
|
|
201
204
|
)
|
|
@@ -273,26 +273,14 @@ class MonitoringApplicationController:
|
|
|
273
273
|
Note that the MonitoringApplicationController object requires access keys along with valid project configurations.
|
|
274
274
|
"""
|
|
275
275
|
|
|
276
|
-
def __init__(
|
|
277
|
-
|
|
278
|
-
|
|
279
|
-
project
|
|
280
|
-
):
|
|
281
|
-
"""
|
|
282
|
-
Initialize Monitoring Application Processor object.
|
|
276
|
+
def __init__(self) -> None:
|
|
277
|
+
"""Initialize Monitoring Application Controller"""
|
|
278
|
+
self.project = cast(str, mlrun.mlconf.default_project)
|
|
279
|
+
self.project_obj = mlrun.load_project(name=self.project, url=self.project)
|
|
283
280
|
|
|
284
|
-
|
|
285
|
-
:param project: Project name.
|
|
286
|
-
"""
|
|
287
|
-
self.context = mlrun_context
|
|
288
|
-
self.project = project
|
|
289
|
-
self.project_obj = mlrun.get_or_create_project(project)
|
|
281
|
+
logger.debug(f"Initializing {self.__class__.__name__}", project=self.project)
|
|
290
282
|
|
|
291
|
-
|
|
292
|
-
f"Initializing {self.__class__.__name__}", project=project
|
|
293
|
-
)
|
|
294
|
-
|
|
295
|
-
self.db = mlrun.model_monitoring.get_store_object(project=project)
|
|
283
|
+
self.db = mlrun.model_monitoring.get_store_object(project=self.project)
|
|
296
284
|
|
|
297
285
|
self._batch_window_generator = _BatchWindowGenerator(
|
|
298
286
|
batch_dict=json.loads(
|
|
@@ -322,26 +310,27 @@ class MonitoringApplicationController:
|
|
|
322
310
|
return access_key
|
|
323
311
|
|
|
324
312
|
def _initialize_v3io_configurations(self) -> None:
|
|
325
|
-
self.v3io_framesd = mlrun.mlconf.v3io_framesd
|
|
326
|
-
self.v3io_api = mlrun.mlconf.v3io_api
|
|
327
313
|
self.storage_options = dict(
|
|
328
|
-
v3io_access_key=self.model_monitoring_access_key,
|
|
314
|
+
v3io_access_key=self.model_monitoring_access_key,
|
|
315
|
+
v3io_api=mlrun.mlconf.v3io_api,
|
|
329
316
|
)
|
|
330
317
|
|
|
331
|
-
def run(self
|
|
318
|
+
def run(self) -> None:
|
|
332
319
|
"""
|
|
333
|
-
Main method for run all the relevant monitoring applications on each endpoint
|
|
334
|
-
|
|
335
|
-
|
|
320
|
+
Main method for run all the relevant monitoring applications on each endpoint.
|
|
321
|
+
This method handles the following:
|
|
322
|
+
1. List model endpoints
|
|
323
|
+
2. List applications
|
|
324
|
+
3. Check model monitoring windows
|
|
325
|
+
4. Send data to applications
|
|
326
|
+
5. Delete old parquets
|
|
336
327
|
"""
|
|
337
328
|
logger.info("Start running monitoring controller")
|
|
338
329
|
try:
|
|
339
330
|
applications_names = []
|
|
340
331
|
endpoints = self.db.list_model_endpoints()
|
|
341
332
|
if not endpoints:
|
|
342
|
-
|
|
343
|
-
"No model endpoints found", project=self.project
|
|
344
|
-
)
|
|
333
|
+
logger.info("No model endpoints found", project=self.project)
|
|
345
334
|
return
|
|
346
335
|
monitoring_functions = self.project_obj.list_model_monitoring_functions()
|
|
347
336
|
if monitoring_functions:
|
|
@@ -359,58 +348,49 @@ class MonitoringApplicationController:
|
|
|
359
348
|
}
|
|
360
349
|
)
|
|
361
350
|
if not applications_names:
|
|
362
|
-
|
|
363
|
-
"No monitoring functions found", project=self.project
|
|
364
|
-
)
|
|
351
|
+
logger.info("No monitoring functions found", project=self.project)
|
|
365
352
|
return
|
|
366
|
-
|
|
353
|
+
logger.info(
|
|
367
354
|
"Starting to iterate over the applications",
|
|
368
355
|
applications=applications_names,
|
|
369
356
|
)
|
|
370
357
|
|
|
371
358
|
except Exception as e:
|
|
372
|
-
|
|
359
|
+
logger.error(
|
|
373
360
|
"Failed to list endpoints and monitoring applications",
|
|
374
361
|
exc=err_to_str(e),
|
|
375
362
|
)
|
|
376
363
|
return
|
|
377
364
|
# Initialize a process pool that will be used to run each endpoint applications on a dedicated process
|
|
378
|
-
|
|
379
|
-
max_workers=min(len(endpoints), 10)
|
|
380
|
-
)
|
|
381
|
-
|
|
382
|
-
for endpoint in endpoints:
|
|
383
|
-
if (
|
|
384
|
-
endpoint[mm_constants.EventFieldType.ACTIVE]
|
|
385
|
-
and endpoint[mm_constants.EventFieldType.MONITORING_MODE]
|
|
386
|
-
== mm_constants.ModelMonitoringMode.enabled.value
|
|
387
|
-
):
|
|
388
|
-
# Skip router endpoint:
|
|
365
|
+
with concurrent.futures.ProcessPoolExecutor(
|
|
366
|
+
max_workers=min(len(endpoints), 10)
|
|
367
|
+
) as pool:
|
|
368
|
+
for endpoint in endpoints:
|
|
389
369
|
if (
|
|
390
|
-
|
|
391
|
-
|
|
370
|
+
endpoint[mm_constants.EventFieldType.ACTIVE]
|
|
371
|
+
and endpoint[mm_constants.EventFieldType.MONITORING_MODE]
|
|
372
|
+
== mm_constants.ModelMonitoringMode.enabled.value
|
|
392
373
|
):
|
|
393
|
-
#
|
|
394
|
-
|
|
395
|
-
|
|
374
|
+
# Skip router endpoint:
|
|
375
|
+
if (
|
|
376
|
+
int(endpoint[mm_constants.EventFieldType.ENDPOINT_TYPE])
|
|
377
|
+
== mm_constants.EndpointType.ROUTER
|
|
378
|
+
):
|
|
379
|
+
# Router endpoint has no feature stats
|
|
380
|
+
logger.info(
|
|
381
|
+
f"{endpoint[mm_constants.EventFieldType.UID]} is router, skipping"
|
|
382
|
+
)
|
|
383
|
+
continue
|
|
384
|
+
pool.submit(
|
|
385
|
+
MonitoringApplicationController.model_endpoint_process,
|
|
386
|
+
endpoint=endpoint,
|
|
387
|
+
applications_names=applications_names,
|
|
388
|
+
batch_window_generator=self._batch_window_generator,
|
|
389
|
+
project=self.project,
|
|
390
|
+
parquet_directory=self.parquet_directory,
|
|
391
|
+
storage_options=self.storage_options,
|
|
392
|
+
model_monitoring_access_key=self.model_monitoring_access_key,
|
|
396
393
|
)
|
|
397
|
-
continue
|
|
398
|
-
future = pool.submit(
|
|
399
|
-
MonitoringApplicationController.model_endpoint_process,
|
|
400
|
-
endpoint=endpoint,
|
|
401
|
-
applications_names=applications_names,
|
|
402
|
-
batch_window_generator=self._batch_window_generator,
|
|
403
|
-
project=self.project,
|
|
404
|
-
parquet_directory=self.parquet_directory,
|
|
405
|
-
storage_options=self.storage_options,
|
|
406
|
-
model_monitoring_access_key=self.model_monitoring_access_key,
|
|
407
|
-
)
|
|
408
|
-
futures.append(future)
|
|
409
|
-
|
|
410
|
-
for future in concurrent.futures.as_completed(futures):
|
|
411
|
-
result = future.result()
|
|
412
|
-
if result:
|
|
413
|
-
self.context.log_results(result)
|
|
414
394
|
|
|
415
395
|
self._delete_old_parquet(endpoints=endpoints)
|
|
416
396
|
|
|
@@ -424,7 +404,7 @@ class MonitoringApplicationController:
|
|
|
424
404
|
parquet_directory: str,
|
|
425
405
|
storage_options: dict,
|
|
426
406
|
model_monitoring_access_key: str,
|
|
427
|
-
) ->
|
|
407
|
+
) -> None:
|
|
428
408
|
"""
|
|
429
409
|
Process a model endpoint and trigger the monitoring applications. This function running on different process
|
|
430
410
|
for each endpoint. In addition, this function will generate a parquet file that includes the relevant data
|
|
@@ -437,10 +417,8 @@ class MonitoringApplicationController:
|
|
|
437
417
|
:param parquet_directory: (str) Directory to store application parquet files
|
|
438
418
|
:param storage_options: (dict) Storage options for writing ParquetTarget.
|
|
439
419
|
:param model_monitoring_access_key: (str) Access key to apply the model monitoring process.
|
|
440
|
-
|
|
441
420
|
"""
|
|
442
421
|
endpoint_id = endpoint[mm_constants.EventFieldType.UID]
|
|
443
|
-
start_times: set[datetime.datetime] = set()
|
|
444
422
|
try:
|
|
445
423
|
m_fs = fstore.get_feature_set(
|
|
446
424
|
endpoint[mm_constants.EventFieldType.FEATURE_SET_URI]
|
|
@@ -518,16 +496,12 @@ class MonitoringApplicationController:
|
|
|
518
496
|
model_monitoring_access_key=model_monitoring_access_key,
|
|
519
497
|
parquet_target_path=parquet_target_path,
|
|
520
498
|
)
|
|
521
|
-
start_times.add(start_infer_time)
|
|
522
499
|
except Exception:
|
|
523
500
|
logger.exception(
|
|
524
501
|
"Encountered an exception",
|
|
525
502
|
endpoint_id=endpoint[mm_constants.EventFieldType.UID],
|
|
526
503
|
)
|
|
527
504
|
|
|
528
|
-
if start_times:
|
|
529
|
-
return {endpoint_id: [str(t) for t in sorted(list(start_times))]}
|
|
530
|
-
|
|
531
505
|
def _delete_old_parquet(self, endpoints: list[dict[str, Any]], days: int = 1):
|
|
532
506
|
"""
|
|
533
507
|
Delete application parquets older than the argument days.
|
|
@@ -673,3 +647,13 @@ class MonitoringApplicationController:
|
|
|
673
647
|
),
|
|
674
648
|
)
|
|
675
649
|
return offline_response
|
|
650
|
+
|
|
651
|
+
|
|
652
|
+
def handler(context: nuclio.Context, event: nuclio.Event) -> None:
|
|
653
|
+
"""
|
|
654
|
+
Run model monitoring application processor
|
|
655
|
+
|
|
656
|
+
:param context: the Nuclio context
|
|
657
|
+
:param event: trigger event
|
|
658
|
+
"""
|
|
659
|
+
MonitoringApplicationController().run()
|