mlrun 1.6.4rc2__py3-none-any.whl → 1.7.0rc20__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of mlrun might be problematic. Click here for more details.
- mlrun/__init__.py +11 -1
- mlrun/__main__.py +26 -112
- mlrun/alerts/__init__.py +15 -0
- mlrun/alerts/alert.py +144 -0
- mlrun/api/schemas/__init__.py +5 -4
- mlrun/artifacts/__init__.py +8 -3
- mlrun/artifacts/base.py +46 -257
- mlrun/artifacts/dataset.py +11 -192
- mlrun/artifacts/manager.py +47 -48
- mlrun/artifacts/model.py +31 -159
- mlrun/artifacts/plots.py +23 -380
- mlrun/common/constants.py +69 -0
- mlrun/common/db/sql_session.py +2 -3
- mlrun/common/formatters/__init__.py +19 -0
- mlrun/common/formatters/artifact.py +21 -0
- mlrun/common/formatters/base.py +78 -0
- mlrun/common/formatters/function.py +41 -0
- mlrun/common/formatters/pipeline.py +53 -0
- mlrun/common/formatters/project.py +51 -0
- mlrun/common/helpers.py +1 -2
- mlrun/common/model_monitoring/helpers.py +9 -5
- mlrun/{runtimes → common/runtimes}/constants.py +37 -9
- mlrun/common/schemas/__init__.py +24 -4
- mlrun/common/schemas/alert.py +203 -0
- mlrun/common/schemas/api_gateway.py +148 -0
- mlrun/common/schemas/artifact.py +18 -8
- mlrun/common/schemas/auth.py +11 -5
- mlrun/common/schemas/background_task.py +1 -1
- mlrun/common/schemas/client_spec.py +4 -1
- mlrun/common/schemas/feature_store.py +16 -16
- mlrun/common/schemas/frontend_spec.py +8 -7
- mlrun/common/schemas/function.py +5 -1
- mlrun/common/schemas/hub.py +11 -18
- mlrun/common/schemas/memory_reports.py +2 -2
- mlrun/common/schemas/model_monitoring/__init__.py +18 -3
- mlrun/common/schemas/model_monitoring/constants.py +83 -26
- mlrun/common/schemas/model_monitoring/grafana.py +13 -9
- mlrun/common/schemas/model_monitoring/model_endpoints.py +99 -16
- mlrun/common/schemas/notification.py +4 -4
- mlrun/common/schemas/object.py +2 -2
- mlrun/{runtimes/mpijob/v1alpha1.py → common/schemas/pagination.py} +10 -13
- mlrun/common/schemas/pipeline.py +1 -10
- mlrun/common/schemas/project.py +24 -23
- mlrun/common/schemas/runtime_resource.py +8 -12
- mlrun/common/schemas/schedule.py +3 -3
- mlrun/common/schemas/tag.py +1 -2
- mlrun/common/schemas/workflow.py +2 -2
- mlrun/common/types.py +7 -1
- mlrun/config.py +54 -17
- mlrun/data_types/to_pandas.py +10 -12
- mlrun/datastore/__init__.py +5 -8
- mlrun/datastore/alibaba_oss.py +130 -0
- mlrun/datastore/azure_blob.py +17 -5
- mlrun/datastore/base.py +62 -39
- mlrun/datastore/datastore.py +28 -9
- mlrun/datastore/datastore_profile.py +146 -20
- mlrun/datastore/filestore.py +0 -1
- mlrun/datastore/google_cloud_storage.py +6 -2
- mlrun/datastore/hdfs.py +56 -0
- mlrun/datastore/inmem.py +2 -2
- mlrun/datastore/redis.py +6 -2
- mlrun/datastore/s3.py +9 -0
- mlrun/datastore/snowflake_utils.py +43 -0
- mlrun/datastore/sources.py +201 -96
- mlrun/datastore/spark_utils.py +1 -2
- mlrun/datastore/store_resources.py +7 -7
- mlrun/datastore/targets.py +358 -104
- mlrun/datastore/utils.py +72 -58
- mlrun/datastore/v3io.py +5 -1
- mlrun/db/base.py +185 -35
- mlrun/db/factory.py +1 -1
- mlrun/db/httpdb.py +614 -179
- mlrun/db/nopdb.py +210 -26
- mlrun/errors.py +12 -1
- mlrun/execution.py +41 -24
- mlrun/feature_store/__init__.py +0 -2
- mlrun/feature_store/api.py +40 -72
- mlrun/feature_store/common.py +1 -1
- mlrun/feature_store/feature_set.py +76 -55
- mlrun/feature_store/feature_vector.py +28 -30
- mlrun/feature_store/ingestion.py +7 -6
- mlrun/feature_store/retrieval/base.py +16 -11
- mlrun/feature_store/retrieval/conversion.py +11 -13
- mlrun/feature_store/retrieval/dask_merger.py +2 -0
- mlrun/feature_store/retrieval/job.py +9 -3
- mlrun/feature_store/retrieval/local_merger.py +2 -0
- mlrun/feature_store/retrieval/spark_merger.py +34 -24
- mlrun/feature_store/steps.py +37 -34
- mlrun/features.py +9 -20
- mlrun/frameworks/_common/artifacts_library.py +9 -9
- mlrun/frameworks/_common/mlrun_interface.py +5 -5
- mlrun/frameworks/_common/model_handler.py +48 -48
- mlrun/frameworks/_common/plan.py +2 -3
- mlrun/frameworks/_common/producer.py +3 -4
- mlrun/frameworks/_common/utils.py +5 -5
- mlrun/frameworks/_dl_common/loggers/logger.py +6 -7
- mlrun/frameworks/_dl_common/loggers/mlrun_logger.py +9 -9
- mlrun/frameworks/_dl_common/loggers/tensorboard_logger.py +23 -47
- mlrun/frameworks/_ml_common/artifacts_library.py +1 -2
- mlrun/frameworks/_ml_common/loggers/logger.py +3 -4
- mlrun/frameworks/_ml_common/loggers/mlrun_logger.py +4 -5
- mlrun/frameworks/_ml_common/model_handler.py +24 -24
- mlrun/frameworks/_ml_common/pkl_model_server.py +2 -2
- mlrun/frameworks/_ml_common/plan.py +1 -1
- mlrun/frameworks/_ml_common/plans/calibration_curve_plan.py +2 -3
- mlrun/frameworks/_ml_common/plans/confusion_matrix_plan.py +2 -3
- mlrun/frameworks/_ml_common/plans/dataset_plan.py +3 -3
- mlrun/frameworks/_ml_common/plans/feature_importance_plan.py +3 -3
- mlrun/frameworks/_ml_common/plans/roc_curve_plan.py +4 -4
- mlrun/frameworks/_ml_common/utils.py +4 -4
- mlrun/frameworks/auto_mlrun/auto_mlrun.py +9 -9
- mlrun/frameworks/huggingface/model_server.py +4 -4
- mlrun/frameworks/lgbm/__init__.py +33 -33
- mlrun/frameworks/lgbm/callbacks/callback.py +2 -4
- mlrun/frameworks/lgbm/callbacks/logging_callback.py +4 -5
- mlrun/frameworks/lgbm/callbacks/mlrun_logging_callback.py +4 -5
- mlrun/frameworks/lgbm/mlrun_interfaces/booster_mlrun_interface.py +1 -3
- mlrun/frameworks/lgbm/mlrun_interfaces/mlrun_interface.py +6 -6
- mlrun/frameworks/lgbm/model_handler.py +10 -10
- mlrun/frameworks/lgbm/model_server.py +6 -6
- mlrun/frameworks/lgbm/utils.py +5 -5
- mlrun/frameworks/onnx/dataset.py +8 -8
- mlrun/frameworks/onnx/mlrun_interface.py +3 -3
- mlrun/frameworks/onnx/model_handler.py +6 -6
- mlrun/frameworks/onnx/model_server.py +7 -7
- mlrun/frameworks/parallel_coordinates.py +4 -3
- mlrun/frameworks/pytorch/__init__.py +18 -18
- mlrun/frameworks/pytorch/callbacks/callback.py +4 -5
- mlrun/frameworks/pytorch/callbacks/logging_callback.py +17 -17
- mlrun/frameworks/pytorch/callbacks/mlrun_logging_callback.py +11 -11
- mlrun/frameworks/pytorch/callbacks/tensorboard_logging_callback.py +23 -29
- mlrun/frameworks/pytorch/callbacks_handler.py +38 -38
- mlrun/frameworks/pytorch/mlrun_interface.py +20 -20
- mlrun/frameworks/pytorch/model_handler.py +17 -17
- mlrun/frameworks/pytorch/model_server.py +7 -7
- mlrun/frameworks/sklearn/__init__.py +13 -13
- mlrun/frameworks/sklearn/estimator.py +4 -4
- mlrun/frameworks/sklearn/metrics_library.py +14 -14
- mlrun/frameworks/sklearn/mlrun_interface.py +3 -6
- mlrun/frameworks/sklearn/model_handler.py +2 -2
- mlrun/frameworks/tf_keras/__init__.py +10 -7
- mlrun/frameworks/tf_keras/callbacks/logging_callback.py +15 -15
- mlrun/frameworks/tf_keras/callbacks/mlrun_logging_callback.py +11 -11
- mlrun/frameworks/tf_keras/callbacks/tensorboard_logging_callback.py +19 -23
- mlrun/frameworks/tf_keras/mlrun_interface.py +9 -11
- mlrun/frameworks/tf_keras/model_handler.py +14 -14
- mlrun/frameworks/tf_keras/model_server.py +6 -6
- mlrun/frameworks/xgboost/__init__.py +13 -13
- mlrun/frameworks/xgboost/model_handler.py +6 -6
- mlrun/k8s_utils.py +14 -16
- mlrun/launcher/__init__.py +1 -1
- mlrun/launcher/base.py +16 -15
- mlrun/launcher/client.py +8 -6
- mlrun/launcher/factory.py +1 -1
- mlrun/launcher/local.py +17 -11
- mlrun/launcher/remote.py +16 -10
- mlrun/lists.py +7 -6
- mlrun/model.py +238 -73
- mlrun/model_monitoring/__init__.py +1 -1
- mlrun/model_monitoring/api.py +138 -315
- mlrun/model_monitoring/application.py +5 -296
- mlrun/model_monitoring/applications/__init__.py +24 -0
- mlrun/model_monitoring/applications/_application_steps.py +157 -0
- mlrun/model_monitoring/applications/base.py +282 -0
- mlrun/model_monitoring/applications/context.py +214 -0
- mlrun/model_monitoring/applications/evidently_base.py +211 -0
- mlrun/model_monitoring/applications/histogram_data_drift.py +349 -0
- mlrun/model_monitoring/applications/results.py +99 -0
- mlrun/model_monitoring/controller.py +104 -84
- mlrun/model_monitoring/controller_handler.py +13 -5
- mlrun/model_monitoring/db/__init__.py +18 -0
- mlrun/model_monitoring/{stores → db/stores}/__init__.py +43 -36
- mlrun/model_monitoring/db/stores/base/__init__.py +15 -0
- mlrun/model_monitoring/{stores/model_endpoint_store.py → db/stores/base/store.py} +64 -40
- mlrun/model_monitoring/db/stores/sqldb/__init__.py +13 -0
- mlrun/model_monitoring/db/stores/sqldb/models/__init__.py +71 -0
- mlrun/model_monitoring/{stores → db/stores/sqldb}/models/base.py +109 -5
- mlrun/model_monitoring/db/stores/sqldb/models/mysql.py +88 -0
- mlrun/model_monitoring/{stores/models/mysql.py → db/stores/sqldb/models/sqlite.py} +19 -13
- mlrun/model_monitoring/db/stores/sqldb/sql_store.py +684 -0
- mlrun/model_monitoring/db/stores/v3io_kv/__init__.py +13 -0
- mlrun/model_monitoring/{stores/kv_model_endpoint_store.py → db/stores/v3io_kv/kv_store.py} +310 -165
- mlrun/model_monitoring/db/tsdb/__init__.py +100 -0
- mlrun/model_monitoring/db/tsdb/base.py +329 -0
- mlrun/model_monitoring/db/tsdb/helpers.py +30 -0
- mlrun/model_monitoring/db/tsdb/tdengine/__init__.py +15 -0
- mlrun/model_monitoring/db/tsdb/tdengine/schemas.py +240 -0
- mlrun/model_monitoring/db/tsdb/tdengine/stream_graph_steps.py +45 -0
- mlrun/model_monitoring/db/tsdb/tdengine/tdengine_connector.py +397 -0
- mlrun/model_monitoring/db/tsdb/v3io/__init__.py +15 -0
- mlrun/model_monitoring/db/tsdb/v3io/stream_graph_steps.py +117 -0
- mlrun/model_monitoring/db/tsdb/v3io/v3io_connector.py +630 -0
- mlrun/model_monitoring/evidently_application.py +6 -118
- mlrun/model_monitoring/features_drift_table.py +134 -106
- mlrun/model_monitoring/helpers.py +127 -28
- mlrun/model_monitoring/metrics/__init__.py +13 -0
- mlrun/model_monitoring/metrics/histogram_distance.py +127 -0
- mlrun/model_monitoring/model_endpoint.py +3 -2
- mlrun/model_monitoring/prometheus.py +1 -4
- mlrun/model_monitoring/stream_processing.py +62 -231
- mlrun/model_monitoring/tracking_policy.py +9 -2
- mlrun/model_monitoring/writer.py +152 -124
- mlrun/package/__init__.py +6 -6
- mlrun/package/context_handler.py +5 -5
- mlrun/package/packager.py +7 -7
- mlrun/package/packagers/default_packager.py +6 -6
- mlrun/package/packagers/numpy_packagers.py +15 -15
- mlrun/package/packagers/pandas_packagers.py +5 -5
- mlrun/package/packagers/python_standard_library_packagers.py +10 -10
- mlrun/package/packagers_manager.py +19 -23
- mlrun/package/utils/_formatter.py +6 -6
- mlrun/package/utils/_pickler.py +2 -2
- mlrun/package/utils/_supported_format.py +4 -4
- mlrun/package/utils/log_hint_utils.py +2 -2
- mlrun/package/utils/type_hint_utils.py +4 -9
- mlrun/platforms/__init__.py +11 -10
- mlrun/platforms/iguazio.py +24 -203
- mlrun/projects/operations.py +35 -21
- mlrun/projects/pipelines.py +68 -99
- mlrun/projects/project.py +830 -266
- mlrun/render.py +3 -11
- mlrun/run.py +162 -166
- mlrun/runtimes/__init__.py +62 -7
- mlrun/runtimes/base.py +39 -32
- mlrun/runtimes/daskjob.py +8 -8
- mlrun/runtimes/databricks_job/databricks_cancel_task.py +1 -1
- mlrun/runtimes/databricks_job/databricks_runtime.py +7 -7
- mlrun/runtimes/databricks_job/databricks_wrapper.py +1 -1
- mlrun/runtimes/funcdoc.py +0 -28
- mlrun/runtimes/function_reference.py +1 -1
- mlrun/runtimes/kubejob.py +28 -122
- mlrun/runtimes/local.py +6 -3
- mlrun/runtimes/mpijob/__init__.py +0 -20
- mlrun/runtimes/mpijob/abstract.py +9 -10
- mlrun/runtimes/mpijob/v1.py +1 -1
- mlrun/{model_monitoring/stores/models/sqlite.py → runtimes/nuclio/__init__.py} +7 -9
- mlrun/runtimes/nuclio/api_gateway.py +709 -0
- mlrun/runtimes/nuclio/application/__init__.py +15 -0
- mlrun/runtimes/nuclio/application/application.py +523 -0
- mlrun/runtimes/nuclio/application/reverse_proxy.go +95 -0
- mlrun/runtimes/{function.py → nuclio/function.py} +112 -73
- mlrun/runtimes/{nuclio.py → nuclio/nuclio.py} +6 -6
- mlrun/runtimes/{serving.py → nuclio/serving.py} +45 -51
- mlrun/runtimes/pod.py +286 -88
- mlrun/runtimes/remotesparkjob.py +2 -2
- mlrun/runtimes/sparkjob/spark3job.py +51 -34
- mlrun/runtimes/utils.py +7 -75
- mlrun/secrets.py +9 -5
- mlrun/serving/remote.py +2 -7
- mlrun/serving/routers.py +13 -10
- mlrun/serving/server.py +22 -26
- mlrun/serving/states.py +99 -25
- mlrun/serving/utils.py +3 -3
- mlrun/serving/v1_serving.py +6 -7
- mlrun/serving/v2_serving.py +59 -20
- mlrun/track/tracker.py +2 -1
- mlrun/track/tracker_manager.py +3 -3
- mlrun/track/trackers/mlflow_tracker.py +1 -2
- mlrun/utils/async_http.py +5 -7
- mlrun/utils/azure_vault.py +1 -1
- mlrun/utils/clones.py +1 -2
- mlrun/utils/condition_evaluator.py +3 -3
- mlrun/utils/db.py +3 -3
- mlrun/utils/helpers.py +183 -197
- mlrun/utils/http.py +2 -5
- mlrun/utils/logger.py +76 -14
- mlrun/utils/notifications/notification/__init__.py +17 -12
- mlrun/utils/notifications/notification/base.py +14 -2
- mlrun/utils/notifications/notification/console.py +2 -0
- mlrun/utils/notifications/notification/git.py +3 -1
- mlrun/utils/notifications/notification/ipython.py +3 -1
- mlrun/utils/notifications/notification/slack.py +101 -21
- mlrun/utils/notifications/notification/webhook.py +11 -1
- mlrun/utils/notifications/notification_pusher.py +155 -30
- mlrun/utils/retryer.py +208 -0
- mlrun/utils/singleton.py +1 -1
- mlrun/utils/v3io_clients.py +2 -4
- mlrun/utils/version/version.json +2 -2
- mlrun/utils/version/version.py +2 -6
- {mlrun-1.6.4rc2.dist-info → mlrun-1.7.0rc20.dist-info}/METADATA +31 -19
- mlrun-1.7.0rc20.dist-info/RECORD +353 -0
- mlrun/kfpops.py +0 -868
- mlrun/model_monitoring/batch.py +0 -1095
- mlrun/model_monitoring/stores/models/__init__.py +0 -27
- mlrun/model_monitoring/stores/sql_model_endpoint_store.py +0 -384
- mlrun/platforms/other.py +0 -306
- mlrun-1.6.4rc2.dist-info/RECORD +0 -314
- {mlrun-1.6.4rc2.dist-info → mlrun-1.7.0rc20.dist-info}/LICENSE +0 -0
- {mlrun-1.6.4rc2.dist-info → mlrun-1.7.0rc20.dist-info}/WHEEL +0 -0
- {mlrun-1.6.4rc2.dist-info → mlrun-1.7.0rc20.dist-info}/entry_points.txt +0 -0
- {mlrun-1.6.4rc2.dist-info → mlrun-1.7.0rc20.dist-info}/top_level.txt +0 -0
|
@@ -17,26 +17,28 @@ import datetime
|
|
|
17
17
|
import json
|
|
18
18
|
import os
|
|
19
19
|
import re
|
|
20
|
-
from
|
|
20
|
+
from collections.abc import Iterator
|
|
21
|
+
from typing import Any, NamedTuple, Optional, Union, cast
|
|
21
22
|
|
|
22
|
-
|
|
23
|
+
import nuclio
|
|
23
24
|
|
|
24
25
|
import mlrun
|
|
25
26
|
import mlrun.common.schemas.model_monitoring.constants as mm_constants
|
|
26
27
|
import mlrun.data_types.infer
|
|
27
28
|
import mlrun.feature_store as fstore
|
|
29
|
+
import mlrun.model_monitoring.db.stores
|
|
28
30
|
from mlrun.common.model_monitoring.helpers import FeatureStats, pad_features_hist
|
|
29
31
|
from mlrun.datastore import get_stream_pusher
|
|
30
32
|
from mlrun.datastore.targets import ParquetTarget
|
|
31
|
-
from mlrun.
|
|
33
|
+
from mlrun.errors import err_to_str
|
|
32
34
|
from mlrun.model_monitoring.helpers import (
|
|
33
35
|
_BatchDict,
|
|
34
36
|
batch_dict2timedelta,
|
|
37
|
+
calculate_inputs_statistics,
|
|
35
38
|
get_monitoring_parquet_path,
|
|
36
39
|
get_stream_path,
|
|
37
40
|
)
|
|
38
|
-
from mlrun.utils import
|
|
39
|
-
from mlrun.utils.v3io_clients import get_v3io_client
|
|
41
|
+
from mlrun.utils import datetime_now, logger
|
|
40
42
|
|
|
41
43
|
|
|
42
44
|
class _Interval(NamedTuple):
|
|
@@ -45,8 +47,6 @@ class _Interval(NamedTuple):
|
|
|
45
47
|
|
|
46
48
|
|
|
47
49
|
class _BatchWindow:
|
|
48
|
-
V3IO_CONTAINER_FORMAT = "users/pipelines/{project}/monitoring-schedules/functions"
|
|
49
|
-
|
|
50
50
|
def __init__(
|
|
51
51
|
self,
|
|
52
52
|
project: str,
|
|
@@ -62,27 +62,22 @@ class _BatchWindow:
|
|
|
62
62
|
All the time values are in seconds.
|
|
63
63
|
The start and stop time are in seconds since the epoch.
|
|
64
64
|
"""
|
|
65
|
+
self.project = project
|
|
65
66
|
self._endpoint = endpoint
|
|
66
67
|
self._application = application
|
|
67
68
|
self._first_request = first_request
|
|
68
|
-
self._kv_storage = get_v3io_client(
|
|
69
|
-
endpoint=mlrun.mlconf.v3io_api,
|
|
70
|
-
# Avoid noisy warning logs before the KV table is created
|
|
71
|
-
logger=create_logger(name="v3io_client", level="error"),
|
|
72
|
-
).kv
|
|
73
|
-
self._v3io_container = self.V3IO_CONTAINER_FORMAT.format(project=project)
|
|
74
69
|
self._stop = last_updated
|
|
75
70
|
self._step = timedelta_seconds
|
|
71
|
+
self._db = mlrun.model_monitoring.get_store_object(project=self.project)
|
|
76
72
|
self._start = self._get_last_analyzed()
|
|
77
73
|
|
|
78
74
|
def _get_last_analyzed(self) -> Optional[int]:
|
|
79
75
|
try:
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
key=self._application,
|
|
76
|
+
last_analyzed = self._db.get_last_analyzed(
|
|
77
|
+
endpoint_id=self._endpoint,
|
|
78
|
+
application_name=self._application,
|
|
84
79
|
)
|
|
85
|
-
except
|
|
80
|
+
except mlrun.errors.MLRunNotFoundError:
|
|
86
81
|
logger.info(
|
|
87
82
|
"No last analyzed time was found for this endpoint and "
|
|
88
83
|
"application, as this is probably the first time this "
|
|
@@ -93,7 +88,7 @@ class _BatchWindow:
|
|
|
93
88
|
first_request=self._first_request,
|
|
94
89
|
last_updated=self._stop,
|
|
95
90
|
)
|
|
96
|
-
|
|
91
|
+
|
|
97
92
|
if self._first_request and self._stop:
|
|
98
93
|
# TODO : Change the timedelta according to the policy.
|
|
99
94
|
first_period_in_seconds = max(
|
|
@@ -105,7 +100,6 @@ class _BatchWindow:
|
|
|
105
100
|
)
|
|
106
101
|
return self._first_request
|
|
107
102
|
|
|
108
|
-
last_analyzed = data.output.item[mm_constants.SchedulingKeys.LAST_ANALYZED]
|
|
109
103
|
logger.info(
|
|
110
104
|
"Got the last analyzed time for this endpoint and application",
|
|
111
105
|
endpoint=self._endpoint,
|
|
@@ -121,11 +115,11 @@ class _BatchWindow:
|
|
|
121
115
|
application=self._application,
|
|
122
116
|
last_analyzed=last_analyzed,
|
|
123
117
|
)
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
118
|
+
|
|
119
|
+
self._db.update_last_analyzed(
|
|
120
|
+
endpoint_id=self._endpoint,
|
|
121
|
+
application_name=self._application,
|
|
122
|
+
last_analyzed=last_analyzed,
|
|
129
123
|
)
|
|
130
124
|
|
|
131
125
|
def get_intervals(
|
|
@@ -281,33 +275,33 @@ class MonitoringApplicationController:
|
|
|
281
275
|
|
|
282
276
|
def __init__(
|
|
283
277
|
self,
|
|
284
|
-
|
|
278
|
+
mlrun_context: mlrun.run.MLClientCtx,
|
|
285
279
|
project: str,
|
|
286
280
|
):
|
|
287
281
|
"""
|
|
288
282
|
Initialize Monitoring Application Processor object.
|
|
289
283
|
|
|
290
|
-
:param
|
|
284
|
+
:param mlrun_context: An MLRun context.
|
|
291
285
|
:param project: Project name.
|
|
292
286
|
"""
|
|
293
|
-
self.context =
|
|
287
|
+
self.context = mlrun_context
|
|
294
288
|
self.project = project
|
|
295
289
|
self.project_obj = mlrun.get_or_create_project(project)
|
|
296
290
|
|
|
297
|
-
|
|
291
|
+
mlrun_context.logger.debug(
|
|
292
|
+
f"Initializing {self.__class__.__name__}", project=project
|
|
293
|
+
)
|
|
298
294
|
|
|
299
|
-
self.db = mlrun.model_monitoring.
|
|
295
|
+
self.db = mlrun.model_monitoring.get_store_object(project=project)
|
|
300
296
|
|
|
301
297
|
self._batch_window_generator = _BatchWindowGenerator(
|
|
302
|
-
batch_dict=
|
|
303
|
-
|
|
304
|
-
|
|
298
|
+
batch_dict=json.loads(
|
|
299
|
+
mlrun.get_secret_or_env(
|
|
300
|
+
mm_constants.EventFieldType.BATCH_INTERVALS_DICT
|
|
301
|
+
)
|
|
302
|
+
)
|
|
305
303
|
)
|
|
306
304
|
|
|
307
|
-
# If provided, only model endpoints in that that list will be analyzed
|
|
308
|
-
self.model_endpoints = context.parameters.get(
|
|
309
|
-
mm_constants.EventFieldType.MODEL_ENDPOINTS, None
|
|
310
|
-
)
|
|
311
305
|
self.model_monitoring_access_key = self._get_model_monitoring_access_key()
|
|
312
306
|
self.parquet_directory = get_monitoring_parquet_path(
|
|
313
307
|
self.project_obj,
|
|
@@ -334,66 +328,91 @@ class MonitoringApplicationController:
|
|
|
334
328
|
v3io_access_key=self.model_monitoring_access_key, v3io_api=self.v3io_api
|
|
335
329
|
)
|
|
336
330
|
|
|
337
|
-
def run(self):
|
|
331
|
+
def run(self, event: nuclio.Event):
|
|
338
332
|
"""
|
|
339
333
|
Main method for run all the relevant monitoring applications on each endpoint
|
|
334
|
+
|
|
335
|
+
:param event: trigger event
|
|
340
336
|
"""
|
|
337
|
+
logger.info("Start running monitoring controller")
|
|
341
338
|
try:
|
|
342
|
-
|
|
339
|
+
applications_names = []
|
|
340
|
+
endpoints = self.db.list_model_endpoints()
|
|
341
|
+
if not endpoints:
|
|
342
|
+
self.context.logger.info(
|
|
343
|
+
"No model endpoints found", project=self.project
|
|
344
|
+
)
|
|
345
|
+
return
|
|
343
346
|
monitoring_functions = self.project_obj.list_model_monitoring_functions()
|
|
344
347
|
if monitoring_functions:
|
|
348
|
+
# Gets only application in ready state
|
|
345
349
|
applications_names = list(
|
|
346
|
-
{
|
|
350
|
+
{
|
|
351
|
+
app.metadata.name
|
|
352
|
+
for app in monitoring_functions
|
|
353
|
+
if (
|
|
354
|
+
app.status.state == "ready"
|
|
355
|
+
# workaround for the default app, as its `status.state` is `None`
|
|
356
|
+
or app.metadata.name
|
|
357
|
+
== mm_constants.HistogramDataDriftApplicationConstants.NAME
|
|
358
|
+
)
|
|
359
|
+
}
|
|
347
360
|
)
|
|
348
|
-
|
|
361
|
+
if not applications_names:
|
|
349
362
|
self.context.logger.info(
|
|
350
363
|
"No monitoring functions found", project=self.project
|
|
351
364
|
)
|
|
352
|
-
|
|
365
|
+
return
|
|
366
|
+
self.context.logger.info(
|
|
367
|
+
"Starting to iterate over the applications",
|
|
368
|
+
applications=applications_names,
|
|
369
|
+
)
|
|
353
370
|
|
|
354
371
|
except Exception as e:
|
|
355
|
-
self.context.logger.error(
|
|
356
|
-
|
|
357
|
-
|
|
358
|
-
# Initialize a process pool that will be used to run each endpoint applications on a dedicated process
|
|
359
|
-
pool = concurrent.futures.ProcessPoolExecutor(
|
|
360
|
-
max_workers=min(len(endpoints), 10),
|
|
372
|
+
self.context.logger.error(
|
|
373
|
+
"Failed to list endpoints and monitoring applications",
|
|
374
|
+
exc=err_to_str(e),
|
|
361
375
|
)
|
|
362
|
-
|
|
363
|
-
|
|
376
|
+
return
|
|
377
|
+
# Initialize a process pool that will be used to run each endpoint applications on a dedicated process
|
|
378
|
+
pool = concurrent.futures.ProcessPoolExecutor(
|
|
379
|
+
max_workers=min(len(endpoints), 10),
|
|
380
|
+
)
|
|
381
|
+
futures = []
|
|
382
|
+
for endpoint in endpoints:
|
|
383
|
+
if (
|
|
384
|
+
endpoint[mm_constants.EventFieldType.ACTIVE]
|
|
385
|
+
and endpoint[mm_constants.EventFieldType.MONITORING_MODE]
|
|
386
|
+
== mm_constants.ModelMonitoringMode.enabled.value
|
|
387
|
+
):
|
|
388
|
+
# Skip router endpoint:
|
|
364
389
|
if (
|
|
365
|
-
endpoint[mm_constants.EventFieldType.
|
|
366
|
-
|
|
367
|
-
== mm_constants.ModelMonitoringMode.enabled.value
|
|
390
|
+
int(endpoint[mm_constants.EventFieldType.ENDPOINT_TYPE])
|
|
391
|
+
== mm_constants.EndpointType.ROUTER
|
|
368
392
|
):
|
|
369
|
-
#
|
|
370
|
-
|
|
371
|
-
|
|
372
|
-
== mm_constants.EndpointType.ROUTER
|
|
373
|
-
):
|
|
374
|
-
# Router endpoint has no feature stats
|
|
375
|
-
logger.info(
|
|
376
|
-
f"{endpoint[mm_constants.EventFieldType.UID]} is router skipping"
|
|
377
|
-
)
|
|
378
|
-
continue
|
|
379
|
-
future = pool.submit(
|
|
380
|
-
MonitoringApplicationController.model_endpoint_process,
|
|
381
|
-
endpoint=endpoint,
|
|
382
|
-
applications_names=applications_names,
|
|
383
|
-
batch_window_generator=self._batch_window_generator,
|
|
384
|
-
project=self.project,
|
|
385
|
-
parquet_directory=self.parquet_directory,
|
|
386
|
-
storage_options=self.storage_options,
|
|
387
|
-
model_monitoring_access_key=self.model_monitoring_access_key,
|
|
393
|
+
# Router endpoint has no feature stats
|
|
394
|
+
logger.info(
|
|
395
|
+
f"{endpoint[mm_constants.EventFieldType.UID]} is router skipping"
|
|
388
396
|
)
|
|
389
|
-
|
|
397
|
+
continue
|
|
398
|
+
future = pool.submit(
|
|
399
|
+
MonitoringApplicationController.model_endpoint_process,
|
|
400
|
+
endpoint=endpoint,
|
|
401
|
+
applications_names=applications_names,
|
|
402
|
+
batch_window_generator=self._batch_window_generator,
|
|
403
|
+
project=self.project,
|
|
404
|
+
parquet_directory=self.parquet_directory,
|
|
405
|
+
storage_options=self.storage_options,
|
|
406
|
+
model_monitoring_access_key=self.model_monitoring_access_key,
|
|
407
|
+
)
|
|
408
|
+
futures.append(future)
|
|
390
409
|
|
|
391
|
-
|
|
392
|
-
|
|
393
|
-
|
|
394
|
-
|
|
410
|
+
for future in concurrent.futures.as_completed(futures):
|
|
411
|
+
result = future.result()
|
|
412
|
+
if result:
|
|
413
|
+
self.context.log_results(result)
|
|
395
414
|
|
|
396
|
-
|
|
415
|
+
self._delete_old_parquet(endpoints=endpoints)
|
|
397
416
|
|
|
398
417
|
@classmethod
|
|
399
418
|
def model_endpoint_process(
|
|
@@ -438,6 +457,7 @@ class MonitoringApplicationController:
|
|
|
438
457
|
)
|
|
439
458
|
|
|
440
459
|
for start_infer_time, end_infer_time in batch_window.get_intervals():
|
|
460
|
+
# start - TODO : delete in 1.9.0 (V1 app deprecation)
|
|
441
461
|
try:
|
|
442
462
|
# Get application sample data
|
|
443
463
|
offline_response = cls._get_sample_df(
|
|
@@ -483,10 +503,9 @@ class MonitoringApplicationController:
|
|
|
483
503
|
|
|
484
504
|
# Get the current stats:
|
|
485
505
|
current_stats = calculate_inputs_statistics(
|
|
486
|
-
sample_set_statistics=feature_stats,
|
|
487
|
-
inputs=df,
|
|
506
|
+
sample_set_statistics=feature_stats, inputs=df
|
|
488
507
|
)
|
|
489
|
-
|
|
508
|
+
# end - TODO : delete in 1.9.0 (V1 app deprecation)
|
|
490
509
|
cls._push_to_applications(
|
|
491
510
|
current_stats=current_stats,
|
|
492
511
|
feature_stats=feature_stats,
|
|
@@ -517,7 +536,7 @@ class MonitoringApplicationController:
|
|
|
517
536
|
"""
|
|
518
537
|
if self.parquet_directory.startswith("v3io:///"):
|
|
519
538
|
# create fs with access to the user side (under projects)
|
|
520
|
-
store, _ = mlrun.store_manager.get_or_create_store(
|
|
539
|
+
store, _, _ = mlrun.store_manager.get_or_create_store(
|
|
521
540
|
self.parquet_directory,
|
|
522
541
|
{"V3IO_ACCESS_KEY": self.model_monitoring_access_key},
|
|
523
542
|
)
|
|
@@ -593,12 +612,13 @@ class MonitoringApplicationController:
|
|
|
593
612
|
mm_constants.ApplicationEvent.ENDPOINT_ID: endpoint_id,
|
|
594
613
|
mm_constants.ApplicationEvent.OUTPUT_STREAM_URI: get_stream_path(
|
|
595
614
|
project=project,
|
|
596
|
-
|
|
615
|
+
function_name=mm_constants.MonitoringFunctionNames.WRITER,
|
|
597
616
|
),
|
|
617
|
+
mm_constants.ApplicationEvent.MLRUN_CONTEXT: {}, # TODO : for future use by ad-hoc batch infer
|
|
598
618
|
}
|
|
599
619
|
for app_name in applications_names:
|
|
600
620
|
data.update({mm_constants.ApplicationEvent.APPLICATION_NAME: app_name})
|
|
601
|
-
stream_uri = get_stream_path(project=project,
|
|
621
|
+
stream_uri = get_stream_path(project=project, function_name=app_name)
|
|
602
622
|
|
|
603
623
|
logger.info(
|
|
604
624
|
f"push endpoint_id {endpoint_id} to {app_name} by stream :{stream_uri}"
|
|
@@ -11,19 +11,27 @@
|
|
|
11
11
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
12
|
# See the License for the specific language governing permissions and
|
|
13
13
|
# limitations under the License.
|
|
14
|
+
import nuclio
|
|
14
15
|
|
|
15
16
|
import mlrun
|
|
16
17
|
from mlrun.model_monitoring.controller import MonitoringApplicationController
|
|
17
18
|
|
|
18
19
|
|
|
19
|
-
def handler(context:
|
|
20
|
+
def handler(context: nuclio.Context, event: nuclio.Event) -> None:
|
|
20
21
|
"""
|
|
21
22
|
Run model monitoring application processor
|
|
22
23
|
|
|
23
|
-
:param context: the
|
|
24
|
+
:param context: the Nuclio context
|
|
25
|
+
:param event: trigger event
|
|
24
26
|
"""
|
|
27
|
+
context.user_data.monitor_app_controller.run(event)
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
def init_context(context):
|
|
31
|
+
mlrun_context = mlrun.get_or_create_ctx("model_monitoring_controller")
|
|
32
|
+
mlrun_context.logger.info("Initialize monitoring app controller")
|
|
25
33
|
monitor_app_controller = MonitoringApplicationController(
|
|
26
|
-
|
|
27
|
-
project=
|
|
34
|
+
mlrun_context=mlrun_context,
|
|
35
|
+
project=mlrun_context.project,
|
|
28
36
|
)
|
|
29
|
-
monitor_app_controller
|
|
37
|
+
setattr(context.user_data, "monitor_app_controller", monitor_app_controller)
|
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
# Copyright 2024 Iguazio
|
|
2
|
+
#
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
#
|
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
#
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
14
|
+
|
|
15
|
+
from .stores import ObjectStoreFactory, get_store_object
|
|
16
|
+
from .stores.base import StoreBase
|
|
17
|
+
from .tsdb import get_tsdb_connector
|
|
18
|
+
from .tsdb.base import TSDBConnector
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
# Copyright
|
|
1
|
+
# Copyright 2024 Iguazio
|
|
2
2
|
#
|
|
3
3
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
4
|
# you may not use this file except in compliance with the License.
|
|
@@ -12,64 +12,56 @@
|
|
|
12
12
|
# See the License for the specific language governing permissions and
|
|
13
13
|
# limitations under the License.
|
|
14
14
|
|
|
15
|
-
# flake8: noqa - this is until we take care of the F401 violations with respect to __all__ & sphinx
|
|
16
|
-
|
|
17
15
|
import enum
|
|
18
16
|
import typing
|
|
17
|
+
import warnings
|
|
19
18
|
|
|
20
19
|
import mlrun.common.schemas.secret
|
|
21
20
|
import mlrun.errors
|
|
22
21
|
|
|
23
|
-
from .
|
|
22
|
+
from .base import StoreBase
|
|
24
23
|
|
|
25
24
|
|
|
26
|
-
class
|
|
27
|
-
"""Enum class to handle the different store type values for saving
|
|
25
|
+
class ObjectStoreFactory(enum.Enum):
|
|
26
|
+
"""Enum class to handle the different store type values for saving model monitoring records."""
|
|
28
27
|
|
|
29
28
|
v3io_nosql = "v3io-nosql"
|
|
30
29
|
SQL = "sql"
|
|
31
30
|
|
|
32
|
-
def
|
|
31
|
+
def to_object_store(
|
|
33
32
|
self,
|
|
34
33
|
project: str,
|
|
35
34
|
access_key: str = None,
|
|
36
|
-
endpoint_store_connection: str = None,
|
|
37
35
|
secret_provider: typing.Callable = None,
|
|
38
|
-
) ->
|
|
36
|
+
) -> StoreBase:
|
|
39
37
|
"""
|
|
40
|
-
Return a
|
|
41
|
-
|
|
42
|
-
:param project:
|
|
43
|
-
:param access_key:
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
:param endpoint_store_connection: A valid connection string for model endpoint target. Contains several
|
|
47
|
-
key-value pairs that required for the database connection.
|
|
48
|
-
e.g. A root user with password 1234, tries to connect a schema called
|
|
49
|
-
mlrun within a local MySQL DB instance:
|
|
50
|
-
'mysql+pymysql://root:1234@localhost:3306/mlrun'.
|
|
38
|
+
Return a StoreBase object based on the provided enum value.
|
|
39
|
+
|
|
40
|
+
:param project: The name of the project.
|
|
41
|
+
:param access_key: Access key with permission to the DB table. Note that if access key is None
|
|
42
|
+
and the endpoint target is from type KV then the access key will be
|
|
43
|
+
retrieved from the environment variable.
|
|
51
44
|
:param secret_provider: An optional secret provider to get the connection string secret.
|
|
52
45
|
|
|
53
|
-
:return: `
|
|
46
|
+
:return: `StoreBase` object.
|
|
54
47
|
|
|
55
48
|
"""
|
|
56
49
|
|
|
57
|
-
if self
|
|
58
|
-
from .
|
|
50
|
+
if self == self.v3io_nosql:
|
|
51
|
+
from mlrun.model_monitoring.db.stores.v3io_kv.kv_store import KVStoreBase
|
|
59
52
|
|
|
60
53
|
# Get V3IO access key from env
|
|
61
54
|
access_key = access_key or mlrun.mlconf.get_v3io_access_key()
|
|
62
55
|
|
|
63
|
-
return
|
|
56
|
+
return KVStoreBase(project=project, access_key=access_key)
|
|
64
57
|
|
|
65
58
|
# Assuming SQL store target if store type is not KV.
|
|
66
59
|
# Update these lines once there are more than two store target types.
|
|
67
60
|
|
|
68
|
-
from .
|
|
61
|
+
from mlrun.model_monitoring.db.stores.sqldb.sql_store import SQLStoreBase
|
|
69
62
|
|
|
70
|
-
return
|
|
63
|
+
return SQLStoreBase(
|
|
71
64
|
project=project,
|
|
72
|
-
sql_connection_string=endpoint_store_connection,
|
|
73
65
|
secret_provider=secret_provider,
|
|
74
66
|
)
|
|
75
67
|
|
|
@@ -88,7 +80,24 @@ def get_model_endpoint_store(
|
|
|
88
80
|
project: str,
|
|
89
81
|
access_key: str = None,
|
|
90
82
|
secret_provider: typing.Callable = None,
|
|
91
|
-
) ->
|
|
83
|
+
) -> StoreBase:
|
|
84
|
+
# Leaving here for backwards compatibility
|
|
85
|
+
warnings.warn(
|
|
86
|
+
"The 'get_model_endpoint_store' function is deprecated and will be removed in 1.9.0. "
|
|
87
|
+
"Please use `get_store_object` instead.",
|
|
88
|
+
# TODO: remove in 1.9.0
|
|
89
|
+
FutureWarning,
|
|
90
|
+
)
|
|
91
|
+
return get_store_object(
|
|
92
|
+
project=project, access_key=access_key, secret_provider=secret_provider
|
|
93
|
+
)
|
|
94
|
+
|
|
95
|
+
|
|
96
|
+
def get_store_object(
|
|
97
|
+
project: str,
|
|
98
|
+
access_key: str = None,
|
|
99
|
+
secret_provider: typing.Callable = None,
|
|
100
|
+
) -> StoreBase:
|
|
92
101
|
"""
|
|
93
102
|
Getting the DB target type based on mlrun.config.model_endpoint_monitoring.store_type.
|
|
94
103
|
|
|
@@ -96,16 +105,14 @@ def get_model_endpoint_store(
|
|
|
96
105
|
:param access_key: Access key with permission to the DB table.
|
|
97
106
|
:param secret_provider: An optional secret provider to get the connection string secret.
|
|
98
107
|
|
|
99
|
-
:return: `
|
|
100
|
-
model
|
|
108
|
+
:return: `StoreBase` object. Using this object, the user can apply different operations on the
|
|
109
|
+
model monitoring record such as write, update, get and delete a model endpoint.
|
|
101
110
|
"""
|
|
102
111
|
|
|
103
|
-
# Get store type value from
|
|
104
|
-
|
|
105
|
-
mlrun.mlconf.model_endpoint_monitoring.store_type
|
|
106
|
-
)
|
|
112
|
+
# Get store type value from ObjectStoreFactory enum class
|
|
113
|
+
store_type = ObjectStoreFactory(mlrun.mlconf.model_endpoint_monitoring.store_type)
|
|
107
114
|
|
|
108
|
-
# Convert into
|
|
109
|
-
return
|
|
115
|
+
# Convert into store target object
|
|
116
|
+
return store_type.to_object_store(
|
|
110
117
|
project=project, access_key=access_key, secret_provider=secret_provider
|
|
111
118
|
)
|
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
# Copyright 2024 Iguazio
|
|
2
|
+
#
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
#
|
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
#
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
14
|
+
|
|
15
|
+
from .store import StoreBase
|