mlrun 1.4.0rc25__py3-none-any.whl → 1.5.0rc2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of mlrun might be problematic. Click here for more details.
- mlrun/__init__.py +2 -35
- mlrun/__main__.py +3 -41
- mlrun/api/api/api.py +6 -0
- mlrun/api/api/endpoints/feature_store.py +0 -4
- mlrun/api/api/endpoints/files.py +14 -2
- mlrun/api/api/endpoints/frontend_spec.py +2 -1
- mlrun/api/api/endpoints/functions.py +95 -59
- mlrun/api/api/endpoints/grafana_proxy.py +9 -9
- mlrun/api/api/endpoints/logs.py +17 -3
- mlrun/api/api/endpoints/model_endpoints.py +3 -2
- mlrun/api/api/endpoints/pipelines.py +1 -5
- mlrun/api/api/endpoints/projects.py +88 -0
- mlrun/api/api/endpoints/runs.py +48 -6
- mlrun/api/api/endpoints/submit.py +2 -1
- mlrun/api/api/endpoints/workflows.py +355 -0
- mlrun/api/api/utils.py +3 -4
- mlrun/api/crud/__init__.py +1 -0
- mlrun/api/crud/client_spec.py +6 -2
- mlrun/api/crud/feature_store.py +5 -0
- mlrun/api/crud/model_monitoring/__init__.py +1 -0
- mlrun/api/crud/model_monitoring/deployment.py +497 -0
- mlrun/api/crud/model_monitoring/grafana.py +96 -42
- mlrun/api/crud/model_monitoring/helpers.py +159 -0
- mlrun/api/crud/model_monitoring/model_endpoints.py +202 -476
- mlrun/api/crud/notifications.py +9 -4
- mlrun/api/crud/pipelines.py +6 -11
- mlrun/api/crud/projects.py +2 -2
- mlrun/api/crud/runtime_resources.py +4 -3
- mlrun/api/crud/runtimes/nuclio/helpers.py +5 -1
- mlrun/api/crud/secrets.py +21 -0
- mlrun/api/crud/workflows.py +352 -0
- mlrun/api/db/base.py +16 -1
- mlrun/api/db/init_db.py +2 -4
- mlrun/api/db/session.py +1 -1
- mlrun/api/db/sqldb/db.py +129 -31
- mlrun/api/db/sqldb/models/models_mysql.py +15 -1
- mlrun/api/db/sqldb/models/models_sqlite.py +16 -2
- mlrun/api/launcher.py +38 -6
- mlrun/api/main.py +3 -2
- mlrun/api/rundb/__init__.py +13 -0
- mlrun/{db → api/rundb}/sqldb.py +36 -84
- mlrun/api/runtime_handlers/__init__.py +56 -0
- mlrun/api/runtime_handlers/base.py +1247 -0
- mlrun/api/runtime_handlers/daskjob.py +209 -0
- mlrun/api/runtime_handlers/kubejob.py +37 -0
- mlrun/api/runtime_handlers/mpijob.py +147 -0
- mlrun/api/runtime_handlers/remotesparkjob.py +29 -0
- mlrun/api/runtime_handlers/sparkjob.py +148 -0
- mlrun/api/schemas/__init__.py +17 -6
- mlrun/api/utils/builder.py +1 -4
- mlrun/api/utils/clients/chief.py +14 -0
- mlrun/api/utils/clients/iguazio.py +33 -33
- mlrun/api/utils/clients/nuclio.py +2 -2
- mlrun/api/utils/periodic.py +9 -2
- mlrun/api/utils/projects/follower.py +14 -7
- mlrun/api/utils/projects/leader.py +2 -1
- mlrun/api/utils/projects/remotes/nop_follower.py +2 -2
- mlrun/api/utils/projects/remotes/nop_leader.py +2 -2
- mlrun/api/utils/runtimes/__init__.py +14 -0
- mlrun/api/utils/runtimes/nuclio.py +43 -0
- mlrun/api/utils/scheduler.py +98 -15
- mlrun/api/utils/singletons/db.py +5 -1
- mlrun/api/utils/singletons/project_member.py +4 -1
- mlrun/api/utils/singletons/scheduler.py +1 -1
- mlrun/artifacts/base.py +6 -6
- mlrun/artifacts/dataset.py +4 -4
- mlrun/artifacts/manager.py +2 -3
- mlrun/artifacts/model.py +2 -2
- mlrun/artifacts/plots.py +8 -8
- mlrun/common/db/__init__.py +14 -0
- mlrun/common/helpers.py +37 -0
- mlrun/{mlutils → common/model_monitoring}/__init__.py +3 -2
- mlrun/common/model_monitoring/helpers.py +69 -0
- mlrun/common/schemas/__init__.py +13 -1
- mlrun/common/schemas/auth.py +4 -1
- mlrun/common/schemas/client_spec.py +1 -1
- mlrun/common/schemas/function.py +17 -0
- mlrun/common/schemas/model_monitoring/__init__.py +48 -0
- mlrun/common/{model_monitoring.py → schemas/model_monitoring/constants.py} +11 -23
- mlrun/common/schemas/model_monitoring/grafana.py +55 -0
- mlrun/common/schemas/{model_endpoints.py → model_monitoring/model_endpoints.py} +32 -65
- mlrun/common/schemas/notification.py +1 -0
- mlrun/common/schemas/object.py +4 -0
- mlrun/common/schemas/project.py +1 -0
- mlrun/common/schemas/regex.py +1 -1
- mlrun/common/schemas/runs.py +1 -8
- mlrun/common/schemas/schedule.py +1 -8
- mlrun/common/schemas/workflow.py +54 -0
- mlrun/config.py +45 -42
- mlrun/datastore/__init__.py +21 -0
- mlrun/datastore/base.py +1 -1
- mlrun/datastore/datastore.py +9 -0
- mlrun/datastore/dbfs_store.py +168 -0
- mlrun/datastore/helpers.py +18 -0
- mlrun/datastore/sources.py +1 -0
- mlrun/datastore/store_resources.py +2 -5
- mlrun/datastore/v3io.py +1 -2
- mlrun/db/__init__.py +4 -68
- mlrun/db/base.py +12 -0
- mlrun/db/factory.py +65 -0
- mlrun/db/httpdb.py +175 -20
- mlrun/db/nopdb.py +4 -2
- mlrun/execution.py +4 -2
- mlrun/feature_store/__init__.py +1 -0
- mlrun/feature_store/api.py +1 -2
- mlrun/feature_store/common.py +2 -1
- mlrun/feature_store/feature_set.py +1 -11
- mlrun/feature_store/feature_vector.py +340 -2
- mlrun/feature_store/ingestion.py +5 -10
- mlrun/feature_store/retrieval/base.py +118 -104
- mlrun/feature_store/retrieval/dask_merger.py +17 -10
- mlrun/feature_store/retrieval/job.py +4 -1
- mlrun/feature_store/retrieval/local_merger.py +18 -18
- mlrun/feature_store/retrieval/spark_merger.py +21 -14
- mlrun/feature_store/retrieval/storey_merger.py +22 -16
- mlrun/kfpops.py +3 -9
- mlrun/launcher/base.py +57 -53
- mlrun/launcher/client.py +5 -4
- mlrun/launcher/factory.py +24 -13
- mlrun/launcher/local.py +6 -6
- mlrun/launcher/remote.py +4 -4
- mlrun/lists.py +0 -11
- mlrun/model.py +11 -17
- mlrun/model_monitoring/__init__.py +2 -22
- mlrun/model_monitoring/features_drift_table.py +1 -1
- mlrun/model_monitoring/helpers.py +22 -210
- mlrun/model_monitoring/model_endpoint.py +1 -1
- mlrun/model_monitoring/model_monitoring_batch.py +127 -50
- mlrun/model_monitoring/prometheus.py +219 -0
- mlrun/model_monitoring/stores/__init__.py +16 -11
- mlrun/model_monitoring/stores/kv_model_endpoint_store.py +95 -23
- mlrun/model_monitoring/stores/models/mysql.py +47 -29
- mlrun/model_monitoring/stores/models/sqlite.py +47 -29
- mlrun/model_monitoring/stores/sql_model_endpoint_store.py +31 -19
- mlrun/model_monitoring/{stream_processing_fs.py → stream_processing.py} +206 -64
- mlrun/model_monitoring/tracking_policy.py +104 -0
- mlrun/package/packager.py +6 -8
- mlrun/package/packagers/default_packager.py +121 -10
- mlrun/package/packagers/numpy_packagers.py +1 -1
- mlrun/platforms/__init__.py +0 -2
- mlrun/platforms/iguazio.py +0 -56
- mlrun/projects/pipelines.py +53 -159
- mlrun/projects/project.py +10 -37
- mlrun/render.py +1 -1
- mlrun/run.py +8 -124
- mlrun/runtimes/__init__.py +6 -42
- mlrun/runtimes/base.py +29 -1249
- mlrun/runtimes/daskjob.py +2 -198
- mlrun/runtimes/funcdoc.py +0 -9
- mlrun/runtimes/function.py +25 -29
- mlrun/runtimes/kubejob.py +5 -29
- mlrun/runtimes/local.py +1 -1
- mlrun/runtimes/mpijob/__init__.py +2 -2
- mlrun/runtimes/mpijob/abstract.py +10 -1
- mlrun/runtimes/mpijob/v1.py +0 -76
- mlrun/runtimes/mpijob/v1alpha1.py +1 -74
- mlrun/runtimes/nuclio.py +3 -2
- mlrun/runtimes/pod.py +28 -18
- mlrun/runtimes/remotesparkjob.py +1 -15
- mlrun/runtimes/serving.py +14 -6
- mlrun/runtimes/sparkjob/__init__.py +0 -1
- mlrun/runtimes/sparkjob/abstract.py +4 -131
- mlrun/runtimes/utils.py +0 -26
- mlrun/serving/routers.py +7 -7
- mlrun/serving/server.py +11 -8
- mlrun/serving/states.py +7 -1
- mlrun/serving/v2_serving.py +6 -6
- mlrun/utils/helpers.py +23 -42
- mlrun/utils/notifications/notification/__init__.py +4 -0
- mlrun/utils/notifications/notification/webhook.py +61 -0
- mlrun/utils/notifications/notification_pusher.py +5 -25
- mlrun/utils/regex.py +7 -2
- mlrun/utils/version/version.json +2 -2
- {mlrun-1.4.0rc25.dist-info → mlrun-1.5.0rc2.dist-info}/METADATA +26 -25
- {mlrun-1.4.0rc25.dist-info → mlrun-1.5.0rc2.dist-info}/RECORD +180 -158
- {mlrun-1.4.0rc25.dist-info → mlrun-1.5.0rc2.dist-info}/WHEEL +1 -1
- mlrun/mlutils/data.py +0 -160
- mlrun/mlutils/models.py +0 -78
- mlrun/mlutils/plots.py +0 -902
- mlrun/utils/model_monitoring.py +0 -249
- /mlrun/{api/db/sqldb/session.py → common/db/sql_session.py} +0 -0
- {mlrun-1.4.0rc25.dist-info → mlrun-1.5.0rc2.dist-info}/LICENSE +0 -0
- {mlrun-1.4.0rc25.dist-info → mlrun-1.5.0rc2.dist-info}/entry_points.txt +0 -0
- {mlrun-1.4.0rc25.dist-info → mlrun-1.5.0rc2.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,497 @@
|
|
|
1
|
+
# Copyright 2023 Iguazio
|
|
2
|
+
#
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
#
|
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
#
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
14
|
+
#
|
|
15
|
+
import pathlib
|
|
16
|
+
import typing
|
|
17
|
+
|
|
18
|
+
import sqlalchemy.orm
|
|
19
|
+
from fastapi import Depends
|
|
20
|
+
|
|
21
|
+
import mlrun.api.api.endpoints.functions
|
|
22
|
+
import mlrun.api.api.utils
|
|
23
|
+
import mlrun.api.crud.model_monitoring.helpers
|
|
24
|
+
import mlrun.api.utils.singletons.db
|
|
25
|
+
import mlrun.api.utils.singletons.k8s
|
|
26
|
+
import mlrun.common.schemas.model_monitoring
|
|
27
|
+
import mlrun.model_monitoring.stream_processing
|
|
28
|
+
import mlrun.model_monitoring.tracking_policy
|
|
29
|
+
from mlrun import feature_store as fstore
|
|
30
|
+
from mlrun.api.api import deps
|
|
31
|
+
from mlrun.api.crud.model_monitoring.helpers import Seconds, seconds2minutes
|
|
32
|
+
from mlrun.utils import logger
|
|
33
|
+
|
|
34
|
+
_MODEL_MONITORING_COMMON_PATH = pathlib.Path(__file__).parents[3] / "model_monitoring"
|
|
35
|
+
_STREAM_PROCESSING_FUNCTION_PATH = (
|
|
36
|
+
_MODEL_MONITORING_COMMON_PATH / "stream_processing.py"
|
|
37
|
+
)
|
|
38
|
+
_MONITORING_BATCH_FUNCTION_PATH = (
|
|
39
|
+
_MODEL_MONITORING_COMMON_PATH / "model_monitoring_batch.py"
|
|
40
|
+
)
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
class MonitoringDeployment:
    """Handle the deployment of a project's model monitoring infrastructure:
    the real-time stream processing nuclio function and the scheduled batch drift job.
    """

    def __init__(
        self,
        parquet_batching_max_events: int = mlrun.mlconf.model_endpoint_monitoring.parquet_batching_max_events,
        max_parquet_save_interval: int = mlrun.mlconf.model_endpoint_monitoring.parquet_batching_timeout_secs,
    ) -> None:
        """
        Initialize a MonitoringDeployment object, which handles the deployment of both model monitoring stream
        nuclio function and the scheduled batch drift job.

        :param parquet_batching_max_events: Maximum number of events that will be used for writing the monitoring
                                            parquet by the monitoring stream function.
        :param max_parquet_save_interval:   Maximum number of seconds to hold events before they are written to the
                                            monitoring parquet target. Note that this value will be used to handle the
                                            offset by the scheduled batch job.
        """
        # NOTE(review): in the previous revision this docstring appeared *after* the
        # assignments below, which made it a no-op string expression instead of the
        # constructor docstring. It has been moved to its correct position.
        self._parquet_batching_max_events = parquet_batching_max_events
        self._max_parquet_save_interval = max_parquet_save_interval

    def deploy_monitoring_functions(
        self,
        project: str,
        model_monitoring_access_key: str,
        db_session: sqlalchemy.orm.Session,
        auth_info: mlrun.common.schemas.AuthInfo,
        tracking_policy: mlrun.model_monitoring.tracking_policy.TrackingPolicy,
    ):
        """
        Invoking monitoring deploying functions.

        :param project:                     The name of the project.
        :param model_monitoring_access_key: Access key to apply the model monitoring process.
        :param db_session:                  A session that manages the current dialog with the database.
        :param auth_info:                   The auth info of the request.
        :param tracking_policy:             Model monitoring configurations.
        """
        self.deploy_model_monitoring_stream_processing(
            project=project,
            model_monitoring_access_key=model_monitoring_access_key,
            db_session=db_session,
            auth_info=auth_info,
            tracking_policy=tracking_policy,
        )
        # The batch job is offset by the parquet save interval so that it only processes
        # events that the stream function has already flushed to the parquet target.
        self.deploy_model_monitoring_batch_processing(
            project=project,
            model_monitoring_access_key=model_monitoring_access_key,
            db_session=db_session,
            auth_info=auth_info,
            tracking_policy=tracking_policy,
            tracking_offset=Seconds(self._max_parquet_save_interval),
        )

    def deploy_model_monitoring_stream_processing(
        self,
        project: str,
        model_monitoring_access_key: str,
        db_session: sqlalchemy.orm.Session,
        auth_info: mlrun.common.schemas.AuthInfo,
        tracking_policy: mlrun.model_monitoring.tracking_policy.TrackingPolicy,
    ) -> None:
        """
        Deploying model monitoring stream real time nuclio function. The goal of this real time function is
        to monitor the log of the data stream. It is triggered when a new log entry is detected.
        It processes the new events into statistics that are then written to statistics databases.

        :param project:                     The name of the project.
        :param model_monitoring_access_key: Access key to apply the model monitoring process.
        :param db_session:                  A session that manages the current dialog with the database.
        :param auth_info:                   The auth info of the request.
        :param tracking_policy:             Model monitoring configurations.
        """

        logger.info(
            "Checking if model monitoring stream is already deployed",
            project=project,
        )
        try:
            # Validate that the model monitoring stream has not yet been deployed;
            # get_nuclio_deploy_status raises MLRunNotFoundError when it does not exist.
            mlrun.runtimes.function.get_nuclio_deploy_status(
                name="model-monitoring-stream",
                project=project,
                tag="",
                auth_info=auth_info,
            )
            logger.info(
                "Detected model monitoring stream processing function already deployed",
                project=project,
            )
            return
        except mlrun.errors.MLRunNotFoundError:
            logger.info(
                "Deploying model monitoring stream processing function", project=project
            )

        # Get parquet target value for model monitoring stream function
        parquet_target = (
            mlrun.api.crud.model_monitoring.helpers.get_monitoring_parquet_path(
                db_session=db_session, project=project
            )
        )

        fn = self._initial_model_monitoring_stream_processing_function(
            project=project,
            model_monitoring_access_key=model_monitoring_access_key,
            tracking_policy=tracking_policy,
            auth_info=auth_info,
            parquet_target=parquet_target,
        )

        # Adding label to the function - will be used to identify the stream pod
        fn.metadata.labels = {"type": "model-monitoring-stream"}

        mlrun.api.api.endpoints.functions._build_function(
            db_session=db_session, auth_info=auth_info, function=fn
        )

    def deploy_model_monitoring_batch_processing(
        self,
        project: str,
        model_monitoring_access_key: str,
        db_session: sqlalchemy.orm.Session,
        auth_info: mlrun.common.schemas.AuthInfo,
        tracking_policy: mlrun.model_monitoring.tracking_policy.TrackingPolicy,
        tracking_offset: Seconds = Seconds(0),
    ):
        """
        Deploying model monitoring batch job. The goal of this job is to identify drift in the data
        based on the latest batch of events. By default, this job is executed on the hour every hour.
        Note that if the monitoring batch job was already deployed then you will have to delete the
        old monitoring batch job before deploying a new one.

        :param project:                     The name of the project.
        :param model_monitoring_access_key: Access key to apply the model monitoring process.
        :param db_session:                  A session that manages the current dialog with the database.
        :param auth_info:                   The auth info of the request.
        :param tracking_policy:             Model monitoring configurations.
        :param tracking_offset:             Offset for the tracking policy (for synchronization with the stream).
        """

        logger.info(
            "Checking if model monitoring batch processing function is already deployed",
            project=project,
        )

        # Try to list functions that named model monitoring batch
        # to make sure that this job has not yet been deployed
        function_list = mlrun.api.utils.singletons.db.get_db().list_functions(
            session=db_session, name="model-monitoring-batch", project=project
        )

        if function_list:
            logger.info(
                "Detected model monitoring batch processing function already deployed",
                project=project,
            )
            return

        # Create a monitoring batch job function object
        fn = self._get_model_monitoring_batch_function(
            project=project,
            model_monitoring_access_key=model_monitoring_access_key,
            db_session=db_session,
            auth_info=auth_info,
            tracking_policy=tracking_policy,
        )

        # Get the function uri (strip the "db://" scheme so it can be used as a task function reference)
        function_uri = fn.save(versioned=True)
        function_uri = function_uri.replace("db://", "")

        task = mlrun.new_task(name="model-monitoring-batch", project=project)
        task.spec.function = function_uri

        # Apply batching interval params
        interval_list = [
            tracking_policy.default_batch_intervals.minute,
            tracking_policy.default_batch_intervals.hour,
            tracking_policy.default_batch_intervals.day,
        ]
        (
            minutes,
            hours,
            days,
        ) = mlrun.api.crud.model_monitoring.helpers.get_batching_interval_param(
            interval_list
        )
        batch_dict = {"minutes": minutes, "hours": hours, "days": days}

        task.spec.parameters[
            mlrun.common.schemas.model_monitoring.EventFieldType.BATCH_INTERVALS_DICT
        ] = batch_dict

        data = {
            "task": task.to_dict(),
            "schedule": mlrun.api.crud.model_monitoring.helpers.convert_to_cron_string(
                tracking_policy.default_batch_intervals,
                minute_delay=seconds2minutes(tracking_offset),
            ),
        }

        logger.info(
            "Deploying model monitoring batch processing function", project=project
        )

        # Add job schedule policy (every hour by default)
        mlrun.api.api.utils.submit_run_sync(
            db_session=db_session, auth_info=auth_info, data=data
        )

    def _initial_model_monitoring_stream_processing_function(
        self,
        project: str,
        model_monitoring_access_key: str,
        tracking_policy: mlrun.model_monitoring.tracking_policy.TrackingPolicy,
        auth_info: mlrun.common.schemas.AuthInfo,
        parquet_target: str,
    ):
        """
        Initialize model monitoring stream processing function.

        :param project:                     Project name.
        :param model_monitoring_access_key: Access key to apply the model monitoring process. Please note that in CE
                                            deployments this parameter will be None.
        :param tracking_policy:             Model monitoring configurations.
        :param auth_info:                   The auth info of the request.
        :param parquet_target:              Path to model monitoring parquet file that will be generated by the
                                            monitoring stream nuclio function.

        :return: A function object from a mlrun runtime class
        """

        # Initialize Stream Processor object
        stream_processor = (
            mlrun.model_monitoring.stream_processing.EventStreamProcessor(
                project=project,
                parquet_batching_max_events=self._parquet_batching_max_events,
                parquet_batching_timeout_secs=self._max_parquet_save_interval,
                parquet_target=parquet_target,
                model_monitoring_access_key=model_monitoring_access_key,
            )
        )

        # Create a new serving function for the streaming process
        function = mlrun.code_to_function(
            name="model-monitoring-stream",
            project=project,
            filename=str(_STREAM_PROCESSING_FUNCTION_PATH),
            kind="serving",
            image=tracking_policy.stream_image,
        )

        # Create monitoring serving graph
        stream_processor.apply_monitoring_serving_graph(function)

        # Set the project to the serving function
        function.metadata.project = project

        # Add stream triggers
        function = self._apply_stream_trigger(
            project=project,
            function=function,
            model_monitoring_access_key=model_monitoring_access_key,
            auth_info=auth_info,
        )

        # Apply feature store run configurations on the serving function
        run_config = fstore.RunConfig(function=function, local=False)
        function.spec.parameters = run_config.parameters

        return function

    def _get_model_monitoring_batch_function(
        self,
        project: str,
        model_monitoring_access_key: str,
        db_session: sqlalchemy.orm.Session,
        auth_info: mlrun.common.schemas.AuthInfo,
        tracking_policy: mlrun.model_monitoring.tracking_policy.TrackingPolicy,
    ):
        """
        Initialize model monitoring batch function.

        :param project:                     Project name.
        :param model_monitoring_access_key: Access key to apply the model monitoring process. Please note that in CE
                                            deployments this parameter will be None.
        :param db_session:                  A session that manages the current dialog with the database.
        :param auth_info:                   The auth info of the request.
        :param tracking_policy:             Model monitoring configurations.

        :return: A function object from a mlrun runtime class
        """

        # Create job function runtime for the model monitoring batch
        function: mlrun.runtimes.KubejobRuntime = mlrun.code_to_function(
            name="model-monitoring-batch",
            project=project,
            filename=str(_MONITORING_BATCH_FUNCTION_PATH),
            kind="job",
            image=tracking_policy.default_batch_image,
            handler="handler",
        )
        function.set_db_connection(mlrun.api.api.utils.get_run_db_instance(db_session))

        # Set the project to the job function
        function.metadata.project = project

        # V3IO access key and mount are only relevant outside of CE deployments
        if not mlrun.mlconf.is_ce_mode():
            function = self._apply_access_key_and_mount_function(
                project=project,
                function=function,
                model_monitoring_access_key=model_monitoring_access_key,
                auth_info=auth_info,
            )

        # Enrich runtime with the required configurations
        mlrun.api.api.utils.apply_enrichment_and_validation_on_function(
            function, auth_info
        )

        return function

    def _apply_stream_trigger(
        self,
        project: str,
        function: mlrun.runtimes.ServingRuntime,
        model_monitoring_access_key: str = None,
        auth_info: mlrun.common.schemas.AuthInfo = Depends(deps.authenticate_request),
    ) -> mlrun.runtimes.ServingRuntime:
        """Adding stream source for the nuclio serving function. By default, the function has HTTP stream trigger along
        with another supported stream source that can be either Kafka or V3IO, depends on the stream path schema that is
        defined under mlrun.mlconf.model_endpoint_monitoring.store_prefixes. Note that if no valid stream path has been
        provided then the function will have a single HTTP stream source.

        :param project:                     Project name.
        :param function:                    The serving function object that will be applied with the stream trigger.
        :param model_monitoring_access_key: Access key to apply the model monitoring stream function when the stream
                                            schema is V3IO.
        :param auth_info:                   The auth info of the request.

        :return: ServingRuntime object with stream trigger.
        """
        # NOTE(review): `Depends(...)` as a default value is a FastAPI endpoint idiom and
        # is not resolved when this method is called directly — callers should always pass
        # auth_info explicitly. Kept as-is to preserve the signature.

        # Get the stream path from the configuration
        stream_path = mlrun.api.crud.model_monitoring.get_stream_path(project=project)

        if stream_path.startswith("kafka://"):
            topic, brokers = mlrun.datastore.utils.parse_kafka_url(url=stream_path)
            # Generate Kafka stream source
            stream_source = mlrun.datastore.sources.KafkaSource(
                brokers=brokers,
                topics=[topic],
            )
            function = stream_source.add_nuclio_trigger(function)

        if not mlrun.mlconf.is_ce_mode():
            function = self._apply_access_key_and_mount_function(
                project=project,
                function=function,
                model_monitoring_access_key=model_monitoring_access_key,
                auth_info=auth_info,
            )
            if stream_path.startswith("v3io://"):
                # Generate V3IO stream trigger
                function.add_v3io_stream_trigger(
                    stream_path=stream_path, name="monitoring_stream_trigger"
                )
        # Add the default HTTP source
        http_source = mlrun.datastore.sources.HttpSource()
        function = http_source.add_nuclio_trigger(function)

        return function

    @staticmethod
    def _apply_access_key_and_mount_function(
        project: str,
        function: typing.Union[
            mlrun.runtimes.KubejobRuntime, mlrun.runtimes.ServingRuntime
        ],
        model_monitoring_access_key: str,
        auth_info: mlrun.common.schemas.AuthInfo,
    ) -> typing.Union[mlrun.runtimes.KubejobRuntime, mlrun.runtimes.ServingRuntime]:
        """Applying model monitoring access key on the provided function when using V3IO path. In addition, this method
        mount the V3IO path for the provided function to configure the access to the system files.

        :param project:                     Project name.
        :param function:                    Model monitoring function object that will be filled with the access key
                                            and the access to the system files.
        :param model_monitoring_access_key: Access key to apply the model monitoring stream function when the stream
                                            schema is V3IO.
        :param auth_info:                   The auth info of the request.

        :return: function runtime object with access key and access to system files.
        """

        # Set model monitoring access key for managing permissions
        function.set_env_from_secret(
            mlrun.common.schemas.model_monitoring.ProjectSecretKeys.ACCESS_KEY,
            mlrun.api.utils.singletons.k8s.get_k8s_helper().get_project_secret_name(
                project
            ),
            mlrun.api.crud.secrets.Secrets().generate_client_project_secret_key(
                mlrun.api.crud.secrets.SecretsClientType.model_monitoring,
                mlrun.common.schemas.model_monitoring.ProjectSecretKeys.ACCESS_KEY,
            ),
        )
        # (removed leftover developer debug prints "[EYAL]: ..." from this section)
        function.metadata.credentials.access_key = model_monitoring_access_key
        function.apply(mlrun.mount_v3io())

        # Ensure that the auth env vars are set
        mlrun.api.api.utils.ensure_function_has_auth_set(function, auth_info)
        return function
|
459
|
+
|
|
460
|
+
|
|
461
|
+
def get_endpoint_features(
    feature_names: typing.List[str],
    feature_stats: dict = None,
    current_stats: dict = None,
) -> typing.List[mlrun.common.schemas.Features]:
    """
    Getting a new list of features that exist in feature_names along with their expected (feature_stats) and
    actual (current_stats) stats. The expected stats were calculated during the creation of the model endpoint,
    usually based on the data from the Model Artifact. The actual stats are based on the results from the latest
    model monitoring batch job.

    :param feature_names: List of feature names.
    :param feature_stats: Dictionary of feature stats that were stored during the creation of the model endpoint
                          object.
    :param current_stats: Dictionary of the latest stats that were stored during the last run of the model monitoring
                          batch job.

    :return: List of feature objects. Each feature has a name, weight, expected values, and actual values. More info
             can be found under `mlrun.common.schemas.Features`.
    """
    # NOTE: the previous docstring used "param x:" / "return:" without the leading colon,
    # so these fields were not rendered as Sphinx field lists; fixed to ":param" / ":return".

    # Initialize feature and current stats dictionaries; empty dict fallbacks keep the
    # `.get(name)` lookups below safe when a stats dict was not provided at all.
    safe_feature_stats = feature_stats or {}
    safe_current_stats = current_stats or {}

    # Create feature object and add it to a general features list
    features = []
    for name in feature_names:
        # Warn (but do not fail) when a stats dict was provided yet misses this feature
        if feature_stats is not None and name not in feature_stats:
            logger.warn("Feature missing from 'feature_stats'", name=name)
        if current_stats is not None and name not in current_stats:
            logger.warn("Feature missing from 'current_stats'", name=name)
        f = mlrun.common.schemas.Features.new(
            name, safe_feature_stats.get(name), safe_current_stats.get(name)
        )
        features.append(f)
    return features