mlrun 1.5.0rc11__py3-none-any.whl → 1.5.0rc13__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of mlrun might be problematic.
- mlrun/__main__.py +31 -2
- mlrun/api/api/endpoints/functions.py +110 -52
- mlrun/api/api/endpoints/model_endpoints.py +0 -56
- mlrun/api/crud/model_monitoring/deployment.py +208 -38
- mlrun/api/crud/model_monitoring/helpers.py +19 -6
- mlrun/api/crud/model_monitoring/model_endpoints.py +14 -31
- mlrun/api/db/sqldb/db.py +3 -1
- mlrun/api/utils/builder.py +2 -4
- mlrun/common/model_monitoring/helpers.py +19 -5
- mlrun/common/schemas/model_monitoring/constants.py +69 -0
- mlrun/common/schemas/model_monitoring/model_endpoints.py +22 -1
- mlrun/config.py +30 -12
- mlrun/datastore/__init__.py +1 -0
- mlrun/datastore/datastore_profile.py +2 -2
- mlrun/datastore/sources.py +4 -30
- mlrun/datastore/targets.py +106 -55
- mlrun/db/httpdb.py +20 -6
- mlrun/feature_store/__init__.py +2 -0
- mlrun/feature_store/api.py +3 -31
- mlrun/feature_store/feature_vector.py +1 -1
- mlrun/feature_store/retrieval/base.py +8 -3
- mlrun/launcher/remote.py +3 -3
- mlrun/lists.py +11 -0
- mlrun/model_monitoring/__init__.py +0 -1
- mlrun/model_monitoring/api.py +1 -1
- mlrun/model_monitoring/application.py +313 -0
- mlrun/model_monitoring/batch_application.py +526 -0
- mlrun/model_monitoring/batch_application_handler.py +32 -0
- mlrun/model_monitoring/evidently_application.py +89 -0
- mlrun/model_monitoring/helpers.py +39 -3
- mlrun/model_monitoring/stores/kv_model_endpoint_store.py +38 -7
- mlrun/model_monitoring/tracking_policy.py +4 -4
- mlrun/model_monitoring/writer.py +37 -0
- mlrun/projects/pipelines.py +38 -4
- mlrun/projects/project.py +257 -43
- mlrun/run.py +5 -2
- mlrun/runtimes/__init__.py +2 -0
- mlrun/runtimes/function.py +2 -1
- mlrun/utils/helpers.py +12 -0
- mlrun/utils/http.py +3 -0
- mlrun/utils/notifications/notification_pusher.py +22 -8
- mlrun/utils/version/version.json +2 -2
- {mlrun-1.5.0rc11.dist-info → mlrun-1.5.0rc13.dist-info}/METADATA +5 -5
- {mlrun-1.5.0rc11.dist-info → mlrun-1.5.0rc13.dist-info}/RECORD +49 -44
- /mlrun/model_monitoring/{model_monitoring_batch.py → batch.py} +0 -0
- {mlrun-1.5.0rc11.dist-info → mlrun-1.5.0rc13.dist-info}/LICENSE +0 -0
- {mlrun-1.5.0rc11.dist-info → mlrun-1.5.0rc13.dist-info}/WHEEL +0 -0
- {mlrun-1.5.0rc11.dist-info → mlrun-1.5.0rc13.dist-info}/entry_points.txt +0 -0
- {mlrun-1.5.0rc11.dist-info → mlrun-1.5.0rc13.dist-info}/top_level.txt +0 -0
mlrun/model_monitoring/batch_application.py (new file)
@@ -0,0 +1,526 @@
# Copyright 2023 Iguazio
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
import concurrent.futures
import datetime
import json
import os
import re
from typing import List, Tuple

import numpy as np
import pandas as pd

import mlrun
import mlrun.common.helpers
import mlrun.common.model_monitoring.helpers
import mlrun.common.schemas.model_monitoring
import mlrun.common.schemas.model_monitoring.constants as mm_constants
import mlrun.data_types.infer
import mlrun.feature_store as fstore
import mlrun.utils.v3io_clients
from mlrun.datastore import get_stream_pusher
from mlrun.datastore.targets import ParquetTarget
from mlrun.model_monitoring.batch import calculate_inputs_statistics
from mlrun.model_monitoring.helpers import get_monitoring_parquet_path, get_stream_path
from mlrun.utils import logger


class BatchApplicationProcessor:
    """
    The main object to handle the batch processing job. This object is used to get the required configurations and
    to manage the main monitoring drift detection process based on the current batch.
    Note that the BatchApplicationProcessor object requires access keys along with valid project configurations.
    """

    def __init__(
        self,
        context: mlrun.run.MLClientCtx,
        project: str,
    ):
        """
        Initialize the BatchApplicationProcessor object.

        :param context: An MLRun context.
        :param project: Project name.
        """
        self.context = context
        self.project = project

        logger.info(
            "Initializing BatchApplicationProcessor",
            project=project,
        )

        # Get a runtime database
        self.db = mlrun.model_monitoring.get_model_endpoint_store(project=project)

        # If an error occurs, it will be raised using the following argument
        self.endpoints_exceptions = {}

        # Get the batch interval range
        self.batch_dict = context.parameters[
            mlrun.common.schemas.model_monitoring.EventFieldType.BATCH_INTERVALS_DICT
        ]

        # TODO: This will be removed in 1.5.0 once the job params can be parsed with different types
        # Convert batch dict string into a dictionary
        if isinstance(self.batch_dict, str):
            self._parse_batch_dict_str()
        # If provided, only model endpoints in that list will be analyzed
        self.model_endpoints = context.parameters.get(
            mlrun.common.schemas.model_monitoring.EventFieldType.MODEL_ENDPOINTS, None
        )
        self.v3io_access_key = os.environ.get("V3IO_ACCESS_KEY")
        self.model_monitoring_access_key = (
            os.environ.get("MODEL_MONITORING_ACCESS_KEY") or self.v3io_access_key
        )
        self.parquet_directory = get_monitoring_parquet_path(
            project=project,
            kind=mlrun.common.schemas.model_monitoring.FileTargetKind.BATCH_CONTROLLER_PARQUET,
        )
        self.storage_options = None
        if not mlrun.mlconf.is_ce_mode():
            self._initialize_v3io_configurations(
                model_monitoring_access_key=self.model_monitoring_access_key
            )
        elif self.parquet_directory.startswith("s3://"):
            self.storage_options = mlrun.mlconf.get_s3_storage_options()

    def _initialize_v3io_configurations(
        self,
        v3io_access_key: str = None,
        v3io_framesd: str = None,
        v3io_api: str = None,
        model_monitoring_access_key: str = None,
    ):
        # Get the V3IO configurations
        self.v3io_framesd = v3io_framesd or mlrun.mlconf.v3io_framesd
        self.v3io_api = v3io_api or mlrun.mlconf.v3io_api

        self.v3io_access_key = v3io_access_key or os.environ.get("V3IO_ACCESS_KEY")
        self.model_monitoring_access_key = model_monitoring_access_key
        self.storage_options = dict(
            v3io_access_key=self.model_monitoring_access_key, v3io_api=self.v3io_api
        )

    def run(self):
        """
        Main method: run all the relevant monitoring applications on each endpoint.
        """
        try:
            endpoints = self.db.list_model_endpoints(uids=self.model_endpoints)
            application = mlrun.get_or_create_project(
                self.project
            ).list_model_monitoring_applications()
            if application:
                applications_names = np.unique(
                    [app.metadata.name for app in application]
                ).tolist()
            else:
                logger.info("No monitoring applications were found in this project")
                applications_names = []

        except Exception as e:
            logger.error("Failed to list endpoints", exc=e)
            return
        if endpoints and applications_names:
            # Initialize a process pool that will be used to run each endpoint's applications on a dedicated process
            pool = concurrent.futures.ProcessPoolExecutor(
                max_workers=min(len(endpoints), 10),
            )
            futures = []
            for endpoint in endpoints:
                if (
                    endpoint[
                        mlrun.common.schemas.model_monitoring.EventFieldType.ACTIVE
                    ]
                    and endpoint[
                        mlrun.common.schemas.model_monitoring.EventFieldType.MONITORING_MODE
                    ]
                    == mlrun.common.schemas.model_monitoring.ModelMonitoringMode.enabled.value
                ):
                    # Skip router endpoints:
                    if (
                        int(
                            endpoint[
                                mlrun.common.schemas.model_monitoring.EventFieldType.ENDPOINT_TYPE
                            ]
                        )
                        == mlrun.common.schemas.model_monitoring.EndpointType.ROUTER
                    ):
                        # Router endpoints have no feature stats
                        logger.info(
                            f"{endpoint[mlrun.common.schemas.model_monitoring.EventFieldType.UID]} is a router, skipping"
                        )
                        continue
                    future = pool.submit(
                        BatchApplicationProcessor.model_endpoint_process,
                        endpoint,
                        applications_names,
                        self.batch_dict,
                        self.project,
                        self.parquet_directory,
                        self.storage_options,
                        self.model_monitoring_access_key,
                    )
                    futures.append(future)
            for future in concurrent.futures.as_completed(futures):
                res = future.result()
                if res:
                    self.endpoints_exceptions[res[0]] = res[1]

            self._delete_old_parquet()

    @staticmethod
    def model_endpoint_process(
        endpoint: dict,
        applications_names: List[str],
        batch_dict: dict,
        project: str,
        parquet_directory: str,
        storage_options: dict,
        model_monitoring_access_key: str,
    ):
        """
        Process a model endpoint and trigger the monitoring applications.
        This function runs in a separate process for each endpoint.

        :param endpoint:                    (dict) Dictionary representing the model endpoint.
        :param applications_names:          (List[str]) List of application names to push results to.
        :param batch_dict:                  (dict) Dictionary containing batch interval start and end times.
        :param project:                     (str) Project name.
        :param parquet_directory:           (str) Directory to store Parquet files.
        :param storage_options:             (dict) Storage options for writing the ParquetTarget.
        :param model_monitoring_access_key: (str) Access key to apply the model monitoring process.
        """
        endpoint_id = endpoint[mlrun.common.schemas.model_monitoring.EventFieldType.UID]
        try:
            # Get the batch interval start time and end time
            start_time, end_time = BatchApplicationProcessor._get_interval_range(
                batch_dict
            )
            m_fs = fstore.get_feature_set(
                endpoint[
                    mlrun.common.schemas.model_monitoring.EventFieldType.FEATURE_SET_URI
                ]
            )
            labels = endpoint[
                mlrun.common.schemas.model_monitoring.EventFieldType.LABEL_NAMES
            ]
            if labels:
                if isinstance(labels, str):
                    labels = json.loads(labels)
                for label in labels:
                    if label not in list(m_fs.spec.features.keys()):
                        m_fs.add_feature(fstore.Feature(name=label, value_type="float"))

            # TODO: add extra feature_sets

            try:
                # Get sample data
                df = BatchApplicationProcessor._get_sample_df(
                    m_fs,
                    endpoint_id,
                    end_time,
                    start_time,
                    parquet_directory,
                    storage_options,
                )

                if len(df) == 0:
                    logger.warn(
                        "Not enough model events since the beginning of the batch interval",
                        featureset_name=m_fs.metadata.name,
                        endpoint=endpoint[
                            mlrun.common.schemas.model_monitoring.EventFieldType.UID
                        ],
                        min_required_events=mlrun.mlconf.model_endpoint_monitoring.parquet_batching_max_events,
                        start_time=str(
                            datetime.datetime.now() - datetime.timedelta(hours=1)
                        ),
                        end_time=str(datetime.datetime.now()),
                    )
                    return

            # TODO: The below warning will be removed once the state of the feature store target is updated
            # as expected. In that case, the existence of the file will be checked before trying to get
            # the offline data from the feature set.
            # Continue if not enough events were provided since the deployment of the model endpoint
            except FileNotFoundError:
                logger.warn(
                    "Parquet not found, probably due to not enough model events",
                    # parquet_target=m_fs.status.targets[0].path, TODO:
                    endpoint=endpoint[
                        mlrun.common.schemas.model_monitoring.EventFieldType.UID
                    ],
                    min_required_events=mlrun.mlconf.model_endpoint_monitoring.parquet_batching_max_events,
                )
                return

            # Infer feature set stats and schema
            fstore.api._infer_from_static_df(
                df,
                m_fs,
                options=mlrun.data_types.infer.InferOptions.all_stats(),
            )

            # Save feature set to apply changes
            m_fs.save()

            # Get the timestamp of the latest request:
            latest_request = df[
                mlrun.common.schemas.model_monitoring.EventFieldType.TIMESTAMP
            ].iloc[-1]

            # Get the feature stats from the model endpoint for reference data
            feature_stats = json.loads(
                endpoint[
                    mlrun.common.schemas.model_monitoring.EventFieldType.FEATURE_STATS
                ]
            )

            # Get the current stats:
            current_stats = calculate_inputs_statistics(
                sample_set_statistics=feature_stats,
                inputs=df,
            )

            # Create and push data to all applications
            BatchApplicationProcessor._push_to_applications(
                current_stats,
                feature_stats,
                parquet_directory,
                end_time,
                endpoint_id,
                latest_request,
                project,
                applications_names,
                model_monitoring_access_key,
            )

        except FileNotFoundError as e:
            logger.error(
                f"Exception for endpoint {endpoint[mlrun.common.schemas.model_monitoring.EventFieldType.UID]}"
            )
            return endpoint_id, e

    @staticmethod
    def _get_interval_range(batch_dict) -> Tuple[datetime.datetime, datetime.datetime]:
        """Get the batch interval time range."""
        minutes, hours, days = (
            batch_dict[mlrun.common.schemas.model_monitoring.EventFieldType.MINUTES],
            batch_dict[mlrun.common.schemas.model_monitoring.EventFieldType.HOURS],
            batch_dict[mlrun.common.schemas.model_monitoring.EventFieldType.DAYS],
        )
        start_time = datetime.datetime.now() - datetime.timedelta(
            minutes=minutes, hours=hours, days=days
        )
        end_time = datetime.datetime.now()
        return start_time, end_time

    def _parse_batch_dict_str(self):
        """Convert a batch dictionary string into a valid dictionary."""
        characters_to_remove = "{} "
        pattern = "[" + characters_to_remove + "]"
        # Remove unnecessary characters from the provided string
        batch_list = re.sub(pattern, "", self.batch_dict).split(",")
        # Initialize the dictionary of batch interval ranges
        self.batch_dict = {}
        for pair in batch_list:
            pair_list = pair.split(":")
            self.batch_dict[pair_list[0]] = float(pair_list[1])

    @staticmethod
    def _get_parquet_path(
        parquet_directory: str, schedule_time: datetime.datetime, endpoint_id: str
    ):
        schedule_time_str = ""
        for unit, fmt in [
            ("year", "%Y"),
            ("month", "%m"),
            ("day", "%d"),
            ("hour", "%H"),
            ("minute", "%M"),
        ]:
            schedule_time_str += f"{unit}={schedule_time.strftime(fmt)}/"
        endpoint_str = f"{mlrun.common.schemas.model_monitoring.EventFieldType.ENDPOINT_ID}={endpoint_id}"

        return f"{parquet_directory}/{schedule_time_str}/{endpoint_str}"

    def _delete_old_parquet(self):
        """Delete all the sample parquets that were saved yesterday
        (TODO: make this configurable and simpler)."""
        _, schedule_time = BatchApplicationProcessor._get_interval_range(
            self.batch_dict
        )
        threshold_date = schedule_time - datetime.timedelta(days=1)
        threshold_year = threshold_date.year
        threshold_month = threshold_date.month
        threshold_day = threshold_date.day

        base_directory = get_monitoring_parquet_path(
            project=self.project,
            kind=mlrun.common.schemas.model_monitoring.FileTargetKind.BATCH_CONTROLLER_PARQUET,
        )
        target = ParquetTarget(path=base_directory)
        fs = target._get_store().get_filesystem()

        try:
            # List all subdirectories in the base directory
            years_subdirectories = fs.listdir(base_directory)

            for y_subdirectory in years_subdirectories:
                year = int(y_subdirectory["name"].split("/")[-1].split("=")[1])
                if year == threshold_year:
                    month_subdirectories = fs.listdir(y_subdirectory["name"])
                    for m_subdirectory in month_subdirectories:
                        month = int(m_subdirectory["name"].split("/")[-1].split("=")[1])
                        if month == threshold_month:
                            day_subdirectories = fs.listdir(m_subdirectory["name"])
                            for d_subdirectory in day_subdirectories:
                                day = int(
                                    d_subdirectory["name"].split("/")[-1].split("=")[1]
                                )
                                if day == threshold_day - 1:
                                    fs.rm(path=d_subdirectory["name"], recursive=True)
                        elif month == threshold_month - 1 and threshold_day == 1:
                            fs.rm(path=m_subdirectory["name"], recursive=True)
                elif (
                    year == threshold_year - 1
                    and threshold_month == 1
                    and threshold_day == 1
                ):
                    fs.rm(path=y_subdirectory["name"], recursive=True)
        except FileNotFoundError as exc:
            logger.warn(
                f"The batch application process failed to remove the old parquets due to {exc}."
            )

    @staticmethod
    def _push_to_applications(
        current_stats,
        feature_stats,
        parquet_directory,
        end_time,
        endpoint_id,
        latest_request,
        project,
        applications_names,
        model_monitoring_access_key,
    ):
        """
        Pushes data to multiple stream applications.

        :param current_stats:               Current statistics of the input data.
        :param feature_stats:               Statistics of the training features.
        :param parquet_directory:           Directory where sample Parquet files are stored.
        :param end_time:                    End time of the monitoring schedule.
        :param endpoint_id:                 Identifier of the model endpoint.
        :param latest_request:              Timestamp of the latest model request.
        :param project:                     MLRun project name.
        :param applications_names:          List of application names to which data will be pushed.
        :param model_monitoring_access_key: Access key to apply the model monitoring process.
        """
        data = {
            mm_constants.ApplicationEvent.CURRENT_STATS: json.dumps(current_stats),
            mm_constants.ApplicationEvent.FEATURE_STATS: json.dumps(feature_stats),
            mm_constants.ApplicationEvent.SAMPLE_PARQUET_PATH: BatchApplicationProcessor._get_parquet_path(
                parquet_directory=parquet_directory,
                schedule_time=end_time,
                endpoint_id=endpoint_id,
            ),
            mm_constants.ApplicationEvent.SCHEDULE_TIME: end_time.isoformat(
                sep=" ", timespec="microseconds"
            ),
            mm_constants.ApplicationEvent.LAST_REQUEST: latest_request.isoformat(
                sep=" ", timespec="microseconds"
            ),
            mm_constants.ApplicationEvent.ENDPOINT_ID: endpoint_id,
            mm_constants.ApplicationEvent.OUTPUT_STREAM_URI: get_stream_path(
                project=project,
                application_name=mlrun.common.schemas.model_monitoring.constants.MonitoringFunctionNames.WRITER,
            ),
        }
        for app_name in applications_names:
            data.update({mm_constants.ApplicationEvent.APPLICATION_NAME: app_name})
            stream_uri = get_stream_path(project=project, application_name=app_name)
            logger.info(
                f"Pushing endpoint_id {endpoint_id} to {app_name} via stream: {stream_uri}"
            )
            get_stream_pusher(stream_uri, access_key=model_monitoring_access_key).push(
                [data]
            )

    @staticmethod
    def _get_sample_df(
        feature_set,
        endpoint_id,
        end_time,
        start_time,
        parquet_directory,
        storage_options,
    ):
        """
        Retrieves a sample DataFrame of the current input.

        :param feature_set:       The main feature set.
        :param endpoint_id:       Identifier of the model endpoint.
        :param end_time:          End time of the monitoring schedule.
        :param start_time:        Start time of the monitoring schedule.
        :param parquet_directory: Directory where Parquet files are stored.
        :param storage_options:   Storage options for accessing the data.

        :return: Sample DataFrame containing offline features for the specified endpoint.
        """
        features = [f"{feature_set.metadata.name}.*"]
        join_graph = fstore.JoinGraph(first_feature_set=feature_set.metadata.name)
        vector = fstore.FeatureVector(
            name=f"{endpoint_id}_vector",
            features=features,
            with_indexes=True,
            join_graph=join_graph,
        )
        vector.feature_set_objects = {
            feature_set.metadata.name: feature_set
        }  # to avoid an exception when the tag is not the latest
        entity_rows = pd.DataFrame(
            {
                mlrun.common.schemas.model_monitoring.EventFieldType.ENDPOINT_ID: [
                    endpoint_id
                ],
                "scheduled_time": [end_time],
            }
        )
        offline_response = fstore.get_offline_features(
            feature_vector=vector,
            entity_rows=entity_rows,
            entity_timestamp_column="scheduled_time",
            start_time=start_time,
            end_time=end_time,
            timestamp_for_filtering=mlrun.common.schemas.model_monitoring.EventFieldType.TIMESTAMP,
            target=ParquetTarget(
                path=parquet_directory,
                time_partitioning_granularity="minute",
                partition_cols=[
                    mlrun.common.schemas.model_monitoring.EventFieldType.ENDPOINT_ID,
                ],
                storage_options=storage_options,
            ),
        )
        df = offline_response.to_dataframe()
        return df
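For orientation, the interval parameters and the per-endpoint parquet layout can be traced outside the class. The following standalone sketch mirrors the logic of _parse_batch_dict_str and _get_parquet_path above; the batch-dict string, directory, and endpoint id are made-up values:

import datetime
import re

# A batch dict arriving as a string (hypothetical job parameter value)
batch_dict_str = "{minutes:0,hours:2,days:0}"
batch_list = re.sub("[{} ]", "", batch_dict_str).split(",")
batch_dict = {pair.split(":")[0]: float(pair.split(":")[1]) for pair in batch_list}
# -> {"minutes": 0.0, "hours": 2.0, "days": 0.0}

# The sample parquet path is partitioned down to the minute of the schedule time
schedule_time = datetime.datetime(2023, 9, 4, 10, 30)
schedule_time_str = ""
for unit, fmt in [
    ("year", "%Y"), ("month", "%m"), ("day", "%d"), ("hour", "%H"), ("minute", "%M")
]:
    schedule_time_str += f"{unit}={schedule_time.strftime(fmt)}/"
print(f"some/parquet/directory/{schedule_time_str}/endpoint_id=abc123")
# -> some/parquet/directory/year=2023/month=09/day=04/hour=10/minute=30//endpoint_id=abc123

Note that because schedule_time_str already ends with a slash, the joined path contains a double slash before the endpoint partition, exactly as _get_parquet_path produces it.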
mlrun/model_monitoring/batch_application_handler.py (new file)
@@ -0,0 +1,32 @@
# Copyright 2023 Iguazio
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

import mlrun
from mlrun.model_monitoring.batch_application import BatchApplicationProcessor


def handler(context: mlrun.run.MLClientCtx):
    """
    Run the model monitoring batch application.

    :param context: the MLRun context
    """
    batch_processor = BatchApplicationProcessor(
        context=context,
        project=context.project,
    )
    batch_processor.run()
    if batch_processor.endpoints_exceptions:
        print(batch_processor.endpoints_exceptions)
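A handler module like this is typically packaged and scheduled as an MLRun job. A minimal sketch, assuming the module file is available locally; the function name, image, and cron schedule are illustrative assumptions, not part of this release:

import mlrun

fn = mlrun.code_to_function(
    name="model-monitoring-batch-application",  # assumed name
    filename="batch_application_handler.py",
    kind="job",
    image="mlrun/mlrun",
    handler="handler",
)
fn.run(schedule="0 * * * *")  # e.g. trigger the batch application hourly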
mlrun/model_monitoring/evidently_application.py (new file)
@@ -0,0 +1,89 @@
# Copyright 2023 Iguazio
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
import uuid
from typing import Union

import pandas as pd
from evidently.renderers.notebook_utils import determine_template
from evidently.report.report import Report
from evidently.suite.base_suite import Suite
from evidently.ui.workspace import Workspace
from evidently.utils.dashboard import TemplateParams

from mlrun.model_monitoring.application import ModelMonitoringApplication


class EvidentlyModelMonitoringApplication(ModelMonitoringApplication):
    def __init__(
        self, evidently_workspace_path: str = None, evidently_project_id: str = None
    ):
        """
        A class for integrating Evidently into MLRun model monitoring within a monitoring application.

        :param evidently_workspace_path: (str) The path to the Evidently workspace.
        :param evidently_project_id:     (str) The ID of the Evidently project.
        """
        self.evidently_workspace = Workspace.create(evidently_workspace_path)
        self.evidently_project_id = evidently_project_id
        self.evidently_project = self.evidently_workspace.get_project(
            evidently_project_id
        )

    def log_evidently_object(
        self, evidently_object: Union[Report, Suite], artifact_name: str
    ):
        """
        Logs an Evidently report or suite as an artifact.

        :param evidently_object: (Union[Report, Suite]) The Evidently report or suite object.
        :param artifact_name:    (str) The name for the logged artifact.
        """
        evidently_object_html = evidently_object.get_html()
        self.context.log_artifact(
            artifact_name, body=evidently_object_html.encode("utf-8"), format="html"
        )

    def log_project_dashboard(
        self,
        timestamp_start: pd.Timestamp,
        timestamp_end: pd.Timestamp,
        artifact_name: str = "dashboard",
    ):
        """
        Logs an Evidently project dashboard.

        :param timestamp_start: (pd.Timestamp) The start timestamp for the dashboard data.
        :param timestamp_end:   (pd.Timestamp) The end timestamp for the dashboard data.
        :param artifact_name:   (str) The name for the logged artifact.
        """

        dashboard_info = self.evidently_project.build_dashboard_info(
            timestamp_start, timestamp_end
        )
        template_params = TemplateParams(
            dashboard_id="pd_" + str(uuid.uuid4()).replace("-", ""),
            dashboard_info=dashboard_info,
            additional_graphs={},
        )

        dashboard_html = self._render(determine_template("inline"), template_params)
        self.context.log_artifact(
            artifact_name, body=dashboard_html.encode("utf-8"), format="html"
        )

    @staticmethod
    def _render(template_func, template_params: TemplateParams):
        return template_func(params=template_params)