mlrun 1.5.0rc11__py3-none-any.whl → 1.5.0rc13__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of mlrun might be problematic. Click here for more details.

Files changed (49) hide show
  1. mlrun/__main__.py +31 -2
  2. mlrun/api/api/endpoints/functions.py +110 -52
  3. mlrun/api/api/endpoints/model_endpoints.py +0 -56
  4. mlrun/api/crud/model_monitoring/deployment.py +208 -38
  5. mlrun/api/crud/model_monitoring/helpers.py +19 -6
  6. mlrun/api/crud/model_monitoring/model_endpoints.py +14 -31
  7. mlrun/api/db/sqldb/db.py +3 -1
  8. mlrun/api/utils/builder.py +2 -4
  9. mlrun/common/model_monitoring/helpers.py +19 -5
  10. mlrun/common/schemas/model_monitoring/constants.py +69 -0
  11. mlrun/common/schemas/model_monitoring/model_endpoints.py +22 -1
  12. mlrun/config.py +30 -12
  13. mlrun/datastore/__init__.py +1 -0
  14. mlrun/datastore/datastore_profile.py +2 -2
  15. mlrun/datastore/sources.py +4 -30
  16. mlrun/datastore/targets.py +106 -55
  17. mlrun/db/httpdb.py +20 -6
  18. mlrun/feature_store/__init__.py +2 -0
  19. mlrun/feature_store/api.py +3 -31
  20. mlrun/feature_store/feature_vector.py +1 -1
  21. mlrun/feature_store/retrieval/base.py +8 -3
  22. mlrun/launcher/remote.py +3 -3
  23. mlrun/lists.py +11 -0
  24. mlrun/model_monitoring/__init__.py +0 -1
  25. mlrun/model_monitoring/api.py +1 -1
  26. mlrun/model_monitoring/application.py +313 -0
  27. mlrun/model_monitoring/batch_application.py +526 -0
  28. mlrun/model_monitoring/batch_application_handler.py +32 -0
  29. mlrun/model_monitoring/evidently_application.py +89 -0
  30. mlrun/model_monitoring/helpers.py +39 -3
  31. mlrun/model_monitoring/stores/kv_model_endpoint_store.py +38 -7
  32. mlrun/model_monitoring/tracking_policy.py +4 -4
  33. mlrun/model_monitoring/writer.py +37 -0
  34. mlrun/projects/pipelines.py +38 -4
  35. mlrun/projects/project.py +257 -43
  36. mlrun/run.py +5 -2
  37. mlrun/runtimes/__init__.py +2 -0
  38. mlrun/runtimes/function.py +2 -1
  39. mlrun/utils/helpers.py +12 -0
  40. mlrun/utils/http.py +3 -0
  41. mlrun/utils/notifications/notification_pusher.py +22 -8
  42. mlrun/utils/version/version.json +2 -2
  43. {mlrun-1.5.0rc11.dist-info → mlrun-1.5.0rc13.dist-info}/METADATA +5 -5
  44. {mlrun-1.5.0rc11.dist-info → mlrun-1.5.0rc13.dist-info}/RECORD +49 -44
  45. /mlrun/model_monitoring/{model_monitoring_batch.py → batch.py} +0 -0
  46. {mlrun-1.5.0rc11.dist-info → mlrun-1.5.0rc13.dist-info}/LICENSE +0 -0
  47. {mlrun-1.5.0rc11.dist-info → mlrun-1.5.0rc13.dist-info}/WHEEL +0 -0
  48. {mlrun-1.5.0rc11.dist-info → mlrun-1.5.0rc13.dist-info}/entry_points.txt +0 -0
  49. {mlrun-1.5.0rc11.dist-info → mlrun-1.5.0rc13.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,526 @@
1
+ # Copyright 2023 Iguazio
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+ #
15
+ import concurrent.futures
16
+ import datetime
17
+ import json
18
+ import os
19
+ import re
20
+ from typing import List, Tuple
21
+
22
+ import numpy as np
23
+ import pandas as pd
24
+
25
+ import mlrun
26
+ import mlrun.common.helpers
27
+ import mlrun.common.model_monitoring.helpers
28
+ import mlrun.common.schemas.model_monitoring
29
+ import mlrun.common.schemas.model_monitoring.constants as mm_constants
30
+ import mlrun.data_types.infer
31
+ import mlrun.feature_store as fstore
32
+ import mlrun.utils.v3io_clients
33
+ from mlrun.datastore import get_stream_pusher
34
+ from mlrun.datastore.targets import ParquetTarget
35
+ from mlrun.model_monitoring.batch import calculate_inputs_statistics
36
+ from mlrun.model_monitoring.helpers import get_monitoring_parquet_path, get_stream_path
37
+ from mlrun.utils import logger
38
+
39
+
40
class BatchApplicationProcessor:
    """
    The main object to handle the batch processing job. This object is used to get the required configurations and
    to manage the main monitoring drift detection process based on the current batch.
    Note that the BatchApplicationProcessor object requires access keys along with valid project configurations.
    """

    def __init__(
        self,
        context: mlrun.run.MLClientCtx,
        project: str,
    ):
        """
        Initialize the batch application processor.

        :param context: An MLRun context.
        :param project: Project name.
        """
        self.context = context
        self.project = project

        logger.info(
            "Initializing BatchProcessor",
            project=project,
        )

        # Get a runtime database

        self.db = mlrun.model_monitoring.get_model_endpoint_store(project=project)

        # If an error occurs, it will be raised using the following argument
        self.endpoints_exceptions = {}

        # Get the batch interval range
        self.batch_dict = context.parameters[
            mlrun.common.schemas.model_monitoring.EventFieldType.BATCH_INTERVALS_DICT
        ]

        # TODO: This will be removed in 1.5.0 once the job params can be parsed with different types
        # Convert batch dict string into a dictionary
        if isinstance(self.batch_dict, str):
            self._parse_batch_dict_str()
        # If provided, only model endpoints in that list will be analyzed
        self.model_endpoints = context.parameters.get(
            mlrun.common.schemas.model_monitoring.EventFieldType.MODEL_ENDPOINTS, None
        )
        self.v3io_access_key = os.environ.get("V3IO_ACCESS_KEY")
        # Fall back to the V3IO access key when a dedicated model monitoring key is not set
        self.model_monitoring_access_key = (
            os.environ.get("MODEL_MONITORING_ACCESS_KEY") or self.v3io_access_key
        )
        self.parquet_directory = get_monitoring_parquet_path(
            project=project,
            kind=mlrun.common.schemas.model_monitoring.FileTargetKind.BATCH_CONTROLLER_PARQUET,
        )
        self.storage_options = None
        if not mlrun.mlconf.is_ce_mode():
            # Non-CE deployments read/write the parquet files through V3IO
            self._initialize_v3io_configurations(
                model_monitoring_access_key=self.model_monitoring_access_key
            )
        elif self.parquet_directory.startswith("s3://"):
            self.storage_options = mlrun.mlconf.get_s3_storage_options()

    def _initialize_v3io_configurations(
        self,
        v3io_access_key: str = None,
        v3io_framesd: str = None,
        v3io_api: str = None,
        model_monitoring_access_key: str = None,
    ):
        """Populate the V3IO endpoints/credentials, falling back to mlconf and environment defaults."""
        # Get the V3IO configurations
        self.v3io_framesd = v3io_framesd or mlrun.mlconf.v3io_framesd
        self.v3io_api = v3io_api or mlrun.mlconf.v3io_api

        self.v3io_access_key = v3io_access_key or os.environ.get("V3IO_ACCESS_KEY")
        self.model_monitoring_access_key = model_monitoring_access_key
        self.storage_options = dict(
            v3io_access_key=self.model_monitoring_access_key, v3io_api=self.v3io_api
        )

    def run(self):
        """
        Main method that runs all the relevant monitoring applications on each model endpoint.

        Endpoints are processed in parallel, one process per endpoint (capped at 10 workers).
        Per-endpoint failures are collected into ``self.endpoints_exceptions``.
        """
        try:
            endpoints = self.db.list_model_endpoints(uids=self.model_endpoints)
            application = mlrun.get_or_create_project(
                self.project
            ).list_model_monitoring_applications()
            if application:
                # De-duplicate application names before fan-out
                applications_names = np.unique(
                    [app.metadata.name for app in application]
                ).tolist()
            else:
                logger.info("There are no monitoring application found in this project")
                applications_names = []

        except Exception as e:
            logger.error("Failed to list endpoints", exc=e)
            return
        if endpoints and applications_names:
            # Initialize a process pool that will be used to run each endpoint applications on a dedicated process
            pool = concurrent.futures.ProcessPoolExecutor(
                max_workers=min(len(endpoints), 10),
            )
            futures = []
            for endpoint in endpoints:
                if (
                    endpoint[
                        mlrun.common.schemas.model_monitoring.EventFieldType.ACTIVE
                    ]
                    and endpoint[
                        mlrun.common.schemas.model_monitoring.EventFieldType.MONITORING_MODE
                    ]
                    == mlrun.common.schemas.model_monitoring.ModelMonitoringMode.enabled.value
                ):
                    # Skip router endpoint:
                    if (
                        int(
                            endpoint[
                                mlrun.common.schemas.model_monitoring.EventFieldType.ENDPOINT_TYPE
                            ]
                        )
                        == mlrun.common.schemas.model_monitoring.EndpointType.ROUTER
                    ):
                        # Router endpoint has no feature stats
                        logger.info(
                            f"{endpoint[mlrun.common.schemas.model_monitoring.EventFieldType.UID]} is router skipping"
                        )
                        continue
                    future = pool.submit(
                        BatchApplicationProcessor.model_endpoint_process,
                        endpoint,
                        applications_names,
                        self.batch_dict,
                        self.project,
                        self.parquet_directory,
                        self.storage_options,
                        self.model_monitoring_access_key,
                    )
                    futures.append(future)
            for future in concurrent.futures.as_completed(futures):
                # model_endpoint_process returns (endpoint_id, exception) on failure, None on success
                res = future.result()
                if res:
                    self.endpoints_exceptions[res[0]] = res[1]

            self._delete_old_parquet()

    @staticmethod
    def model_endpoint_process(
        endpoint: dict,
        applications_names: List[str],
        bath_dict: dict,
        project: str,
        parquet_directory: str,
        storage_options: dict,
        model_monitoring_access_key: str,
    ):
        """
        Process a model endpoint and trigger the monitoring applications;
        this function runs on a different process for each endpoint.

        :param endpoint: (dict) Dictionary representing the model endpoint.
        :param applications_names: (List[str]) List of application names to push results to.
        :param bath_dict: (dict) Dictionary containing batch interval start and end times.
                          NOTE(review): parameter name looks like a typo of "batch_dict" -- confirm
                          before renaming, callers may pass it by keyword.
        :param project: (str) Project name.
        :param parquet_directory: (str) Directory to store Parquet files
        :param storage_options: (dict) Storage options for writing ParquetTarget.
        :param model_monitoring_access_key: (str) Access key to apply the model monitoring process.

        :return: ``(endpoint_id, exception)`` on a caught failure, otherwise ``None``.
        """
        endpoint_id = endpoint[mlrun.common.schemas.model_monitoring.EventFieldType.UID]
        try:
            # Getting batch interval start time and end time
            start_time, end_time = BatchApplicationProcessor._get_interval_range(
                bath_dict
            )
            m_fs = fstore.get_feature_set(
                endpoint[
                    mlrun.common.schemas.model_monitoring.EventFieldType.FEATURE_SET_URI
                ]
            )
            labels = endpoint[
                mlrun.common.schemas.model_monitoring.EventFieldType.LABEL_NAMES
            ]
            if labels:
                if isinstance(labels, str):
                    labels = json.loads(labels)
                # Make sure each label column exists on the feature set before reading offline data
                for label in labels:
                    if label not in list(m_fs.spec.features.keys()):
                        m_fs.add_feature(fstore.Feature(name=label, value_type="float"))

            # TODO : add extra feature_sets

            try:
                # get sample data
                df = BatchApplicationProcessor._get_sample_df(
                    m_fs,
                    endpoint_id,
                    end_time,
                    start_time,
                    parquet_directory,
                    storage_options,
                )

                if len(df) == 0:
                    logger.warn(
                        "Not enough model events since the beginning of the batch interval",
                        featureset_name=m_fs.metadata.name,
                        endpoint=endpoint[
                            mlrun.common.schemas.model_monitoring.EventFieldType.UID
                        ],
                        min_rqeuired_events=mlrun.mlconf.model_endpoint_monitoring.parquet_batching_max_events,
                        start_time=str(
                            datetime.datetime.now() - datetime.timedelta(hours=1)
                        ),
                        end_time=str(datetime.datetime.now()),
                    )
                    return

            # TODO: The below warn will be removed once the state of the Feature Store target is updated
            # as expected. In that case, the existence of the file will be checked before trying to get
            # the offline data from the feature set.
            # Continue if not enough events provided since the deployment of the model endpoint
            except FileNotFoundError:
                logger.warn(
                    "Parquet not found, probably due to not enough model events",
                    # parquet_target=m_fs.status.targets[0].path, TODO:
                    endpoint=endpoint[
                        mlrun.common.schemas.model_monitoring.EventFieldType.UID
                    ],
                    min_rqeuired_events=mlrun.mlconf.model_endpoint_monitoring.parquet_batching_max_events,
                )
                return

            # Infer feature set stats and schema
            fstore.api._infer_from_static_df(
                df,
                m_fs,
                options=mlrun.data_types.infer.InferOptions.all_stats(),
            )

            # Save feature set to apply changes
            m_fs.save()

            # Get the timestamp of the latest request:
            latest_request = df[
                mlrun.common.schemas.model_monitoring.EventFieldType.TIMESTAMP
            ].iloc[-1]

            # Get the feature stats from the model endpoint for reference data
            feature_stats = json.loads(
                endpoint[
                    mlrun.common.schemas.model_monitoring.EventFieldType.FEATURE_STATS
                ]
            )

            # Get the current stats:
            current_stats = calculate_inputs_statistics(
                sample_set_statistics=feature_stats,
                inputs=df,
            )

            # create and push data to all applications
            BatchApplicationProcessor._push_to_applications(
                current_stats,
                feature_stats,
                parquet_directory,
                end_time,
                endpoint_id,
                latest_request,
                project,
                applications_names,
                model_monitoring_access_key,
            )

        # NOTE(review): only FileNotFoundError is caught here, so any other exception will
        # propagate out of the worker process and surface via future.result() -- confirm
        # whether a broader `except Exception` was intended.
        except FileNotFoundError as e:
            logger.error(
                f"Exception for endpoint {endpoint[mlrun.common.schemas.model_monitoring.EventFieldType.UID]}"
            )
            return endpoint_id, e

    @staticmethod
    def _get_interval_range(batch_dict) -> Tuple[datetime.datetime, datetime.datetime]:
        """Getting batch interval time range: (now - minutes/hours/days, now)."""
        minutes, hours, days = (
            batch_dict[mlrun.common.schemas.model_monitoring.EventFieldType.MINUTES],
            batch_dict[mlrun.common.schemas.model_monitoring.EventFieldType.HOURS],
            batch_dict[mlrun.common.schemas.model_monitoring.EventFieldType.DAYS],
        )
        # NOTE: datetime.now() is called twice, so start/end are computed from slightly
        # different "now" values (sub-second skew).
        start_time = datetime.datetime.now() - datetime.timedelta(
            minutes=minutes, hours=hours, days=days
        )
        end_time = datetime.datetime.now()
        return start_time, end_time

    def _parse_batch_dict_str(self):
        """Convert batch dictionary string (e.g. "{minutes:0, hours:1, days:0}") into a valid dictionary."""
        characters_to_remove = "{} "
        pattern = "[" + characters_to_remove + "]"
        # Remove unnecessary characters from the provided string
        batch_list = re.sub(pattern, "", self.batch_dict).split(",")
        # Initialize the dictionary of batch interval ranges
        self.batch_dict = {}
        for pair in batch_list:
            pair_list = pair.split(":")
            self.batch_dict[pair_list[0]] = float(pair_list[1])

    @staticmethod
    def _get_parquet_path(
        parquet_directory: str, schedule_time: datetime.datetime, endpoint_id: str
    ):
        """Build the partitioned parquet path: year=/month=/day=/hour=/minute=/endpoint_id=..."""
        schedule_time_str = ""
        for unit, fmt in [
            ("year", "%Y"),
            ("month", "%m"),
            ("day", "%d"),
            ("hour", "%H"),
            ("minute", "%M"),
        ]:
            schedule_time_str += f"{unit}={schedule_time.strftime(fmt)}/"
        endpoint_str = f"{mlrun.common.schemas.model_monitoring.EventFieldType.ENDPOINT_ID}={endpoint_id}"

        # NOTE: schedule_time_str already ends with "/", so the joined path contains a
        # double slash -- presumably harmless for the target filesystem; confirm.
        return f"{parquet_directory}/{schedule_time_str}/{endpoint_str}"

    def _delete_old_parquet(self):
        """Delete all the sample parquets that were saved yesterday - (
        TODO: change it to be configurable & more simple)"""
        _, schedule_time = BatchApplicationProcessor._get_interval_range(
            self.batch_dict
        )
        threshold_date = schedule_time - datetime.timedelta(days=1)
        threshold_year = threshold_date.year
        threshold_month = threshold_date.month
        threshold_day = threshold_date.day

        base_directory = get_monitoring_parquet_path(
            project=self.project,
            kind=mlrun.common.schemas.model_monitoring.FileTargetKind.BATCH_CONTROLLER_PARQUET,
        )
        target = ParquetTarget(path=base_directory)
        fs = target._get_store().get_filesystem()

        try:
            # List all subdirectories in the base directory
            years_subdirectories = fs.listdir(base_directory)

            # Walk year= / month= / day= partitions and remove the partition that is one day
            # older than the threshold, handling month/year rollovers explicitly.
            for y_subdirectory in years_subdirectories:
                year = int(y_subdirectory["name"].split("/")[-1].split("=")[1])
                if year == threshold_year:
                    month_subdirectories = fs.listdir(y_subdirectory["name"])
                    for m_subdirectory in month_subdirectories:
                        month = int(m_subdirectory["name"].split("/")[-1].split("=")[1])
                        if month == threshold_month:
                            day_subdirectories = fs.listdir(m_subdirectory["name"])
                            for d_subdirectory in day_subdirectories:
                                day = int(
                                    d_subdirectory["name"].split("/")[-1].split("=")[1]
                                )
                                if day == threshold_day - 1:
                                    fs.rm(path=d_subdirectory["name"], recursive=True)
                        elif month == threshold_month - 1 and threshold_day == 1:
                            fs.rm(path=m_subdirectory["name"], recursive=True)
                elif (
                    year == threshold_year - 1
                    and threshold_month == 1
                    and threshold_day == 1
                ):
                    fs.rm(path=y_subdirectory["name"], recursive=True)
        except FileNotFoundError as exc:
            logger.warn(
                f"Batch application process were unsuccessful to remove the old parquets due to {exc}."
            )

    @staticmethod
    def _push_to_applications(
        current_stats,
        feature_stats,
        parquet_directory,
        end_time,
        endpoint_id,
        latest_request,
        project,
        applications_names,
        model_monitoring_access_key,
    ):
        """
        Pushes data to multiple stream applications.

        :param current_stats: Current statistics of input data.
        :param feature_stats: Statistics of train features.
        :param parquet_directory: Directory where sample Parquet files are stored.
        :param end_time: End time of the monitoring schedule.
        :param endpoint_id: Identifier for the model endpoint.
        :param latest_request: Timestamp of the latest model request.
        :param project: mlrun Project name.
        :param applications_names: List of application names to which data will be pushed.
        :param model_monitoring_access_key: Access key used by the stream pusher.
        """
        data = {
            mm_constants.ApplicationEvent.CURRENT_STATS: json.dumps(current_stats),
            mm_constants.ApplicationEvent.FEATURE_STATS: json.dumps(feature_stats),
            mm_constants.ApplicationEvent.SAMPLE_PARQUET_PATH: BatchApplicationProcessor._get_parquet_path(
                parquet_directory=parquet_directory,
                schedule_time=end_time,
                endpoint_id=endpoint_id,
            ),
            mm_constants.ApplicationEvent.SCHEDULE_TIME: end_time.isoformat(
                sep=" ", timespec="microseconds"
            ),
            mm_constants.ApplicationEvent.LAST_REQUEST: latest_request.isoformat(
                sep=" ", timespec="microseconds"
            ),
            mm_constants.ApplicationEvent.ENDPOINT_ID: endpoint_id,
            mm_constants.ApplicationEvent.OUTPUT_STREAM_URI: get_stream_path(
                project=project,
                application_name=mlrun.common.schemas.model_monitoring.constants.MonitoringFunctionNames.WRITER,
            ),
        }
        # The same event dict is re-used per application; only APPLICATION_NAME changes
        for app_name in applications_names:
            data.update({mm_constants.ApplicationEvent.APPLICATION_NAME: app_name})
            stream_uri = get_stream_path(project=project, application_name=app_name)
            logger.info(
                f"push endpoint_id {endpoint_id} to {app_name} by stream :{stream_uri}"
            )
            get_stream_pusher(stream_uri, access_key=model_monitoring_access_key).push(
                [data]
            )

    @staticmethod
    def _get_sample_df(
        feature_set,
        endpoint_id,
        end_time,
        start_time,
        parquet_directory,
        storage_options,
    ):
        """
        Retrieves a sample DataFrame of the current input.

        :param feature_set: The main feature set.
        :param endpoint_id: Identifier for the model endpoint.
        :param end_time: End time of the monitoring schedule.
        :param start_time: Start time of the monitoring schedule.
        :param parquet_directory: Directory where Parquet files are stored.
        :param storage_options: Storage options for accessing the data.

        :return: Sample DataFrame containing offline features for the specified endpoint.
        """
        features = [f"{feature_set.metadata.name}.*"]
        join_graph = fstore.JoinGraph(first_feature_set=feature_set.metadata.name)
        vector = fstore.FeatureVector(
            name=f"{endpoint_id}_vector",
            features=features,
            with_indexes=True,
            join_graph=join_graph,
        )
        vector.feature_set_objects = {
            feature_set.metadata.name: feature_set
        }  # to avoid exception when the tag is not latest
        entity_rows = pd.DataFrame(
            {
                mlrun.common.schemas.model_monitoring.EventFieldType.ENDPOINT_ID: [
                    endpoint_id
                ],
                "scheduled_time": [end_time],
            }
        )
        offline_response = fstore.get_offline_features(
            feature_vector=vector,
            entity_rows=entity_rows,
            entity_timestamp_column="scheduled_time",
            start_time=start_time,
            end_time=end_time,
            timestamp_for_filtering=mlrun.common.schemas.model_monitoring.EventFieldType.TIMESTAMP,
            target=ParquetTarget(
                path=parquet_directory,
                time_partitioning_granularity="minute",
                partition_cols=[
                    mlrun.common.schemas.model_monitoring.EventFieldType.ENDPOINT_ID,
                ],
                storage_options=storage_options,
            ),
        )
        df = offline_response.to_dataframe()
        return df
@@ -0,0 +1,32 @@
1
+ # Copyright 2023 Iguazio
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+ #
15
+
16
+ import mlrun
17
+ from mlrun.model_monitoring.batch_application import BatchApplicationProcessor
18
+
19
+
20
def handler(context: mlrun.run.MLClientCtx):
    """
    Run the model monitoring batch application for the context's project.

    :param context: the MLRun context
    """
    processor = BatchApplicationProcessor(context=context, project=context.project)
    processor.run()
    if processor.endpoints_exceptions:
        print(processor.endpoints_exceptions)
@@ -0,0 +1,89 @@
1
+ # Copyright 2023 Iguazio
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+ #
15
+ import uuid
16
+ from typing import Union
17
+
18
+ import pandas as pd
19
+ from evidently.renderers.notebook_utils import determine_template
20
+ from evidently.report.report import Report
21
+ from evidently.suite.base_suite import Suite
22
+ from evidently.ui.workspace import Workspace
23
+ from evidently.utils.dashboard import TemplateParams
24
+
25
+ from mlrun.model_monitoring.application import ModelMonitoringApplication
26
+
27
+
28
class EvidentlyModelMonitoringApplication(ModelMonitoringApplication):
    def __init__(
        self, evidently_workspace_path: str = None, evidently_project_id: str = None
    ):
        """
        A class for integrating Evidently for mlrun model monitoring within a monitoring application.

        :param evidently_workspace_path: (str) The path to the Evidently workspace.
        :param evidently_project_id:     (str) The ID of the Evidently project.
        """
        workspace = Workspace.create(evidently_workspace_path)
        self.evidently_workspace = workspace
        self.evidently_project_id = evidently_project_id
        self.evidently_project = workspace.get_project(evidently_project_id)

    def log_evidently_object(
        self, evidently_object: Union[Report, Suite], artifact_name: str
    ):
        """
        Log an Evidently report or suite as an HTML artifact on the application context.

        :param evidently_object: (Union[Report, Suite]) The Evidently report or suite object.
        :param artifact_name:    (str) The name for the logged artifact.
        """
        html_body = evidently_object.get_html().encode("utf-8")
        self.context.log_artifact(artifact_name, body=html_body, format="html")

    def log_project_dashboard(
        self,
        timestamp_start: pd.Timestamp,
        timestamp_end: pd.Timestamp,
        artifact_name: str = "dashboard",
    ):
        """
        Log the Evidently project dashboard for the given time window as an HTML artifact.

        :param timestamp_start: (pd.Timestamp) The start timestamp for the dashboard data.
        :param timestamp_end:   (pd.Timestamp) The end timestamp for the dashboard data.
        :param artifact_name:   (str) The name for the logged artifact.
        """
        info = self.evidently_project.build_dashboard_info(
            timestamp_start, timestamp_end
        )
        # Evidently expects a unique dashboard id; derive it from a fresh UUID
        unique_id = "pd_" + str(uuid.uuid4()).replace("-", "")
        params = TemplateParams(
            dashboard_id=unique_id,
            dashboard_info=info,
            additional_graphs={},
        )
        rendered = self._render(determine_template("inline"), params)
        self.context.log_artifact(
            artifact_name, body=rendered.encode("utf-8"), format="html"
        )

    @staticmethod
    def _render(template_func, template_params: TemplateParams):
        # Apply the Evidently template callable to the given parameters.
        return template_func(params=template_params)