mlrun 1.7.0rc28__py3-none-any.whl → 1.7.0rc55__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the two versions as they appear in their public registries.

Potentially problematic release: this version of mlrun might be problematic.
Files changed (135)
  1. mlrun/__main__.py +4 -2
  2. mlrun/alerts/alert.py +75 -8
  3. mlrun/artifacts/base.py +1 -0
  4. mlrun/artifacts/manager.py +9 -2
  5. mlrun/common/constants.py +4 -1
  6. mlrun/common/db/sql_session.py +3 -2
  7. mlrun/common/formatters/__init__.py +1 -0
  8. mlrun/common/formatters/artifact.py +1 -0
  9. mlrun/{model_monitoring/application.py → common/formatters/feature_set.py} +20 -6
  10. mlrun/common/formatters/run.py +3 -0
  11. mlrun/common/helpers.py +0 -1
  12. mlrun/common/schemas/__init__.py +3 -1
  13. mlrun/common/schemas/alert.py +15 -12
  14. mlrun/common/schemas/api_gateway.py +6 -6
  15. mlrun/common/schemas/auth.py +5 -0
  16. mlrun/common/schemas/client_spec.py +0 -1
  17. mlrun/common/schemas/common.py +7 -4
  18. mlrun/common/schemas/frontend_spec.py +7 -0
  19. mlrun/common/schemas/function.py +7 -0
  20. mlrun/common/schemas/model_monitoring/__init__.py +4 -3
  21. mlrun/common/schemas/model_monitoring/constants.py +41 -26
  22. mlrun/common/schemas/model_monitoring/model_endpoints.py +23 -47
  23. mlrun/common/schemas/notification.py +69 -12
  24. mlrun/common/schemas/project.py +45 -12
  25. mlrun/common/schemas/workflow.py +10 -2
  26. mlrun/common/types.py +1 -0
  27. mlrun/config.py +91 -35
  28. mlrun/data_types/data_types.py +6 -1
  29. mlrun/data_types/spark.py +2 -2
  30. mlrun/data_types/to_pandas.py +57 -25
  31. mlrun/datastore/__init__.py +1 -0
  32. mlrun/datastore/alibaba_oss.py +3 -2
  33. mlrun/datastore/azure_blob.py +125 -37
  34. mlrun/datastore/base.py +42 -21
  35. mlrun/datastore/datastore.py +4 -2
  36. mlrun/datastore/datastore_profile.py +1 -1
  37. mlrun/datastore/dbfs_store.py +3 -7
  38. mlrun/datastore/filestore.py +1 -3
  39. mlrun/datastore/google_cloud_storage.py +85 -29
  40. mlrun/datastore/inmem.py +4 -1
  41. mlrun/datastore/redis.py +1 -0
  42. mlrun/datastore/s3.py +25 -12
  43. mlrun/datastore/sources.py +76 -4
  44. mlrun/datastore/spark_utils.py +30 -0
  45. mlrun/datastore/storeytargets.py +151 -0
  46. mlrun/datastore/targets.py +102 -131
  47. mlrun/datastore/v3io.py +1 -0
  48. mlrun/db/base.py +15 -6
  49. mlrun/db/httpdb.py +57 -28
  50. mlrun/db/nopdb.py +29 -5
  51. mlrun/errors.py +20 -3
  52. mlrun/execution.py +46 -5
  53. mlrun/feature_store/api.py +25 -1
  54. mlrun/feature_store/common.py +6 -11
  55. mlrun/feature_store/feature_vector.py +3 -1
  56. mlrun/feature_store/retrieval/job.py +4 -1
  57. mlrun/feature_store/retrieval/spark_merger.py +10 -39
  58. mlrun/feature_store/steps.py +8 -0
  59. mlrun/frameworks/_common/plan.py +3 -3
  60. mlrun/frameworks/_ml_common/plan.py +1 -1
  61. mlrun/frameworks/parallel_coordinates.py +2 -3
  62. mlrun/frameworks/sklearn/mlrun_interface.py +13 -3
  63. mlrun/k8s_utils.py +48 -2
  64. mlrun/launcher/client.py +6 -6
  65. mlrun/launcher/local.py +2 -2
  66. mlrun/model.py +215 -34
  67. mlrun/model_monitoring/api.py +38 -24
  68. mlrun/model_monitoring/applications/__init__.py +1 -2
  69. mlrun/model_monitoring/applications/_application_steps.py +60 -29
  70. mlrun/model_monitoring/applications/base.py +2 -174
  71. mlrun/model_monitoring/applications/context.py +197 -70
  72. mlrun/model_monitoring/applications/evidently_base.py +11 -85
  73. mlrun/model_monitoring/applications/histogram_data_drift.py +21 -16
  74. mlrun/model_monitoring/applications/results.py +4 -4
  75. mlrun/model_monitoring/controller.py +110 -282
  76. mlrun/model_monitoring/db/stores/__init__.py +8 -3
  77. mlrun/model_monitoring/db/stores/base/store.py +3 -0
  78. mlrun/model_monitoring/db/stores/sqldb/models/base.py +9 -7
  79. mlrun/model_monitoring/db/stores/sqldb/models/mysql.py +18 -3
  80. mlrun/model_monitoring/db/stores/sqldb/sql_store.py +43 -23
  81. mlrun/model_monitoring/db/stores/v3io_kv/kv_store.py +48 -35
  82. mlrun/model_monitoring/db/tsdb/__init__.py +7 -2
  83. mlrun/model_monitoring/db/tsdb/base.py +147 -15
  84. mlrun/model_monitoring/db/tsdb/tdengine/schemas.py +94 -55
  85. mlrun/model_monitoring/db/tsdb/tdengine/stream_graph_steps.py +0 -3
  86. mlrun/model_monitoring/db/tsdb/tdengine/tdengine_connector.py +144 -38
  87. mlrun/model_monitoring/db/tsdb/v3io/stream_graph_steps.py +44 -3
  88. mlrun/model_monitoring/db/tsdb/v3io/v3io_connector.py +246 -57
  89. mlrun/model_monitoring/helpers.py +70 -50
  90. mlrun/model_monitoring/stream_processing.py +96 -195
  91. mlrun/model_monitoring/writer.py +13 -5
  92. mlrun/package/packagers/default_packager.py +2 -2
  93. mlrun/projects/operations.py +16 -8
  94. mlrun/projects/pipelines.py +126 -115
  95. mlrun/projects/project.py +286 -129
  96. mlrun/render.py +3 -3
  97. mlrun/run.py +38 -19
  98. mlrun/runtimes/__init__.py +19 -8
  99. mlrun/runtimes/base.py +4 -1
  100. mlrun/runtimes/daskjob.py +1 -1
  101. mlrun/runtimes/funcdoc.py +1 -1
  102. mlrun/runtimes/kubejob.py +6 -6
  103. mlrun/runtimes/local.py +12 -5
  104. mlrun/runtimes/nuclio/api_gateway.py +68 -8
  105. mlrun/runtimes/nuclio/application/application.py +307 -70
  106. mlrun/runtimes/nuclio/function.py +63 -14
  107. mlrun/runtimes/nuclio/serving.py +10 -10
  108. mlrun/runtimes/pod.py +25 -19
  109. mlrun/runtimes/remotesparkjob.py +2 -5
  110. mlrun/runtimes/sparkjob/spark3job.py +16 -17
  111. mlrun/runtimes/utils.py +34 -0
  112. mlrun/serving/routers.py +2 -5
  113. mlrun/serving/server.py +37 -19
  114. mlrun/serving/states.py +30 -3
  115. mlrun/serving/v2_serving.py +44 -35
  116. mlrun/track/trackers/mlflow_tracker.py +5 -0
  117. mlrun/utils/async_http.py +1 -1
  118. mlrun/utils/db.py +18 -0
  119. mlrun/utils/helpers.py +150 -36
  120. mlrun/utils/http.py +1 -1
  121. mlrun/utils/notifications/notification/__init__.py +0 -1
  122. mlrun/utils/notifications/notification/webhook.py +8 -1
  123. mlrun/utils/notifications/notification_pusher.py +1 -1
  124. mlrun/utils/v3io_clients.py +2 -2
  125. mlrun/utils/version/version.json +2 -2
  126. {mlrun-1.7.0rc28.dist-info → mlrun-1.7.0rc55.dist-info}/METADATA +153 -66
  127. {mlrun-1.7.0rc28.dist-info → mlrun-1.7.0rc55.dist-info}/RECORD +131 -134
  128. {mlrun-1.7.0rc28.dist-info → mlrun-1.7.0rc55.dist-info}/WHEEL +1 -1
  129. mlrun/feature_store/retrieval/conversion.py +0 -271
  130. mlrun/model_monitoring/controller_handler.py +0 -37
  131. mlrun/model_monitoring/evidently_application.py +0 -20
  132. mlrun/model_monitoring/prometheus.py +0 -216
  133. {mlrun-1.7.0rc28.dist-info → mlrun-1.7.0rc55.dist-info}/LICENSE +0 -0
  134. {mlrun-1.7.0rc28.dist-info → mlrun-1.7.0rc55.dist-info}/entry_points.txt +0 -0
  135. {mlrun-1.7.0rc28.dist-info → mlrun-1.7.0rc55.dist-info}/top_level.txt +0 -0
mlrun/model_monitoring/controller.py

@@ -18,24 +18,19 @@ import json
 import os
 import re
 from collections.abc import Iterator
-from typing import Any, NamedTuple, Optional, Union, cast
+from typing import NamedTuple, Optional, Union, cast
 
 import nuclio
 
 import mlrun
 import mlrun.common.schemas.model_monitoring.constants as mm_constants
 import mlrun.data_types.infer
-import mlrun.feature_store as fstore
 import mlrun.model_monitoring.db.stores
-from mlrun.common.model_monitoring.helpers import FeatureStats, pad_features_hist
 from mlrun.datastore import get_stream_pusher
-from mlrun.datastore.targets import ParquetTarget
 from mlrun.errors import err_to_str
 from mlrun.model_monitoring.helpers import (
     _BatchDict,
     batch_dict2timedelta,
-    calculate_inputs_statistics,
-    get_monitoring_parquet_path,
     get_stream_path,
 )
 from mlrun.utils import datetime_now, logger
@@ -218,7 +213,7 @@ class _BatchWindowGenerator:
             # If the endpoint does not have a stream, `last_updated` should be
             # the minimum between the current time and the last updated time.
             # This compensates for the bumping mechanism - see
-            # `bump_model_endpoint_last_request`.
+            # `update_model_endpoint_last_request`.
             last_updated = min(int(datetime_now().timestamp()), last_updated)
             logger.debug(
                 "The endpoint does not have a stream", last_updated=last_updated
@@ -273,26 +268,14 @@ class MonitoringApplicationController:
     Note that the MonitoringApplicationController object requires access keys along with valid project configurations.
     """
 
-    def __init__(
-        self,
-        mlrun_context: mlrun.run.MLClientCtx,
-        project: str,
-    ):
-        """
-        Initialize Monitoring Application Processor object.
+    def __init__(self) -> None:
+        """Initialize Monitoring Application Controller"""
+        self.project = cast(str, mlrun.mlconf.default_project)
+        self.project_obj = mlrun.load_project(name=self.project, url=self.project)
 
-        :param mlrun_context: An MLRun context.
-        :param project: Project name.
-        """
-        self.context = mlrun_context
-        self.project = project
-        self.project_obj = mlrun.get_or_create_project(project)
+        logger.debug(f"Initializing {self.__class__.__name__}", project=self.project)
 
-        mlrun_context.logger.debug(
-            f"Initializing {self.__class__.__name__}", project=project
-        )
-
-        self.db = mlrun.model_monitoring.get_store_object(project=project)
+        self.db = mlrun.model_monitoring.get_store_object(project=self.project)
 
         self._batch_window_generator = _BatchWindowGenerator(
             batch_dict=json.loads(

@@ -303,15 +286,9 @@ class MonitoringApplicationController:
         )
 
         self.model_monitoring_access_key = self._get_model_monitoring_access_key()
-        self.parquet_directory = get_monitoring_parquet_path(
-            self.project_obj,
-            kind=mm_constants.FileTargetKind.APPS_PARQUET,
+        self.tsdb_connector = mlrun.model_monitoring.get_tsdb_connector(
+            project=self.project
         )
-        self.storage_options = None
-        if not mlrun.mlconf.is_ce_mode():
-            self._initialize_v3io_configurations()
-        elif self.parquet_directory.startswith("s3://"):
-            self.storage_options = mlrun.mlconf.get_s3_storage_options()
 
     @staticmethod
     def _get_model_monitoring_access_key() -> Optional[str]:
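Note on the constructor change above: the controller no longer receives an MLClientCtx; it resolves its project from the global mlrun configuration and loads the project object itself. A minimal sketch of the equivalent setup, assuming a default project is already configured (the project name below is illustrative):

import mlrun

# Illustrative only - on a real deployment this is set by the platform.
mlrun.mlconf.default_project = "model-monitoring-demo"

project = mlrun.mlconf.default_project
# Mirrors the controller's own call: the project is loaded by name,
# so no run context has to be threaded in from the caller.
project_obj = mlrun.load_project(name=project, url=project)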
@@ -321,98 +298,85 @@ class MonitoringApplicationController:
         access_key = mlrun.mlconf.get_v3io_access_key()
         return access_key
 
-    def _initialize_v3io_configurations(self) -> None:
-        self.v3io_framesd = mlrun.mlconf.v3io_framesd
-        self.v3io_api = mlrun.mlconf.v3io_api
-        self.storage_options = dict(
-            v3io_access_key=self.model_monitoring_access_key, v3io_api=self.v3io_api
-        )
-
-    def run(self, event: nuclio.Event):
+    def run(self) -> None:
         """
-        Main method for run all the relevant monitoring applications on each endpoint
-
-        :param event: trigger event
+        Main method for run all the relevant monitoring applications on each endpoint.
+        This method handles the following:
+        1. List model endpoints
+        2. List applications
+        3. Check model monitoring windows
+        4. Send data to applications
+        5. Delete old parquets
         """
         logger.info("Start running monitoring controller")
         try:
             applications_names = []
-            endpoints = self.db.list_model_endpoints()
+            endpoints = self.db.list_model_endpoints(include_stats=True)
             if not endpoints:
-                self.context.logger.info(
-                    "No model endpoints found", project=self.project
-                )
+                logger.info("No model endpoints found", project=self.project)
                 return
             monitoring_functions = self.project_obj.list_model_monitoring_functions()
             if monitoring_functions:
-                # Gets only application in ready state
                 applications_names = list(
-                    {
-                        app.metadata.name
-                        for app in monitoring_functions
-                        if (
-                            app.status.state == "ready"
-                            # workaround for the default app, as its `status.state` is `None`
-                            or app.metadata.name
-                            == mm_constants.HistogramDataDriftApplicationConstants.NAME
-                        )
-                    }
+                    {app.metadata.name for app in monitoring_functions}
                 )
+            # if monitoring_functions: - TODO : ML-7700
+            # Gets only application in ready state
+            # applications_names = list(
+            #     {
+            #         app.metadata.name
+            #         for app in monitoring_functions
+            #         if (
+            #             app.status.state == "ready"
+            #             # workaround for the default app, as its `status.state` is `None`
+            #             or app.metadata.name
+            #             == mm_constants.HistogramDataDriftApplicationConstants.NAME
+            #         )
+            #     }
+            # )
             if not applications_names:
-                self.context.logger.info(
-                    "No monitoring functions found", project=self.project
-                )
+                logger.info("No monitoring functions found", project=self.project)
                 return
-            self.context.logger.info(
+            logger.info(
                 "Starting to iterate over the applications",
                 applications=applications_names,
             )
 
         except Exception as e:
-            self.context.logger.error(
+            logger.error(
                 "Failed to list endpoints and monitoring applications",
                 exc=err_to_str(e),
            )
            return
         # Initialize a process pool that will be used to run each endpoint applications on a dedicated process
-        pool = concurrent.futures.ProcessPoolExecutor(
+        with concurrent.futures.ThreadPoolExecutor(
             max_workers=min(len(endpoints), 10),
-        )
-        futures = []
-        for endpoint in endpoints:
-            if (
-                endpoint[mm_constants.EventFieldType.ACTIVE]
-                and endpoint[mm_constants.EventFieldType.MONITORING_MODE]
-                == mm_constants.ModelMonitoringMode.enabled.value
-            ):
-                # Skip router endpoint:
+        ) as pool:
+            for endpoint in endpoints:
                 if (
-                    int(endpoint[mm_constants.EventFieldType.ENDPOINT_TYPE])
-                    == mm_constants.EndpointType.ROUTER
+                    endpoint[mm_constants.EventFieldType.ACTIVE]
+                    and endpoint[mm_constants.EventFieldType.MONITORING_MODE]
+                    == mm_constants.ModelMonitoringMode.enabled.value
                 ):
-                    # Router endpoint has no feature stats
-                    logger.info(
-                        f"{endpoint[mm_constants.EventFieldType.UID]} is router skipping"
+                    # Skip router endpoint:
+                    if (
+                        int(endpoint[mm_constants.EventFieldType.ENDPOINT_TYPE])
+                        == mm_constants.EndpointType.ROUTER
+                    ):
+                        # Router endpoint has no feature stats
+                        logger.info(
+                            f"{endpoint[mm_constants.EventFieldType.UID]} is router, skipping"
+                        )
+                        continue
+                    pool.submit(
+                        MonitoringApplicationController.model_endpoint_process,
+                        endpoint=endpoint,
+                        applications_names=applications_names,
+                        batch_window_generator=self._batch_window_generator,
+                        project=self.project,
+                        model_monitoring_access_key=self.model_monitoring_access_key,
+                        tsdb_connector=self.tsdb_connector,
                     )
-                    continue
-                future = pool.submit(
-                    MonitoringApplicationController.model_endpoint_process,
-                    endpoint=endpoint,
-                    applications_names=applications_names,
-                    batch_window_generator=self._batch_window_generator,
-                    project=self.project,
-                    parquet_directory=self.parquet_directory,
-                    storage_options=self.storage_options,
-                    model_monitoring_access_key=self.model_monitoring_access_key,
-                )
-                futures.append(future)
-
-        for future in concurrent.futures.as_completed(futures):
-            result = future.result()
-            if result:
-                self.context.log_results(result)
-
-        self._delete_old_parquet(endpoints=endpoints)
 
     @classmethod
     def model_endpoint_process(
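The scheduling change in this hunk swaps a fire-and-collect ProcessPoolExecutor for a ThreadPoolExecutor used as a context manager: exiting the `with` block calls shutdown(wait=True), so the controller still blocks until every per-endpoint task finishes even though the explicit as_completed loop is gone. A minimal sketch of the pattern, with a hypothetical `process_endpoint` standing in for `model_endpoint_process`:

import concurrent.futures

def process_endpoint(endpoint: dict) -> None:
    # Hypothetical stand-in for model_endpoint_process. Note that any
    # exception raised here is captured in the Future and never re-raised,
    # since the results are no longer collected.
    print(f"processing {endpoint['uid']}")

endpoints = [{"uid": "ep-1"}, {"uid": "ep-2"}]

# Leaving the `with` block implies shutdown(wait=True): all submitted
# tasks complete before run() returns.
with concurrent.futures.ThreadPoolExecutor(
    max_workers=min(len(endpoints), 10),
) as pool:
    for endpoint in endpoints:
        pool.submit(process_endpoint, endpoint=endpoint)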
@@ -421,10 +385,9 @@ class MonitoringApplicationController:
         applications_names: list[str],
         batch_window_generator: _BatchWindowGenerator,
         project: str,
-        parquet_directory: str,
-        storage_options: dict,
         model_monitoring_access_key: str,
-    ) -> Optional[dict[str, list[str]]]:
+        tsdb_connector: mlrun.model_monitoring.db.tsdb.TSDBConnector,
+    ) -> None:
         """
         Process a model endpoint and trigger the monitoring applications. This function running on different process
         for each endpoint. In addition, this function will generate a parquet file that includes the relevant data
@@ -434,18 +397,13 @@ class MonitoringApplicationController:
         :param applications_names:          (list[str]) List of application names to push results to.
         :param batch_window_generator:      (_BatchWindowGenerator) An object that generates _BatchWindow objects.
         :param project:                     (str) Project name.
-        :param parquet_directory:           (str) Directory to store application parquet files
-        :param storage_options:             (dict) Storage options for writing ParquetTarget.
         :param model_monitoring_access_key: (str) Access key to apply the model monitoring process.
-
+        :param tsdb_connector:              (mlrun.model_monitoring.db.tsdb.TSDBConnector) TSDB connector
         """
         endpoint_id = endpoint[mm_constants.EventFieldType.UID]
-        start_times: set[datetime.datetime] = set()
+        # if false the endpoint represent batch infer step.
+        has_stream = endpoint[mm_constants.EventFieldType.STREAM_PATH] != ""
         try:
-            m_fs = fstore.get_feature_set(
-                endpoint[mm_constants.EventFieldType.FEATURE_SET_URI]
-            )
-
             for application in applications_names:
                 batch_window = batch_window_generator.get_batch_window(
                     project=project,
@@ -453,168 +411,75 @@ class MonitoringApplicationController:
                     application=application,
                     first_request=endpoint[mm_constants.EventFieldType.FIRST_REQUEST],
                     last_request=endpoint[mm_constants.EventFieldType.LAST_REQUEST],
-                    has_stream=endpoint[mm_constants.EventFieldType.STREAM_PATH] != "",
+                    has_stream=has_stream,
                 )
 
                 for start_infer_time, end_infer_time in batch_window.get_intervals():
-                    # start - TODO : delete in 1.9.0 (V1 app deprecation)
-                    try:
-                        # Get application sample data
-                        offline_response = cls._get_sample_df(
-                            feature_set=m_fs,
+                    prediction_metric = tsdb_connector.read_predictions(
+                        endpoint_id=endpoint_id,
+                        start=start_infer_time,
+                        end=end_infer_time,
+                    )
+                    if not prediction_metric.data and has_stream:
+                        logger.info(
+                            "No data found for the given interval",
+                            start=start_infer_time,
+                            end=end_infer_time,
                             endpoint_id=endpoint_id,
+                        )
+                    else:
+                        logger.info(
+                            "Data found for the given interval",
+                            start=start_infer_time,
+                            end=end_infer_time,
+                            endpoint_id=endpoint_id,
+                        )
+                        cls._push_to_applications(
                             start_infer_time=start_infer_time,
                             end_infer_time=end_infer_time,
-                            parquet_directory=parquet_directory,
-                            storage_options=storage_options,
-                            application_name=application,
-                        )
-
-                        df = offline_response.to_dataframe()
-                        parquet_target_path = offline_response.vector.get_target_path()
-
-                        if len(df) == 0:
-                            logger.info(
-                                "During this time window, the endpoint has not received any data",
-                                endpoint=endpoint[mm_constants.EventFieldType.UID],
-                                start_time=start_infer_time,
-                                end_time=end_infer_time,
-                            )
-                            continue
-
-                    except FileNotFoundError:
-                        logger.warn(
-                            "No parquets were written yet",
-                            endpoint=endpoint[mm_constants.EventFieldType.UID],
+                            endpoint_id=endpoint_id,
+                            project=project,
+                            applications_names=[application],
+                            model_monitoring_access_key=model_monitoring_access_key,
                         )
-                        continue
-
-                    # Get the timestamp of the latest request:
-                    latest_request = df[mm_constants.EventFieldType.TIMESTAMP].iloc[-1]
-
-                    # Get the feature stats from the model endpoint for reference data
-                    feature_stats = json.loads(
-                        endpoint[mm_constants.EventFieldType.FEATURE_STATS]
-                    )
-
-                    # Pad the original feature stats to accommodate current
-                    # data out of the original range (unless already padded)
-                    pad_features_hist(FeatureStats(feature_stats))
-
-                    # Get the current stats:
-                    current_stats = calculate_inputs_statistics(
-                        sample_set_statistics=feature_stats, inputs=df
-                    )
-                    # end - TODO : delete in 1.9.0 (V1 app deprecation)
-                    cls._push_to_applications(
-                        current_stats=current_stats,
-                        feature_stats=feature_stats,
-                        start_infer_time=start_infer_time,
-                        end_infer_time=end_infer_time,
-                        endpoint_id=endpoint_id,
-                        latest_request=latest_request,
-                        project=project,
-                        applications_names=[application],
-                        model_monitoring_access_key=model_monitoring_access_key,
-                        parquet_target_path=parquet_target_path,
-                    )
-                    start_times.add(start_infer_time)
         except Exception:
             logger.exception(
                 "Encountered an exception",
                 endpoint_id=endpoint[mm_constants.EventFieldType.UID],
             )
 
-        if start_times:
-            return {endpoint_id: [str(t) for t in sorted(list(start_times))]}
-
-    def _delete_old_parquet(self, endpoints: list[dict[str, Any]], days: int = 1):
-        """
-        Delete application parquets older than the argument days.
-
-        :param endpoints: A list of dictionaries of model endpoints records.
-        """
-        if self.parquet_directory.startswith("v3io:///"):
-            # create fs with access to the user side (under projects)
-            store, _, _ = mlrun.store_manager.get_or_create_store(
-                self.parquet_directory,
-                {"V3IO_ACCESS_KEY": self.model_monitoring_access_key},
-            )
-            fs = store.filesystem
-
-            # calculate time threshold (keep only files from the last 24 hours)
-            time_to_keep = (
-                datetime.datetime.now(tz=datetime.timezone.utc)
-                - datetime.timedelta(days=days)
-            ).timestamp()
-
-            for endpoint in endpoints:
-                try:
-                    apps_parquet_directories = fs.listdir(
-                        path=f"{self.parquet_directory}"
-                        f"/key={endpoint[mm_constants.EventFieldType.UID]}"
-                    )
-                    for directory in apps_parquet_directories:
-                        if directory["mtime"] < time_to_keep:
-                            # Delete files
-                            fs.rm(path=directory["name"], recursive=True)
-                            # Delete directory
-                            fs.rmdir(path=directory["name"])
-                except FileNotFoundError:
-                    logger.info(
-                        "Application parquet directory is empty, "
-                        "probably parquets have not yet been created for this app",
-                        endpoint=endpoint[mm_constants.EventFieldType.UID],
-                        path=f"{self.parquet_directory}"
-                        f"/key={endpoint[mm_constants.EventFieldType.UID]}",
-                    )
-
     @staticmethod
     def _push_to_applications(
-        current_stats,
-        feature_stats,
-        start_infer_time,
-        end_infer_time,
-        endpoint_id,
-        latest_request,
-        project,
-        applications_names,
-        model_monitoring_access_key,
-        parquet_target_path,
+        start_infer_time: datetime.datetime,
+        end_infer_time: datetime.datetime,
+        endpoint_id: str,
+        project: str,
+        applications_names: list[str],
+        model_monitoring_access_key: str,
     ):
         """
         Pushes data to multiple stream applications.
 
-        :param current_stats:      Current statistics of input data.
-        :param feature_stats:      Statistics of train features.
-        :param start_infer_time:   The beginning of the infer interval window.
-        :param end_infer_time:     The end of the infer interval window.
-        :param endpoint_id:        Identifier for the model endpoint.
-        :param latest_request:     Timestamp of the latest model request.
-        :param project:            mlrun Project name.
-        :param applications_names: List of application names to which data will be pushed.
+        :param start_infer_time:            The beginning of the infer interval window.
+        :param end_infer_time:              The end of the infer interval window.
+        :param endpoint_id:                 Identifier for the model endpoint.
+        :param project:                     mlrun Project name.
+        :param applications_names:          List of application names to which data will be pushed.
+        :param model_monitoring_access_key: Access key to apply the model monitoring process.
 
         """
-
         data = {
-            mm_constants.ApplicationEvent.CURRENT_STATS: json.dumps(current_stats),
-            mm_constants.ApplicationEvent.FEATURE_STATS: json.dumps(feature_stats),
-            mm_constants.ApplicationEvent.SAMPLE_PARQUET_PATH: parquet_target_path,
             mm_constants.ApplicationEvent.START_INFER_TIME: start_infer_time.isoformat(
                 sep=" ", timespec="microseconds"
             ),
             mm_constants.ApplicationEvent.END_INFER_TIME: end_infer_time.isoformat(
                 sep=" ", timespec="microseconds"
             ),
-            mm_constants.ApplicationEvent.LAST_REQUEST: latest_request.isoformat(
-                sep=" ", timespec="microseconds"
-            ),
             mm_constants.ApplicationEvent.ENDPOINT_ID: endpoint_id,
             mm_constants.ApplicationEvent.OUTPUT_STREAM_URI: get_stream_path(
                 project=project,
                 function_name=mm_constants.MonitoringFunctionNames.WRITER,
             ),
-            mm_constants.ApplicationEvent.MLRUN_CONTEXT: {},  # TODO : for future use by ad-hoc batch infer
         }
         for app_name in applications_names:
            data.update({mm_constants.ApplicationEvent.APPLICATION_NAME: app_name})
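With the statistics computation gone, the event pushed to each application stream is much slimmer: the CURRENT_STATS, FEATURE_STATS, SAMPLE_PARQUET_PATH, LAST_REQUEST, and MLRUN_CONTEXT keys are dropped, leaving only the window boundaries and routing fields. A rough sketch of the resulting payload, using illustrative literal keys in place of the ApplicationEvent constants:

import datetime
import json

start = datetime.datetime(2024, 7, 1, 10, 0, tzinfo=datetime.timezone.utc)
end = start + datetime.timedelta(minutes=10)

# Illustrative key names; the real code uses mm_constants.ApplicationEvent.*
event = {
    "start_infer_time": start.isoformat(sep=" ", timespec="microseconds"),
    "end_infer_time": end.isoformat(sep=" ", timespec="microseconds"),
    "endpoint_id": "ep-uid-123",  # illustrative endpoint UID
    "output_stream_uri": "v3io:///projects/...",  # from get_stream_path()
    "application_name": "histogram-data-drift",
}
print(json.dumps(event, indent=2))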
@@ -627,49 +492,12 @@ class MonitoringApplicationController:
             [data]
         )
 
-    @staticmethod
-    def _get_sample_df(
-        feature_set: mlrun.common.schemas.FeatureSet,
-        endpoint_id: str,
-        start_infer_time: datetime.datetime,
-        end_infer_time: datetime.datetime,
-        parquet_directory: str,
-        storage_options: dict,
-        application_name: str,
-    ) -> mlrun.feature_store.OfflineVectorResponse:
-        """
-        Retrieves a sample DataFrame of the current input according to the provided infer interval window.
-
-        :param feature_set:       The main feature set.
-        :param endpoint_id:       Identifier for the model endpoint.
-        :param start_infer_time:  The beginning of the infer interval window.
-        :param end_infer_time:    The end of the infer interval window.
-        :param parquet_directory: Directory where Parquet files are stored.
-        :param storage_options:   Storage options for accessing the data.
-        :param application_name:  Current application name.
 
-        :return: OfflineVectorResponse that can be used for generating a sample DataFrame for the specified endpoint.
+def handler(context: nuclio.Context, event: nuclio.Event) -> None:
+    """
+    Run model monitoring application processor
 
-        """
-        features = [f"{feature_set.metadata.name}.*"]
-        vector = fstore.FeatureVector(
-            name=f"{endpoint_id}_vector",
-            features=features,
-            with_indexes=True,
-        )
-        vector.metadata.tag = application_name
-        vector.feature_set_objects = {feature_set.metadata.name: feature_set}
-
-        # get offline features based on application start and end time.
-        # store the result parquet by partitioning by controller end processing time
-        offline_response = vector.get_offline_features(
-            start_time=start_infer_time,
-            end_time=end_infer_time,
-            timestamp_for_filtering=mm_constants.EventFieldType.TIMESTAMP,
-            target=ParquetTarget(
-                path=parquet_directory
-                + f"/key={endpoint_id}/{int(start_infer_time.timestamp())}/{application_name}.parquet",
-                storage_options=storage_options,
-            ),
-        )
-        return offline_response
+    :param context: the Nuclio context
+    :param event: trigger event
+    """
+    MonitoringApplicationController().run()
mlrun/model_monitoring/db/stores/__init__.py

@@ -63,7 +63,7 @@ class ObjectStoreFactory(enum.Enum):
         :param value: Provided enum (invalid) value.
         """
         valid_values = list(cls.__members__.keys())
-        raise mlrun.errors.MLRunInvalidMMStoreType(
+        raise mlrun.errors.MLRunInvalidMMStoreTypeError(
             f"{value} is not a valid endpoint store, please choose a valid value: %{valid_values}."
         )
 
@@ -100,7 +100,9 @@ def get_store_object(
     :param store_connection_string: Optional explicit connection string of the store.
 
     :return: `StoreBase` object. Using this object, the user can apply different operations such as write, update, get
-             and delete a model endpoint record.
+             and delete a model endpoint record.
+    :raise: `MLRunInvalidMMStoreTypeError` if the user didn't provide store connection
+            or the provided store connection is invalid.
     """
 
     store_connection_string = (
@@ -121,7 +123,10 @@ def get_store_object(
             mlrun.common.schemas.model_monitoring.ModelEndpointTarget.V3IO_NOSQL
         )
     else:
-        store_type = None
+        raise mlrun.errors.MLRunInvalidMMStoreTypeError(
+            "You must provide a valid store connection by using "
+            "set_model_monitoring_credentials API."
+        )
     # Get store type value from ObjectStoreFactory enum class
     store_type_fact = ObjectStoreFactory(store_type)
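Since get_store_object now raises instead of silently proceeding with store_type = None, callers get an explicit failure when monitoring credentials were never configured. A hedged usage sketch (project name illustrative):

import mlrun
import mlrun.model_monitoring

try:
    store = mlrun.model_monitoring.get_store_object(project="my-project")
except mlrun.errors.MLRunInvalidMMStoreTypeError as exc:
    # Raised when no valid store connection is configured; configure one
    # first via the set_model_monitoring_credentials API.
    print(f"Model monitoring store is not configured: {exc}")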
 
mlrun/model_monitoring/db/stores/base/store.py

@@ -11,6 +11,7 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
+
 import json
 import typing
 from abc import ABC, abstractmethod
@@ -94,6 +95,7 @@ class StoreBase(ABC):
         labels: list[str] = None,
         top_level: bool = None,
         uids: list = None,
+        include_stats: bool = None,
     ) -> list[dict[str, typing.Any]]:
         """
         Returns a list of model endpoint dictionaries, supports filtering by model, function, labels or top level.
@@ -107,6 +109,7 @@ class StoreBase(ABC):
                               key (i.e. "key").
         :param top_level:     If True will return only routers and endpoint that are NOT children of any router.
         :param uids:          List of model endpoint unique ids to include in the result.
+        :param include_stats: If True, will include model endpoint statistics in the result.
 
         :return: A list of model endpoint dictionaries.
         """
mlrun/model_monitoring/db/stores/sqldb/models/base.py

@@ -11,8 +11,10 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
+
 from sqlalchemy import (
-    TIMESTAMP,
+    DATETIME,
+    TIMESTAMP,  # TODO: migrate to DATETIME, see ML-6921
     Boolean,
     Column,
     Float,
@@ -90,11 +92,11 @@ class ModelEndpointsBaseTable(BaseModel):
     metrics = Column(EventFieldType.METRICS, Text)
     first_request = Column(
         EventFieldType.FIRST_REQUEST,
-        TIMESTAMP(timezone=True),
+        TIMESTAMP(timezone=True),  # TODO: migrate to DATETIME, see ML-6921
     )
     last_request = Column(
         EventFieldType.LAST_REQUEST,
-        TIMESTAMP(timezone=True),
+        TIMESTAMP(timezone=True),  # TODO: migrate to DATETIME, see ML-6921
     )
 
 
@@ -122,11 +124,11 @@ class ApplicationResultBaseTable(BaseModel):
 
     start_infer_time = Column(
         WriterEvent.START_INFER_TIME,
-        TIMESTAMP(timezone=True),
+        DATETIME(timezone=True),
     )
     end_infer_time = Column(
         WriterEvent.END_INFER_TIME,
-        TIMESTAMP(timezone=True),
+        DATETIME(timezone=True),
     )
 
     result_status = Column(ResultData.RESULT_STATUS, String(10))
@@ -152,11 +154,11 @@ class ApplicationMetricsBaseTable(BaseModel):
     )
     start_infer_time = Column(
         WriterEvent.START_INFER_TIME,
-        TIMESTAMP(timezone=True),
+        DATETIME(timezone=True),
     )
     end_infer_time = Column(
         WriterEvent.END_INFER_TIME,
-        TIMESTAMP(timezone=True),
+        DATETIME(timezone=True),
     )
     metric_name = Column(
         MetricData.METRIC_NAME,
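Context for the ML-6921 migration: on MySQL, a TIMESTAMP column is limited to the 1970-2038 epoch range and is converted through the session time zone, while DATETIME has a much wider range and no implicit conversion, which is the usual reason for this kind of switch. A minimal standalone sketch of the two column styles in SQLAlchemy (table and column names are illustrative, not the mlrun schema):

from sqlalchemy import DATETIME, TIMESTAMP, Column, Integer, String
from sqlalchemy.orm import declarative_base

Base = declarative_base()

class ApplicationResultExample(Base):
    __tablename__ = "application_result_example"  # illustrative name

    id = Column(Integer, primary_key=True)
    endpoint_id = Column(String(40))
    # New style: DATETIME(timezone=True) - wide range, no implicit
    # time-zone conversion on MySQL.
    start_infer_time = Column(DATETIME(timezone=True))
    # Old style, kept on the model-endpoints table for now (see the
    # TODO above): TIMESTAMP(timezone=True).
    end_infer_time = Column(TIMESTAMP(timezone=True))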