mlrun-1.8.0rc5-py3-none-any.whl → mlrun-1.8.0rc9-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of mlrun might be problematic. Click here for more details.

Files changed (74)
  1. mlrun/__init__.py +1 -0
  2. mlrun/artifacts/__init__.py +1 -1
  3. mlrun/artifacts/base.py +21 -1
  4. mlrun/artifacts/document.py +62 -39
  5. mlrun/artifacts/manager.py +12 -5
  6. mlrun/common/constants.py +1 -0
  7. mlrun/common/model_monitoring/__init__.py +0 -2
  8. mlrun/common/model_monitoring/helpers.py +0 -28
  9. mlrun/common/schemas/__init__.py +2 -4
  10. mlrun/common/schemas/alert.py +77 -1
  11. mlrun/common/schemas/client_spec.py +0 -1
  12. mlrun/common/schemas/model_monitoring/__init__.py +0 -6
  13. mlrun/common/schemas/model_monitoring/constants.py +11 -9
  14. mlrun/common/schemas/model_monitoring/model_endpoints.py +77 -149
  15. mlrun/common/schemas/notification.py +6 -0
  16. mlrun/common/schemas/project.py +3 -0
  17. mlrun/config.py +2 -3
  18. mlrun/datastore/datastore_profile.py +57 -17
  19. mlrun/datastore/sources.py +1 -2
  20. mlrun/datastore/store_resources.py +7 -2
  21. mlrun/datastore/vectorstore.py +99 -62
  22. mlrun/db/base.py +34 -20
  23. mlrun/db/httpdb.py +249 -163
  24. mlrun/db/nopdb.py +40 -17
  25. mlrun/execution.py +14 -7
  26. mlrun/feature_store/api.py +1 -0
  27. mlrun/model.py +3 -0
  28. mlrun/model_monitoring/__init__.py +3 -2
  29. mlrun/model_monitoring/api.py +64 -53
  30. mlrun/model_monitoring/applications/_application_steps.py +3 -1
  31. mlrun/model_monitoring/applications/base.py +115 -15
  32. mlrun/model_monitoring/applications/context.py +42 -24
  33. mlrun/model_monitoring/applications/histogram_data_drift.py +1 -1
  34. mlrun/model_monitoring/controller.py +43 -37
  35. mlrun/model_monitoring/db/__init__.py +0 -2
  36. mlrun/model_monitoring/db/tsdb/base.py +2 -1
  37. mlrun/model_monitoring/db/tsdb/tdengine/tdengine_connector.py +2 -1
  38. mlrun/model_monitoring/db/tsdb/v3io/v3io_connector.py +43 -0
  39. mlrun/model_monitoring/helpers.py +78 -66
  40. mlrun/model_monitoring/stream_processing.py +83 -270
  41. mlrun/model_monitoring/writer.py +1 -10
  42. mlrun/projects/pipelines.py +37 -1
  43. mlrun/projects/project.py +173 -70
  44. mlrun/run.py +40 -0
  45. mlrun/runtimes/nuclio/function.py +7 -6
  46. mlrun/runtimes/nuclio/serving.py +9 -4
  47. mlrun/serving/routers.py +158 -145
  48. mlrun/serving/server.py +6 -0
  49. mlrun/serving/states.py +21 -7
  50. mlrun/serving/v2_serving.py +94 -68
  51. mlrun/utils/helpers.py +23 -33
  52. mlrun/utils/notifications/notification/mail.py +17 -6
  53. mlrun/utils/notifications/notification_pusher.py +9 -5
  54. mlrun/utils/regex.py +8 -1
  55. mlrun/utils/version/version.json +2 -2
  56. {mlrun-1.8.0rc5.dist-info → mlrun-1.8.0rc9.dist-info}/METADATA +2 -2
  57. {mlrun-1.8.0rc5.dist-info → mlrun-1.8.0rc9.dist-info}/RECORD +61 -74
  58. mlrun/common/schemas/model_monitoring/model_endpoint_v2.py +0 -149
  59. mlrun/model_monitoring/db/stores/__init__.py +0 -136
  60. mlrun/model_monitoring/db/stores/base/__init__.py +0 -15
  61. mlrun/model_monitoring/db/stores/base/store.py +0 -154
  62. mlrun/model_monitoring/db/stores/sqldb/__init__.py +0 -13
  63. mlrun/model_monitoring/db/stores/sqldb/models/__init__.py +0 -46
  64. mlrun/model_monitoring/db/stores/sqldb/models/base.py +0 -93
  65. mlrun/model_monitoring/db/stores/sqldb/models/mysql.py +0 -47
  66. mlrun/model_monitoring/db/stores/sqldb/models/sqlite.py +0 -25
  67. mlrun/model_monitoring/db/stores/sqldb/sql_store.py +0 -408
  68. mlrun/model_monitoring/db/stores/v3io_kv/__init__.py +0 -13
  69. mlrun/model_monitoring/db/stores/v3io_kv/kv_store.py +0 -464
  70. mlrun/model_monitoring/model_endpoint.py +0 -120
  71. {mlrun-1.8.0rc5.dist-info → mlrun-1.8.0rc9.dist-info}/LICENSE +0 -0
  72. {mlrun-1.8.0rc5.dist-info → mlrun-1.8.0rc9.dist-info}/WHEEL +0 -0
  73. {mlrun-1.8.0rc5.dist-info → mlrun-1.8.0rc9.dist-info}/entry_points.txt +0 -0
  74. {mlrun-1.8.0rc5.dist-info → mlrun-1.8.0rc9.dist-info}/top_level.txt +0 -0
mlrun/db/nopdb.py CHANGED
@@ -22,6 +22,7 @@ import mlrun.common.runtimes.constants
22
22
  import mlrun.common.schemas
23
23
  import mlrun.errors
24
24
  import mlrun.lists
25
+ import mlrun.model_monitoring
25
26
 
26
27
  from ..config import config
27
28
  from ..utils import logger
@@ -573,39 +574,58 @@ class NopDB(RunDBInterface):
573
574
 
574
575
  def create_model_endpoint(
575
576
  self,
576
- project: str,
577
- endpoint_id: str,
578
577
  model_endpoint: mlrun.common.schemas.ModelEndpoint,
579
- ):
578
+ ) -> mlrun.common.schemas.ModelEndpoint:
580
579
  pass
581
580
 
582
- def delete_model_endpoint(self, project: str, endpoint_id: str):
581
+ def delete_model_endpoint(
582
+ self,
583
+ name: str,
584
+ project: str,
585
+ function_name: Optional[str] = None,
586
+ function_tag: Optional[str] = None,
587
+ endpoint_id: Optional[str] = None,
588
+ ):
583
589
  pass
584
590
 
585
591
  def list_model_endpoints(
586
592
  self,
587
593
  project: str,
588
- model: Optional[str] = None,
589
- function: Optional[str] = None,
594
+ name: Optional[str] = None,
595
+ function_name: Optional[str] = None,
596
+ function_tag: Optional[str] = None,
597
+ model_name: Optional[str] = None,
590
598
  labels: Optional[Union[str, dict[str, Optional[str]], list[str]]] = None,
591
- start: str = "now-1h",
592
- end: str = "now",
593
- metrics: Optional[list[str]] = None,
594
- ):
599
+ start: Optional[datetime.datetime] = None,
600
+ end: Optional[datetime.datetime] = None,
601
+ tsdb_metrics: bool = True,
602
+ top_level: bool = False,
603
+ uids: Optional[list[str]] = None,
604
+ latest_only: bool = False,
605
+ ) -> mlrun.common.schemas.ModelEndpointList:
595
606
  pass
596
607
 
597
608
  def get_model_endpoint(
598
609
  self,
610
+ name: str,
599
611
  project: str,
600
- endpoint_id: str,
601
- start: Optional[str] = None,
602
- end: Optional[str] = None,
603
- metrics: Optional[list[str]] = None,
604
- features: bool = False,
605
- ):
612
+ function_name: Optional[str] = None,
613
+ function_tag: Optional[str] = None,
614
+ endpoint_id: Optional[str] = None,
615
+ tsdb_metrics: bool = True,
616
+ feature_analysis: bool = False,
617
+ ) -> mlrun.common.schemas.ModelEndpoint:
606
618
  pass
607
619
 
608
- def patch_model_endpoint(self, project: str, endpoint_id: str, attributes: dict):
620
+ def patch_model_endpoint(
621
+ self,
622
+ name: str,
623
+ project: str,
624
+ attributes: dict,
625
+ function_name: Optional[str] = None,
626
+ function_tag: Optional[str] = None,
627
+ endpoint_id: Optional[str] = None,
628
+ ) -> mlrun.common.schemas.ModelEndpoint:
609
629
  pass
610
630
 
611
631
  def create_hub_source(
@@ -902,3 +922,6 @@ class NopDB(RunDBInterface):
902
922
  **kwargs,
903
923
  ):
904
924
  pass
925
+
926
+ def get_project_summary(self, project: str):
927
+ pass
mlrun/execution.py CHANGED
@@ -501,11 +501,11 @@ class MLClientCtx:
501
501
  return default
502
502
  return self._parameters[key]
503
503
 
504
- def get_project_object(self):
504
+ def get_project_object(self) -> Optional["mlrun.MlrunProject"]:
505
505
  """
506
506
  Get the MLRun project object by the project name set in the context.
507
507
 
508
- :return: The project object or None if it couldn't be retrieved.
508
+ :returns: The project object or None if it couldn't be retrieved.
509
509
  """
510
510
  return self._load_project_object()
511
511
 
@@ -877,28 +877,35 @@ class MLClientCtx:
877
877
  def log_document(
878
878
  self,
879
879
  key: str,
880
+ tag: str = "",
881
+ local_path: str = "",
880
882
  artifact_path: Optional[str] = None,
881
883
  document_loader: DocumentLoaderSpec = DocumentLoaderSpec(),
882
- tag: str = "",
883
884
  upload: Optional[bool] = False,
884
885
  labels: Optional[dict[str, str]] = None,
886
+ target_path: Optional[str] = None,
885
887
  **kwargs,
886
888
  ) -> DocumentArtifact:
887
889
  """
888
890
  Log a document as an artifact.
889
891
 
890
892
  :param key: Artifact key
891
- :param target_path: Path to the local file
892
- :param artifact_path: Target path for artifact storage
893
- :param document_loader: Spec to use to load the artifact as langchain document
894
893
  :param tag: Version tag
894
+ :param local_path: path to the local file we upload, will also be used
895
+ as the destination subpath (under "artifact_path")
896
+ :param artifact_path: Target artifact path (when not using the default)
897
+ to define a subpath under the default location use:
898
+ `artifact_path=context.artifact_subpath('data')`
899
+ :param document_loader: Spec to use to load the artifact as langchain document
895
900
  :param upload: Whether to upload the artifact
896
901
  :param labels: Key-value labels
902
+ :param target_path: Path to the local file; used as the document's original source when `local_path` is empty
897
903
  :param kwargs: Additional keyword arguments
898
904
  :return: DocumentArtifact object
899
905
  """
900
906
  doc_artifact = DocumentArtifact(
901
907
  key=key,
908
+ original_source=local_path or target_path,
902
909
  document_loader=document_loader,
903
910
  **kwargs,
904
911
  )
@@ -1200,7 +1207,7 @@ class MLClientCtx:
1200
1207
  self._data_stores = store_manager.set(self._secrets_manager, db=self._rundb)
1201
1208
  self._artifacts_manager = ArtifactManager(db=self._rundb)
1202
1209
 
1203
- def _load_project_object(self):
1210
+ def _load_project_object(self) -> Optional["mlrun.MlrunProject"]:
1204
1211
  if not self._project_object:
1205
1212
  if not self._project:
1206
1213
  self.logger.warning(
@@ -11,6 +11,7 @@
11
11
  # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
12
  # See the License for the specific language governing permissions and
13
13
  # limitations under the License.
14
+
14
15
  import copy
15
16
  import importlib.util
16
17
  import pathlib
mlrun/model.py CHANGED
@@ -445,6 +445,7 @@ class Credentials(ModelObj):
445
445
  class BaseMetadata(ModelObj):
446
446
  _default_fields_to_strip = ModelObj._default_fields_to_strip + [
447
447
  "hash",
448
+ "uid",
448
449
  # Below are environment specific fields, no need to keep when stripping
449
450
  "namespace",
450
451
  "project",
@@ -467,10 +468,12 @@ class BaseMetadata(ModelObj):
467
468
  categories=None,
468
469
  updated=None,
469
470
  credentials=None,
471
+ uid=None,
470
472
  ):
471
473
  self.name = name
472
474
  self.tag = tag
473
475
  self.hash = hash
476
+ self.uid = uid
474
477
  self.namespace = namespace
475
478
  self.project = project or ""
476
479
  self.labels = labels or {}
@@ -14,7 +14,8 @@
14
14
 
15
15
  # for backwards compatibility
16
16
 
17
- from .db import get_store_object, get_tsdb_connector
17
+ from mlrun.common.schemas import ModelEndpoint, ModelEndpointList
18
+
19
+ from .db import get_tsdb_connector
18
20
  from .helpers import get_stream_path
19
- from .model_endpoint import ModelEndpoint
20
21
  from .tracking_policy import TrackingPolicy
@@ -23,18 +23,28 @@ import pandas as pd
23
23
  import mlrun.artifacts
24
24
  import mlrun.common.helpers
25
25
  import mlrun.common.schemas.model_monitoring.constants as mm_constants
26
+ import mlrun.datastore.base
26
27
  import mlrun.feature_store
27
28
  import mlrun.model_monitoring.applications as mm_app
28
29
  import mlrun.serving
30
+ from mlrun.common.schemas import ModelEndpoint
31
+ from mlrun.common.schemas.model_monitoring import (
32
+ FunctionURI,
33
+ )
29
34
  from mlrun.data_types.infer import InferOptions, get_df_stats
30
35
  from mlrun.utils import datetime_now, logger
31
36
 
32
37
  from .helpers import update_model_endpoint_last_request
33
- from .model_endpoint import ModelEndpoint
34
38
 
35
39
  # A union of all supported dataset types:
36
40
  DatasetType = typing.Union[
37
- mlrun.DataItem, list, dict, pd.DataFrame, pd.Series, np.ndarray, typing.Any
41
+ mlrun.datastore.base.DataItem,
42
+ list,
43
+ dict,
44
+ pd.DataFrame,
45
+ pd.Series,
46
+ np.ndarray,
47
+ typing.Any,
38
48
  ]
39
49
 
40
50
 
@@ -44,10 +54,8 @@ def get_or_create_model_endpoint(
44
54
  model_endpoint_name: str = "",
45
55
  endpoint_id: str = "",
46
56
  function_name: str = "",
47
- context: mlrun.MLClientCtx = None,
57
+ context: typing.Optional["mlrun.MLClientCtx"] = None,
48
58
  sample_set_statistics: typing.Optional[dict[str, typing.Any]] = None,
49
- drift_threshold: typing.Optional[float] = None,
50
- possible_drift_threshold: typing.Optional[float] = None,
51
59
  monitoring_mode: mm_constants.ModelMonitoringMode = mm_constants.ModelMonitoringMode.disabled,
52
60
  db_session=None,
53
61
  ) -> ModelEndpoint:
@@ -68,10 +76,6 @@ def get_or_create_model_endpoint(
68
76
  full function hash.
69
77
  :param sample_set_statistics: Dictionary of sample set statistics that will be used as reference data for
70
78
  the new model endpoint (applicable only to new endpoint_id).
71
- :param drift_threshold: (deprecated) The threshold of which to mark drifts (applicable only to new
72
- endpoint_id).
73
- :param possible_drift_threshold: (deprecated) The threshold of which to mark possible drifts (applicable only to new
74
- endpoint_id).
75
79
  :param monitoring_mode: If enabled, apply model monitoring features on the provided endpoint id
76
80
  (applicable only to new endpoint_id).
77
81
  :param db_session: A runtime session that manages the current dialog with the database.
@@ -79,18 +83,15 @@ def get_or_create_model_endpoint(
79
83
  :return: A ModelEndpoint object
80
84
  """
81
85
 
82
- if not endpoint_id:
83
- # Generate a new model endpoint id based on the project name and model name
84
- endpoint_id = hashlib.sha1(
85
- f"{project}_{model_endpoint_name}".encode()
86
- ).hexdigest()
87
-
88
86
  if not db_session:
89
87
  # Generate a runtime database
90
88
  db_session = mlrun.get_run_db()
91
89
  try:
92
90
  model_endpoint = db_session.get_model_endpoint(
93
- project=project, endpoint_id=endpoint_id
91
+ project=project,
92
+ name=model_endpoint_name,
93
+ endpoint_id=endpoint_id,
94
+ function_name=function_name,
94
95
  )
95
96
  # If other fields are provided, validate that they correspond to the existing model endpoint data
96
97
  _model_endpoint_validations(
@@ -104,7 +105,6 @@ def get_or_create_model_endpoint(
104
105
  model_endpoint = _generate_model_endpoint(
105
106
  project=project,
106
107
  db_session=db_session,
107
- endpoint_id=endpoint_id,
108
108
  model_path=model_path,
109
109
  model_endpoint_name=model_endpoint_name,
110
110
  function_name=function_name,
@@ -121,7 +121,7 @@ def record_results(
121
121
  model_endpoint_name: str,
122
122
  endpoint_id: str = "",
123
123
  function_name: str = "",
124
- context: typing.Optional[mlrun.MLClientCtx] = None,
124
+ context: typing.Optional["mlrun.MLClientCtx"] = None,
125
125
  infer_results_df: typing.Optional[pd.DataFrame] = None,
126
126
  sample_set_statistics: typing.Optional[dict[str, typing.Any]] = None,
127
127
  monitoring_mode: mm_constants.ModelMonitoringMode = mm_constants.ModelMonitoringMode.enabled,
@@ -208,13 +208,13 @@ def record_results(
208
208
  monitoring_mode=monitoring_mode,
209
209
  db_session=db,
210
210
  )
211
- logger.debug("Model endpoint", endpoint=model_endpoint.to_dict())
211
+ logger.debug("Model endpoint", endpoint=model_endpoint)
212
212
 
213
213
  timestamp = datetime_now()
214
214
  if infer_results_df is not None:
215
215
  # Write the monitoring parquet to the relevant model endpoint context
216
216
  write_monitoring_df(
217
- feature_set_uri=model_endpoint.status.monitoring_feature_set_uri,
217
+ feature_set_uri=model_endpoint.spec.monitoring_feature_set_uri,
218
218
  infer_datetime=timestamp,
219
219
  endpoint_id=model_endpoint.metadata.uid,
220
220
  infer_results_df=infer_results_df,
@@ -278,7 +278,7 @@ def _model_endpoint_validations(
278
278
  # Feature stats
279
279
  if (
280
280
  sample_set_statistics
281
- and sample_set_statistics != model_endpoint.status.feature_stats
281
+ and sample_set_statistics != model_endpoint.spec.feature_stats
282
282
  ):
283
283
  logger.warning(
284
284
  "Provided sample set statistics is different from the registered statistics. "
@@ -290,7 +290,7 @@ def write_monitoring_df(
290
290
  endpoint_id: str,
291
291
  infer_results_df: pd.DataFrame,
292
292
  infer_datetime: datetime,
293
- monitoring_feature_set: typing.Optional[mlrun.feature_store.FeatureSet] = None,
293
+ monitoring_feature_set: typing.Optional["mlrun.feature_store.FeatureSet"] = None,
294
294
  feature_set_uri: str = "",
295
295
  ) -> None:
296
296
  """Write infer results dataframe to the monitoring parquet target of the current model endpoint. The dataframe will
@@ -330,11 +330,10 @@ def write_monitoring_df(
330
330
  def _generate_model_endpoint(
331
331
  project: str,
332
332
  db_session,
333
- endpoint_id: str,
334
333
  model_path: str,
335
334
  model_endpoint_name: str,
336
335
  function_name: str,
337
- context: mlrun.MLClientCtx,
336
+ context: "mlrun.MLClientCtx",
338
337
  sample_set_statistics: dict[str, typing.Any],
339
338
  monitoring_mode: mm_constants.ModelMonitoringMode = mm_constants.ModelMonitoringMode.disabled,
340
339
  ) -> ModelEndpoint:
@@ -344,7 +343,6 @@ def _generate_model_endpoint(
344
343
  :param project: Project name.
345
344
 
346
345
  :param db_session: A session that manages the current dialog with the database.
347
- :param endpoint_id: Model endpoint unique ID.
348
346
  :param model_path: The model Store path.
349
347
  :param model_endpoint_name: Model endpoint name will be presented under the new model endpoint.
350
348
  :param function_name: If a new model endpoint is created, use this function name for generating the
@@ -355,34 +353,40 @@ def _generate_model_endpoint(
355
353
  the current model endpoint. Will be stored under
356
354
  `model_endpoint.status.feature_stats`.
357
355
 
358
- :return `mlrun.model_monitoring.model_endpoint.ModelEndpoint` object.
356
+ :return: `mlrun.common.schemas.ModelEndpoint` object.
359
357
  """
360
- model_endpoint = ModelEndpoint()
361
- model_endpoint.metadata.project = project
362
- model_endpoint.metadata.uid = endpoint_id
363
- if function_name:
364
- model_endpoint.spec.function_uri = project + "/" + function_name
365
- elif not context:
366
- raise mlrun.errors.MLRunInvalidArgumentError(
367
- "Please provide either a function name or a valid MLRun context"
358
+ if not function_name and context:
359
+ function_name = FunctionURI.from_string(
360
+ context.to_dict()["spec"]["function"]
361
+ ).function
362
+ model_obj = None
363
+ if model_path:
364
+ model_obj: mlrun.artifacts.ModelArtifact = (
365
+ mlrun.datastore.store_resources.get_store_resource(
366
+ model_path, db=db_session
367
+ )
368
368
  )
369
- else:
370
- model_endpoint.spec.function_uri = context.to_dict()["spec"]["function"]
371
- model_endpoint.spec.model_uri = model_path
372
- model_endpoint.spec.model = model_endpoint_name
373
- model_endpoint.spec.model_class = "drift-analysis"
374
- model_endpoint.spec.monitoring_mode = monitoring_mode
375
- model_endpoint.status.first_request = model_endpoint.status.last_request = (
376
- datetime_now().isoformat()
377
- )
378
- if sample_set_statistics:
379
- model_endpoint.status.feature_stats = sample_set_statistics
380
-
381
- db_session.create_model_endpoint(
382
- project=project, endpoint_id=endpoint_id, model_endpoint=model_endpoint
369
+ current_time = datetime_now()
370
+ model_endpoint = mlrun.common.schemas.ModelEndpoint(
371
+ metadata=mlrun.common.schemas.ModelEndpointMetadata(
372
+ project=project,
373
+ name=model_endpoint_name,
374
+ endpoint_type=mlrun.common.schemas.model_monitoring.EndpointType.BATCH_EP,
375
+ ),
376
+ spec=mlrun.common.schemas.ModelEndpointSpec(
377
+ function_name=function_name,
378
+ model_name=model_obj.metadata.key if model_path else None,
379
+ model_uid=model_obj.metadata.uid if model_path else None,
380
+ model_class="drift-analysis",
381
+ ),
382
+ status=mlrun.common.schemas.ModelEndpointStatus(
383
+ monitoring_mode=monitoring_mode,
384
+ first_request=current_time,
385
+ last_request=current_time,
386
+ ),
383
387
  )
384
388
 
385
- return db_session.get_model_endpoint(project=project, endpoint_id=endpoint_id)
389
+ return db_session.create_model_endpoint(model_endpoint=model_endpoint)
386
390
 
387
391
 
388
392
  def get_sample_set_statistics(
@@ -531,7 +535,7 @@ def read_dataset_as_dataframe(
531
535
 
532
536
 
533
537
  def log_result(
534
- context: mlrun.MLClientCtx,
538
+ context: "mlrun.MLClientCtx",
535
539
  result_set_name: str,
536
540
  result_set: pd.DataFrame,
537
541
  artifacts_tag: str,
@@ -559,9 +563,7 @@ def _create_model_monitoring_function_base(
559
563
  project: str,
560
564
  func: typing.Union[str, None] = None,
561
565
  application_class: typing.Union[
562
- str,
563
- mm_app.ModelMonitoringApplicationBase,
564
- None,
566
+ str, "mm_app.ModelMonitoringApplicationBase", None
565
567
  ] = None,
566
568
  name: typing.Optional[str] = None,
567
569
  image: typing.Optional[str] = None,
@@ -620,4 +622,13 @@ def _create_model_monitoring_function_base(
620
622
  project=project,
621
623
  writer_application_name=mm_constants.MonitoringFunctionNames.WRITER,
622
624
  )
625
+
626
+ def block_to_mock_server(*args, **kwargs) -> typing.NoReturn:
627
+ raise NotImplementedError(
628
+ "Model monitoring serving functions do not support `.to_mock_server`. "
629
+ "You may call your model monitoring application object logic via the `.evaluate` method."
630
+ )
631
+
632
+ func_obj.to_mock_server = block_to_mock_server # Until ML-7643 is implemented
633
+
623
634
  return func_obj
@@ -16,6 +16,7 @@ import json
16
16
  import traceback
17
17
  from typing import Any, Optional, Union
18
18
 
19
+ import mlrun.common.schemas
19
20
  import mlrun.common.schemas.alert as alert_objects
20
21
  import mlrun.common.schemas.model_monitoring.constants as mm_constant
21
22
  import mlrun.datastore
@@ -81,6 +82,7 @@ class _PushToMonitoringWriter(StepToDict):
81
82
  self._lazy_init()
82
83
  application_results, application_context = event
83
84
  writer_event = {
85
+ mm_constant.WriterEvent.ENDPOINT_NAME: application_context.endpoint_name,
84
86
  mm_constant.WriterEvent.APPLICATION_NAME: application_context.application_name,
85
87
  mm_constant.WriterEvent.ENDPOINT_ID: application_context.endpoint_id,
86
88
  mm_constant.WriterEvent.START_INFER_TIME: application_context.start_infer_time.isoformat(
@@ -125,7 +127,7 @@ class _PrepareMonitoringEvent(StepToDict):
125
127
  """
126
128
  self.graph_context = context
127
129
  self.application_name = application_name
128
- self.model_endpoints: dict[str, mlrun.model_monitoring.ModelEndpoint] = {}
130
+ self.model_endpoints: dict[str, mlrun.common.schemas.ModelEndpoint] = {}
129
131
 
130
132
  def do(self, event: dict[str, Any]) -> MonitoringApplicationContext:
131
133
  """
@@ -12,10 +12,18 @@
12
12
  # See the License for the specific language governing permissions and
13
13
  # limitations under the License.
14
14
 
15
+ import socket
15
16
  from abc import ABC, abstractmethod
17
+ from datetime import datetime
16
18
  from typing import Any, Optional, Union, cast
17
19
 
20
+ import pandas as pd
21
+
18
22
  import mlrun
23
+ import mlrun.common.constants as mlrun_constants
24
+ import mlrun.common.schemas.model_monitoring.constants as mm_constants
25
+ import mlrun.errors
26
+ import mlrun.model_monitoring.api as mm_api
19
27
  import mlrun.model_monitoring.applications.context as mm_context
20
28
  import mlrun.model_monitoring.applications.results as mm_results
21
29
  from mlrun.serving.utils import MonitoringApplicationToDict
@@ -80,29 +88,83 @@ class ModelMonitoringApplicationBase(MonitoringApplicationToDict, ABC):
80
88
  results = results if isinstance(results, list) else [results]
81
89
  return results, monitoring_context
82
90
 
83
- def _handler(self, context: "mlrun.MLClientCtx"):
91
+ def _handler(
92
+ self,
93
+ context: "mlrun.MLClientCtx",
94
+ sample_data: Optional[pd.DataFrame] = None,
95
+ reference_data: Optional[pd.DataFrame] = None,
96
+ endpoint_names: Optional[list[str]] = None,
97
+ start: Optional[datetime] = None,
98
+ end: Optional[datetime] = None,
99
+ ):
84
100
  """
85
101
  A custom handler that wraps the application's logic implemented in
86
102
  :py:meth:`~mlrun.model_monitoring.applications.ModelMonitoringApplicationBase.do_tracking`
87
103
  for an MLRun job.
88
104
  This method should not be called directly.
89
105
  """
90
- monitoring_context = mm_context.MonitoringApplicationContext(
91
- event={},
92
- application_name=self.__class__.__name__,
93
- logger=context.logger,
94
- artifacts_logger=context,
106
+ feature_stats = (
107
+ mm_api.get_sample_set_statistics(reference_data)
108
+ if reference_data is not None
109
+ else None
95
110
  )
96
- result = self.do_tracking(monitoring_context)
97
- return result
111
+
112
+ def call_do_tracking(event: Optional[dict] = None):
113
+ if event is None:
114
+ event = {}
115
+ monitoring_context = mm_context.MonitoringApplicationContext(
116
+ event=event,
117
+ application_name=self.__class__.__name__,
118
+ logger=context.logger,
119
+ artifacts_logger=context,
120
+ sample_df=sample_data,
121
+ feature_stats=feature_stats,
122
+ )
123
+ return self.do_tracking(monitoring_context)
124
+
125
+ if endpoint_names is not None:
126
+ start, end = self._validate_times(start, end)
127
+ for endpoint_name in endpoint_names:
128
+ result = call_do_tracking(
129
+ event={
130
+ mm_constants.ApplicationEvent.ENDPOINT_NAME: endpoint_name,
131
+ mm_constants.ApplicationEvent.START_INFER_TIME: start,
132
+ mm_constants.ApplicationEvent.END_INFER_TIME: end,
133
+ }
134
+ )
135
+ context.log_result(
136
+ f"{endpoint_name}_{start.isoformat()}_{end.isoformat()}", result
137
+ )
138
+ else:
139
+ return call_do_tracking()
140
+
141
+ @staticmethod
142
+ def _validate_times(
143
+ start: Optional[datetime], end: Optional[datetime]
144
+ ) -> tuple[datetime, datetime]:
145
+ if (start is None) or (end is None):
146
+ raise mlrun.errors.MLRunValueError(
147
+ "When `endpoint_names` is provided, you must also pass the start and end times"
148
+ )
149
+ return start, end
98
150
 
99
151
  @classmethod
100
152
  def evaluate(
101
153
  cls,
102
154
  func_path: Optional[str] = None,
103
155
  func_name: Optional[str] = None,
156
+ *,
104
157
  tag: Optional[str] = None,
105
158
  run_local: bool = True,
159
+ sample_data: Optional[pd.DataFrame] = None,
160
+ reference_data: Optional[pd.DataFrame] = None,
161
+ image: Optional[str] = None,
162
+ with_repo: Optional[bool] = False,
163
+ requirements: Optional[Union[str, list[str]]] = None,
164
+ requirements_file: str = "",
165
+ endpoint_names: Optional[list[str]] = None,
166
+ start: Optional[datetime] = None,
167
+ end: Optional[datetime] = None,
106
168
  ) -> "mlrun.RunObject":
107
169
  """
108
170
  Call this function to run the application's
@@ -113,30 +175,68 @@ class ModelMonitoringApplicationBase(MonitoringApplicationToDict, ABC):
113
175
  :param func_name: The name of the function. If not passed, the class name is used.
114
176
  :param tag: An optional tag for the function.
115
177
  :param run_local: Whether to run the function locally or remotely.
178
+ :param sample_data: Optional - pandas data-frame as the current dataset.
179
+ When set, it replaces the data read from the model endpoint's offline source.
180
+ :param reference_data: Optional - pandas data-frame as the reference dataset.
181
+ When set, its statistics override the model endpoint's feature stats.
182
+ :param image: Docker image to run the job on.
183
+ :param with_repo: Whether to clone the current repo to the build source.
184
+ :param requirements: List of Python requirements to be installed in the image.
185
+ :param requirements_file: Path to a Python requirements file to be installed in the image.
186
+ :param endpoint_names: The model endpoint names to get the data from. When the names are passed,
187
+ you must also provide the start and end times of the data to analyze.
188
+ :param start: The start time of the sample data.
189
+ :param end: The end time of the sample data.
116
190
 
117
191
  :returns: The output of the
118
192
  :py:meth:`~mlrun.model_monitoring.applications.ModelMonitoringApplicationBase.do_tracking`
119
- method wrapped in a :py:class:`~mlrun.model.RunObject`.
193
+ method with the given parameters and inputs, wrapped in a :py:class:`~mlrun.model.RunObject`.
120
194
  """
121
- if not run_local:
122
- raise NotImplementedError # ML-8360
123
-
124
195
  project = cast("mlrun.MlrunProject", mlrun.get_current_project())
125
196
  class_name = cls.__name__
126
- name = func_name if func_name is not None else class_name
197
+ job_name = func_name if func_name is not None else class_name
127
198
  handler = f"{class_name}::{cls._handler.__name__}"
128
199
 
129
200
  job = cast(
130
201
  mlrun.runtimes.KubejobRuntime,
131
202
  project.set_function(
132
203
  func=func_path,
133
- name=name,
204
+ name=job_name,
134
205
  kind=mlrun.runtimes.KubejobRuntime.kind,
135
206
  handler=handler,
136
207
  tag=tag,
208
+ image=image,
209
+ with_repo=with_repo,
210
+ requirements=requirements,
211
+ requirements_file=requirements_file,
137
212
  ),
138
213
  )
139
- run_result = job.run(local=run_local)
214
+
215
+ params: dict[str, Union[list[str], datetime]] = {}
216
+ if endpoint_names:
217
+ start, end = cls._validate_times(start, end)
218
+ params["endpoint_names"] = endpoint_names
219
+ params["start"] = start
220
+ params["end"] = end
221
+
222
+ inputs: dict[str, str] = {}
223
+ for data, identifier in [
224
+ (sample_data, "sample_data"),
225
+ (reference_data, "reference_data"),
226
+ ]:
227
+ if data is not None:
228
+ key = f"{job_name}_{identifier}"
229
+ inputs[identifier] = project.log_dataset(
230
+ key,
231
+ data,
232
+ labels={
233
+ mlrun_constants.MLRunInternalLabels.runner_pod: socket.gethostname(),
234
+ mlrun_constants.MLRunInternalLabels.producer_type: "model-monitoring-job",
235
+ mlrun_constants.MLRunInternalLabels.app_name: class_name,
236
+ },
237
+ ).uri
238
+
239
+ run_result = job.run(local=run_local, params=params, inputs=inputs)
140
240
  return run_result
141
241
 
142
242
  @abstractmethod