mlrun 1.7.0rc14__py3-none-any.whl → 1.7.0rc15__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of mlrun might be problematic. Click here for more details.

Files changed (76)
  1. mlrun/__main__.py +0 -105
  2. mlrun/artifacts/__init__.py +1 -2
  3. mlrun/artifacts/base.py +8 -250
  4. mlrun/artifacts/dataset.py +1 -190
  5. mlrun/artifacts/manager.py +2 -41
  6. mlrun/artifacts/model.py +1 -140
  7. mlrun/artifacts/plots.py +1 -375
  8. mlrun/common/schemas/model_monitoring/__init__.py +4 -0
  9. mlrun/common/schemas/model_monitoring/constants.py +24 -3
  10. mlrun/common/schemas/model_monitoring/model_endpoints.py +13 -1
  11. mlrun/config.py +3 -3
  12. mlrun/data_types/to_pandas.py +4 -4
  13. mlrun/datastore/base.py +41 -9
  14. mlrun/datastore/datastore_profile.py +50 -3
  15. mlrun/datastore/inmem.py +2 -2
  16. mlrun/datastore/sources.py +43 -2
  17. mlrun/datastore/store_resources.py +2 -6
  18. mlrun/datastore/targets.py +106 -39
  19. mlrun/db/httpdb.py +4 -4
  20. mlrun/feature_store/__init__.py +0 -2
  21. mlrun/feature_store/api.py +12 -47
  22. mlrun/feature_store/feature_set.py +9 -0
  23. mlrun/feature_store/retrieval/base.py +9 -4
  24. mlrun/feature_store/retrieval/conversion.py +4 -4
  25. mlrun/feature_store/retrieval/dask_merger.py +2 -0
  26. mlrun/feature_store/retrieval/job.py +2 -0
  27. mlrun/feature_store/retrieval/local_merger.py +2 -0
  28. mlrun/feature_store/retrieval/spark_merger.py +5 -0
  29. mlrun/frameworks/_dl_common/loggers/tensorboard_logger.py +5 -10
  30. mlrun/kfpops.py +5 -10
  31. mlrun/launcher/base.py +1 -1
  32. mlrun/launcher/client.py +1 -1
  33. mlrun/lists.py +2 -2
  34. mlrun/model.py +18 -9
  35. mlrun/model_monitoring/api.py +41 -18
  36. mlrun/model_monitoring/application.py +5 -305
  37. mlrun/model_monitoring/applications/__init__.py +11 -0
  38. mlrun/model_monitoring/applications/_application_steps.py +158 -0
  39. mlrun/model_monitoring/applications/base.py +282 -0
  40. mlrun/model_monitoring/applications/context.py +214 -0
  41. mlrun/model_monitoring/applications/evidently_base.py +211 -0
  42. mlrun/model_monitoring/applications/histogram_data_drift.py +92 -77
  43. mlrun/model_monitoring/applications/results.py +99 -0
  44. mlrun/model_monitoring/controller.py +3 -1
  45. mlrun/model_monitoring/db/stores/sqldb/models/base.py +7 -6
  46. mlrun/model_monitoring/db/stores/sqldb/sql_store.py +1 -1
  47. mlrun/model_monitoring/db/stores/v3io_kv/kv_store.py +67 -4
  48. mlrun/model_monitoring/evidently_application.py +6 -118
  49. mlrun/model_monitoring/helpers.py +1 -1
  50. mlrun/model_monitoring/model_endpoint.py +3 -2
  51. mlrun/model_monitoring/stream_processing.py +2 -3
  52. mlrun/model_monitoring/writer.py +69 -39
  53. mlrun/platforms/iguazio.py +2 -2
  54. mlrun/projects/project.py +18 -31
  55. mlrun/render.py +2 -10
  56. mlrun/run.py +1 -3
  57. mlrun/runtimes/__init__.py +3 -3
  58. mlrun/runtimes/base.py +3 -3
  59. mlrun/runtimes/funcdoc.py +0 -28
  60. mlrun/runtimes/local.py +1 -1
  61. mlrun/runtimes/mpijob/__init__.py +0 -20
  62. mlrun/runtimes/mpijob/v1.py +1 -1
  63. mlrun/runtimes/nuclio/function.py +1 -1
  64. mlrun/runtimes/utils.py +1 -1
  65. mlrun/utils/helpers.py +27 -40
  66. mlrun/utils/notifications/notification/slack.py +4 -2
  67. mlrun/utils/notifications/notification_pusher.py +133 -14
  68. mlrun/utils/version/version.json +2 -2
  69. {mlrun-1.7.0rc14.dist-info → mlrun-1.7.0rc15.dist-info}/METADATA +2 -2
  70. {mlrun-1.7.0rc14.dist-info → mlrun-1.7.0rc15.dist-info}/RECORD +75 -71
  71. mlrun/runtimes/mpijob/v1alpha1.py +0 -29
  72. /mlrun/{runtimes → common/runtimes}/constants.py +0 -0
  73. {mlrun-1.7.0rc14.dist-info → mlrun-1.7.0rc15.dist-info}/LICENSE +0 -0
  74. {mlrun-1.7.0rc14.dist-info → mlrun-1.7.0rc15.dist-info}/WHEEL +0 -0
  75. {mlrun-1.7.0rc14.dist-info → mlrun-1.7.0rc15.dist-info}/entry_points.txt +0 -0
  76. {mlrun-1.7.0rc14.dist-info → mlrun-1.7.0rc15.dist-info}/top_level.txt +0 -0
mlrun/launcher/client.py CHANGED
@@ -71,7 +71,7 @@ class ClientBaseLauncher(launcher.BaseLauncher, abc.ABC):
71
71
  ):
72
72
  run.metadata.labels["kind"] = runtime.kind
73
73
  mlrun.runtimes.utils.enrich_run_labels(
74
- run.metadata.labels, [mlrun.runtimes.constants.RunLabels.owner]
74
+ run.metadata.labels, [mlrun.common.runtimes.constants.RunLabels.owner]
75
75
  )
76
76
  if run.spec.output_path:
77
77
  run.spec.output_path = run.spec.output_path.replace(
mlrun/lists.py CHANGED
@@ -21,7 +21,7 @@ import mlrun.frameworks
21
21
  from .artifacts import Artifact, dict_to_artifact
22
22
  from .config import config
23
23
  from .render import artifacts_to_html, runs_to_html
24
- from .utils import flatten, get_artifact_target, get_in, is_legacy_artifact
24
+ from .utils import flatten, get_artifact_target, get_in
25
25
 
26
26
  list_header = [
27
27
  "project",
@@ -184,7 +184,7 @@ class ArtifactList(list):
184
184
  "uri": ["uri", "uri"],
185
185
  }
186
186
  for artifact in self:
187
- fields_index = 0 if is_legacy_artifact(artifact) else 1
187
+ fields_index = 1
188
188
  row = [get_in(artifact, v[fields_index], "") for k, v in head.items()]
189
189
  artifact_uri = dict_to_artifact(artifact).uri
190
190
  last_index = len(row) - 1
mlrun/model.py CHANGED
@@ -33,7 +33,6 @@ from .utils import (
33
33
  dict_to_json,
34
34
  dict_to_yaml,
35
35
  get_artifact_target,
36
- is_legacy_artifact,
37
36
  logger,
38
37
  template_artifact_path,
39
38
  )
@@ -1435,11 +1434,14 @@ class RunObject(RunTemplate):
1435
1434
  unknown_error = ""
1436
1435
  if (
1437
1436
  self.status.state
1438
- in mlrun.runtimes.constants.RunStates.abortion_states()
1437
+ in mlrun.common.runtimes.constants.RunStates.abortion_states()
1439
1438
  ):
1440
1439
  unknown_error = "Run was aborted"
1441
1440
 
1442
- elif self.status.state in mlrun.runtimes.constants.RunStates.error_states():
1441
+ elif (
1442
+ self.status.state
1443
+ in mlrun.common.runtimes.constants.RunStates.error_states()
1444
+ ):
1443
1445
  unknown_error = "Unknown error"
1444
1446
 
1445
1447
  return (
@@ -1477,7 +1479,7 @@ class RunObject(RunTemplate):
1477
1479
  outputs = {k: v for k, v in self.status.results.items()}
1478
1480
  if self.status.artifacts:
1479
1481
  for a in self.status.artifacts:
1480
- key = a["key"] if is_legacy_artifact(a) else a["metadata"]["key"]
1482
+ key = a["metadata"]["key"]
1481
1483
  outputs[key] = get_artifact_target(a, self.metadata.project)
1482
1484
  return outputs
1483
1485
 
@@ -1520,7 +1522,10 @@ class RunObject(RunTemplate):
1520
1522
 
1521
1523
  def state(self):
1522
1524
  """current run state"""
1523
- if self.status.state in mlrun.runtimes.constants.RunStates.terminal_states():
1525
+ if (
1526
+ self.status.state
1527
+ in mlrun.common.runtimes.constants.RunStates.terminal_states()
1528
+ ):
1524
1529
  return self.status.state
1525
1530
  self.refresh()
1526
1531
  return self.status.state or "unknown"
@@ -1582,7 +1587,7 @@ class RunObject(RunTemplate):
1582
1587
  last_pull_log_time = None
1583
1588
  logs_enabled = show_logs is not False
1584
1589
  state = self.state()
1585
- if state not in mlrun.runtimes.constants.RunStates.terminal_states():
1590
+ if state not in mlrun.common.runtimes.constants.RunStates.terminal_states():
1586
1591
  logger.info(
1587
1592
  f"run {self.metadata.name} is not completed yet, waiting for it to complete",
1588
1593
  current_state=state,
@@ -1592,7 +1597,8 @@ class RunObject(RunTemplate):
1592
1597
  if (
1593
1598
  logs_enabled
1594
1599
  and logs_interval
1595
- and state not in mlrun.runtimes.constants.RunStates.terminal_states()
1600
+ and state
1601
+ not in mlrun.common.runtimes.constants.RunStates.terminal_states()
1596
1602
  and (
1597
1603
  last_pull_log_time is None
1598
1604
  or (datetime.now() - last_pull_log_time).seconds > logs_interval
@@ -1601,7 +1607,7 @@ class RunObject(RunTemplate):
1601
1607
  last_pull_log_time = datetime.now()
1602
1608
  state, offset = self.logs(watch=False, offset=offset)
1603
1609
 
1604
- if state in mlrun.runtimes.constants.RunStates.terminal_states():
1610
+ if state in mlrun.common.runtimes.constants.RunStates.terminal_states():
1605
1611
  if logs_enabled and logs_interval:
1606
1612
  self.logs(watch=False, offset=offset)
1607
1613
  break
@@ -1613,7 +1619,10 @@ class RunObject(RunTemplate):
1613
1619
  )
1614
1620
  if logs_enabled and not logs_interval:
1615
1621
  self.logs(watch=False)
1616
- if raise_on_failure and state != mlrun.runtimes.constants.RunStates.completed:
1622
+ if (
1623
+ raise_on_failure
1624
+ and state != mlrun.common.runtimes.constants.RunStates.completed
1625
+ ):
1617
1626
  raise mlrun.errors.MLRunRuntimeError(
1618
1627
  f"Task {self.metadata.name} did not complete (state={state})"
1619
1628
  )
@@ -22,9 +22,10 @@ import pandas as pd
22
22
 
23
23
  import mlrun.artifacts
24
24
  import mlrun.common.helpers
25
- import mlrun.common.schemas.model_monitoring.constants as mm_consts
25
+ import mlrun.common.schemas.model_monitoring.constants as mm_constants
26
26
  import mlrun.feature_store
27
27
  import mlrun.model_monitoring.application
28
+ import mlrun.model_monitoring.applications as mm_app
28
29
  import mlrun.serving
29
30
  from mlrun.data_types.infer import InferOptions, get_df_stats
30
31
  from mlrun.utils import datetime_now, logger
@@ -48,7 +49,7 @@ def get_or_create_model_endpoint(
48
49
  sample_set_statistics: dict[str, typing.Any] = None,
49
50
  drift_threshold: float = None,
50
51
  possible_drift_threshold: float = None,
51
- monitoring_mode: mm_consts.ModelMonitoringMode = mm_consts.ModelMonitoringMode.disabled,
52
+ monitoring_mode: mm_constants.ModelMonitoringMode = mm_constants.ModelMonitoringMode.disabled,
52
53
  db_session=None,
53
54
  ) -> ModelEndpoint:
54
55
  """
@@ -128,7 +129,7 @@ def record_results(
128
129
  context: typing.Optional[mlrun.MLClientCtx] = None,
129
130
  infer_results_df: typing.Optional[pd.DataFrame] = None,
130
131
  sample_set_statistics: typing.Optional[dict[str, typing.Any]] = None,
131
- monitoring_mode: mm_consts.ModelMonitoringMode = mm_consts.ModelMonitoringMode.enabled,
132
+ monitoring_mode: mm_constants.ModelMonitoringMode = mm_constants.ModelMonitoringMode.enabled,
132
133
  # Deprecated arguments:
133
134
  drift_threshold: typing.Optional[float] = None,
134
135
  possible_drift_threshold: typing.Optional[float] = None,
@@ -282,7 +283,7 @@ def _model_endpoint_validations(
282
283
  # drift and possible drift thresholds
283
284
  if drift_threshold:
284
285
  current_drift_threshold = model_endpoint.spec.monitor_configuration.get(
285
- mm_consts.EventFieldType.DRIFT_DETECTED_THRESHOLD,
286
+ mm_constants.EventFieldType.DRIFT_DETECTED_THRESHOLD,
286
287
  mlrun.mlconf.model_endpoint_monitoring.drift_thresholds.default.drift_detected,
287
288
  )
288
289
  if current_drift_threshold != drift_threshold:
@@ -293,7 +294,7 @@ def _model_endpoint_validations(
293
294
 
294
295
  if possible_drift_threshold:
295
296
  current_possible_drift_threshold = model_endpoint.spec.monitor_configuration.get(
296
- mm_consts.EventFieldType.POSSIBLE_DRIFT_THRESHOLD,
297
+ mm_constants.EventFieldType.POSSIBLE_DRIFT_THRESHOLD,
297
298
  mlrun.mlconf.model_endpoint_monitoring.drift_thresholds.default.possible_drift,
298
299
  )
299
300
  if current_possible_drift_threshold != possible_drift_threshold:
@@ -332,14 +333,14 @@ def write_monitoring_df(
332
333
  )
333
334
 
334
335
  # Modify the DataFrame to the required structure that will be used later by the monitoring batch job
335
- if mm_consts.EventFieldType.TIMESTAMP not in infer_results_df.columns:
336
+ if mm_constants.EventFieldType.TIMESTAMP not in infer_results_df.columns:
336
337
  # Initialize timestamp column with the current time
337
- infer_results_df[mm_consts.EventFieldType.TIMESTAMP] = infer_datetime
338
+ infer_results_df[mm_constants.EventFieldType.TIMESTAMP] = infer_datetime
338
339
 
339
340
  # `endpoint_id` is the monitoring feature set entity and therefore it should be defined as the df index before
340
341
  # the ingest process
341
- infer_results_df[mm_consts.EventFieldType.ENDPOINT_ID] = endpoint_id
342
- infer_results_df.set_index(mm_consts.EventFieldType.ENDPOINT_ID, inplace=True)
342
+ infer_results_df[mm_constants.EventFieldType.ENDPOINT_ID] = endpoint_id
343
+ infer_results_df.set_index(mm_constants.EventFieldType.ENDPOINT_ID, inplace=True)
343
344
 
344
345
  monitoring_feature_set.ingest(source=infer_results_df, overwrite=False)
345
346
 
@@ -355,7 +356,7 @@ def _generate_model_endpoint(
355
356
  sample_set_statistics: dict[str, typing.Any],
356
357
  drift_threshold: float,
357
358
  possible_drift_threshold: float,
358
- monitoring_mode: mm_consts.ModelMonitoringMode = mm_consts.ModelMonitoringMode.disabled,
359
+ monitoring_mode: mm_constants.ModelMonitoringMode = mm_constants.ModelMonitoringMode.disabled,
359
360
  ) -> ModelEndpoint:
360
361
  """
361
362
  Write a new model endpoint record.
@@ -394,11 +395,11 @@ def _generate_model_endpoint(
394
395
  model_endpoint.spec.model_class = "drift-analysis"
395
396
  if drift_threshold:
396
397
  model_endpoint.spec.monitor_configuration[
397
- mm_consts.EventFieldType.DRIFT_DETECTED_THRESHOLD
398
+ mm_constants.EventFieldType.DRIFT_DETECTED_THRESHOLD
398
399
  ] = drift_threshold
399
400
  if possible_drift_threshold:
400
401
  model_endpoint.spec.monitor_configuration[
401
- mm_consts.EventFieldType.POSSIBLE_DRIFT_THRESHOLD
402
+ mm_constants.EventFieldType.POSSIBLE_DRIFT_THRESHOLD
402
403
  ] = possible_drift_threshold
403
404
 
404
405
  model_endpoint.spec.monitoring_mode = monitoring_mode
@@ -589,7 +590,10 @@ def _create_model_monitoring_function_base(
589
590
  project: str,
590
591
  func: typing.Union[str, None] = None,
591
592
  application_class: typing.Union[
592
- str, mlrun.model_monitoring.application.ModelMonitoringApplicationBase, None
593
+ str,
594
+ mlrun.model_monitoring.application.ModelMonitoringApplicationBase,
595
+ mm_app.ModelMonitoringApplicationBaseV2,
596
+ None,
593
597
  ] = None,
594
598
  name: typing.Optional[str] = None,
595
599
  image: typing.Optional[str] = None,
@@ -602,6 +606,20 @@ def _create_model_monitoring_function_base(
602
606
  Note: this is an internal API only.
603
607
  This function does not set the labels or mounts v3io.
604
608
  """
609
+ if isinstance(
610
+ application_class,
611
+ mlrun.model_monitoring.application.ModelMonitoringApplicationBase,
612
+ ):
613
+ warnings.warn(
614
+ "The `ModelMonitoringApplicationBase` class is deprecated from version 1.7.0, "
615
+ "please use `ModelMonitoringApplicationBaseV2`. It will be removed in 1.9.0.",
616
+ FutureWarning,
617
+ )
618
+ if name in mm_constants.MonitoringFunctionNames.list():
619
+ raise mlrun.errors.MLRunInvalidArgumentError(
620
+ f"An application cannot have the following names: "
621
+ f"{mm_constants.MonitoringFunctionNames.list()}"
622
+ )
605
623
  if func is None:
606
624
  func = ""
607
625
  func_obj = typing.cast(
@@ -618,14 +636,19 @@ def _create_model_monitoring_function_base(
618
636
  ),
619
637
  )
620
638
  graph = func_obj.set_topology(mlrun.serving.states.StepKinds.flow)
639
+ prepare_step = graph.to(
640
+ class_name="mlrun.model_monitoring.applications._application_steps._PrepareMonitoringEvent",
641
+ name="PrepareMonitoringEvent",
642
+ application_name=name,
643
+ )
621
644
  if isinstance(application_class, str):
622
- first_step = graph.to(class_name=application_class, **application_kwargs)
645
+ app_step = prepare_step.to(class_name=application_class, **application_kwargs)
623
646
  else:
624
- first_step = graph.to(class_name=application_class)
625
- first_step.to(
626
- class_name="mlrun.model_monitoring.application.PushToMonitoringWriter",
647
+ app_step = prepare_step.to(class_name=application_class)
648
+ app_step.to(
649
+ class_name="mlrun.model_monitoring.applications._application_steps._PushToMonitoringWriter",
627
650
  name="PushToMonitoringWriter",
628
651
  project=project,
629
- writer_application_name=mm_consts.MonitoringFunctionNames.WRITER,
652
+ writer_application_name=mm_constants.MonitoringFunctionNames.WRITER,
630
653
  ).respond()
631
654
  return func_obj
@@ -12,308 +12,8 @@
12
12
  # See the License for the specific language governing permissions and
13
13
  # limitations under the License.
14
14
 
15
- import dataclasses
16
- import json
17
- import re
18
- from abc import ABC, abstractmethod
19
- from typing import Any, Optional, Union, cast
20
-
21
- import numpy as np
22
- import pandas as pd
23
-
24
- import mlrun.common.helpers
25
- import mlrun.common.model_monitoring.helpers
26
- import mlrun.common.schemas.model_monitoring.constants as mm_constant
27
- import mlrun.utils.v3io_clients
28
- from mlrun.datastore import get_stream_pusher
29
- from mlrun.datastore.targets import ParquetTarget
30
- from mlrun.model_monitoring.helpers import get_stream_path
31
- from mlrun.serving.utils import StepToDict
32
- from mlrun.utils import logger
33
-
34
-
35
- @dataclasses.dataclass
36
- class ModelMonitoringApplicationResult:
37
- """
38
- Class representing the result of a custom model monitoring application.
39
-
40
- :param name: (str) Name of the application result. This name must be
41
- unique for each metric in a single application
42
- (name must be of the format [a-zA-Z_][a-zA-Z0-9_]*).
43
- :param value: (float) Value of the application result.
44
- :param kind: (ResultKindApp) Kind of application result.
45
- :param status: (ResultStatusApp) Status of the application result.
46
- :param extra_data: (dict) Extra data associated with the application result.
47
- """
48
-
49
- name: str
50
- value: float
51
- kind: mm_constant.ResultKindApp
52
- status: mm_constant.ResultStatusApp
53
- extra_data: dict = dataclasses.field(default_factory=dict)
54
-
55
- def __post_init__(self):
56
- pat = re.compile(r"[a-zA-Z_][a-zA-Z0-9_]*")
57
- if not re.fullmatch(pat, self.name):
58
- raise mlrun.errors.MLRunInvalidArgumentError(
59
- "Attribute name must be of the format [a-zA-Z_][a-zA-Z0-9_]*"
60
- )
61
-
62
- def to_dict(self):
63
- """
64
- Convert the object to a dictionary format suitable for writing.
65
-
66
- :returns: (dict) Dictionary representation of the result.
67
- """
68
- return {
69
- mm_constant.WriterEvent.RESULT_NAME: self.name,
70
- mm_constant.WriterEvent.RESULT_VALUE: self.value,
71
- mm_constant.WriterEvent.RESULT_KIND: self.kind,
72
- mm_constant.WriterEvent.RESULT_STATUS: self.status,
73
- mm_constant.WriterEvent.RESULT_EXTRA_DATA: json.dumps(self.extra_data),
74
- }
75
-
76
-
77
- class ModelMonitoringApplicationBase(StepToDict, ABC):
78
- """
79
- A base class for a model monitoring application.
80
- Inherit from this class to create a custom model monitoring application.
81
-
82
- example for very simple custom application::
83
- # mlrun: start-code
84
- class MyApp(ApplicationBase):
85
- def do_tracking(
86
- self,
87
- sample_df_stats: mlrun.common.model_monitoring.helpers.FeatureStats,
88
- feature_stats: mlrun.common.model_monitoring.helpers.FeatureStats,
89
- start_infer_time: pd.Timestamp,
90
- end_infer_time: pd.Timestamp,
91
- schedule_time: pd.Timestamp,
92
- latest_request: pd.Timestamp,
93
- endpoint_id: str,
94
- output_stream_uri: str,
95
- ) -> ModelMonitoringApplicationResult:
96
- self.context.log_artifact(
97
- TableArtifact(
98
- "sample_df_stats", df=self.dict_to_histogram(sample_df_stats)
99
- )
100
- )
101
- return ModelMonitoringApplicationResult(
102
- name="data_drift_test",
103
- value=0.5,
104
- kind=mm_constant.ResultKindApp.data_drift,
105
- status=mm_constant.ResultStatusApp.detected,
106
- )
107
-
108
-
109
- # mlrun: end-code
110
- """
111
-
112
- kind = "monitoring_application"
113
-
114
- def do(
115
- self, event: dict[str, Any]
116
- ) -> tuple[list[ModelMonitoringApplicationResult], dict]:
117
- """
118
- Process the monitoring event and return application results.
119
-
120
- :param event: (dict) The monitoring event to process.
121
- :returns: (list[ModelMonitoringApplicationResult], dict) The application results
122
- and the original event for the application.
123
- """
124
- resolved_event = self._resolve_event(event)
125
- if not (
126
- hasattr(self, "context") and isinstance(self.context, mlrun.MLClientCtx)
127
- ):
128
- self._lazy_init(app_name=resolved_event[0])
129
- results = self.do_tracking(*resolved_event)
130
- results = results if isinstance(results, list) else [results]
131
- return results, event
132
-
133
- def _lazy_init(self, app_name: str):
134
- self.context = cast(
135
- mlrun.MLClientCtx, self._create_context_for_logging(app_name=app_name)
136
- )
137
-
138
- @abstractmethod
139
- def do_tracking(
140
- self,
141
- application_name: str,
142
- sample_df_stats: mlrun.common.model_monitoring.helpers.FeatureStats,
143
- feature_stats: mlrun.common.model_monitoring.helpers.FeatureStats,
144
- sample_df: pd.DataFrame,
145
- start_infer_time: pd.Timestamp,
146
- end_infer_time: pd.Timestamp,
147
- latest_request: pd.Timestamp,
148
- endpoint_id: str,
149
- output_stream_uri: str,
150
- ) -> Union[
151
- ModelMonitoringApplicationResult, list[ModelMonitoringApplicationResult]
152
- ]:
153
- """
154
- Implement this method with your custom monitoring logic.
155
-
156
- :param application_name: (str) the app name
157
- :param sample_df_stats: (FeatureStats) The new sample distribution dictionary.
158
- :param feature_stats: (FeatureStats) The train sample distribution dictionary.
159
- :param sample_df: (pd.DataFrame) The new sample DataFrame.
160
- :param start_infer_time: (pd.Timestamp) Start time of the monitoring schedule.
161
- :param end_infer_time: (pd.Timestamp) End time of the monitoring schedule.
162
- :param latest_request: (pd.Timestamp) Timestamp of the latest request on this endpoint_id.
163
- :param endpoint_id: (str) ID of the monitored model endpoint
164
- :param output_stream_uri: (str) URI of the output stream for results
165
-
166
- :returns: (ModelMonitoringApplicationResult) or
167
- (list[ModelMonitoringApplicationResult]) of the application results.
168
- """
169
- raise NotImplementedError
170
-
171
- @classmethod
172
- def _resolve_event(
173
- cls,
174
- event: dict[str, Any],
175
- ) -> tuple[
176
- str,
177
- mlrun.common.model_monitoring.helpers.FeatureStats,
178
- mlrun.common.model_monitoring.helpers.FeatureStats,
179
- pd.DataFrame,
180
- pd.Timestamp,
181
- pd.Timestamp,
182
- pd.Timestamp,
183
- str,
184
- str,
185
- ]:
186
- """
187
- Converting the event into a single tuple that will be used for passing the event arguments to the running
188
- application
189
-
190
- :param event: dictionary with all the incoming data
191
-
192
- :return: A tuple of:
193
- [0] = (str) application name
194
- [1] = (dict) current input statistics
195
- [2] = (dict) train statistics
196
- [3] = (pd.DataFrame) current input data
197
- [4] = (pd.Timestamp) start time of the monitoring schedule
198
- [5] = (pd.Timestamp) end time of the monitoring schedule
199
- [6] = (pd.Timestamp) timestamp of the latest request
200
- [7] = (str) endpoint id
201
- [8] = (str) output stream uri
202
- """
203
- start_time = pd.Timestamp(event[mm_constant.ApplicationEvent.START_INFER_TIME])
204
- end_time = pd.Timestamp(event[mm_constant.ApplicationEvent.END_INFER_TIME])
205
- return (
206
- event[mm_constant.ApplicationEvent.APPLICATION_NAME],
207
- json.loads(event[mm_constant.ApplicationEvent.CURRENT_STATS]),
208
- json.loads(event[mm_constant.ApplicationEvent.FEATURE_STATS]),
209
- ParquetTarget(
210
- path=event[mm_constant.ApplicationEvent.SAMPLE_PARQUET_PATH]
211
- ).as_df(
212
- start_time=start_time,
213
- end_time=end_time,
214
- time_column=mm_constant.FeatureSetFeatures.time_stamp(),
215
- ),
216
- start_time,
217
- end_time,
218
- pd.Timestamp(event[mm_constant.ApplicationEvent.LAST_REQUEST]),
219
- event[mm_constant.ApplicationEvent.ENDPOINT_ID],
220
- event[mm_constant.ApplicationEvent.OUTPUT_STREAM_URI],
221
- )
222
-
223
- @staticmethod
224
- def _create_context_for_logging(app_name: str):
225
- context = mlrun.get_or_create_ctx(
226
- f"{app_name}-logger",
227
- upload_artifacts=True,
228
- labels={"workflow": "model-monitoring-app-logger"},
229
- )
230
- return context
231
-
232
- @staticmethod
233
- def dict_to_histogram(
234
- histogram_dict: mlrun.common.model_monitoring.helpers.FeatureStats,
235
- ) -> pd.DataFrame:
236
- """
237
- Convert histogram dictionary to pandas DataFrame with feature histograms as columns
238
-
239
- :param histogram_dict: Histogram dictionary
240
-
241
- :returns: Histogram dataframe
242
- """
243
-
244
- # Create a dictionary with feature histograms as values
245
- histograms = {}
246
- for feature, stats in histogram_dict.items():
247
- if "hist" in stats:
248
- # Normalize to probability distribution of each feature
249
- histograms[feature] = np.array(stats["hist"][0]) / stats["count"]
250
-
251
- # Convert the dictionary to pandas DataFrame
252
- histograms = pd.DataFrame(histograms)
253
-
254
- return histograms
255
-
256
-
257
- class PushToMonitoringWriter(StepToDict):
258
- kind = "monitoring_application_stream_pusher"
259
-
260
- def __init__(
261
- self,
262
- project: Optional[str] = None,
263
- writer_application_name: Optional[str] = None,
264
- stream_uri: Optional[str] = None,
265
- name: Optional[str] = None,
266
- ):
267
- """
268
- Class for pushing application results to the monitoring writer stream.
269
-
270
- :param project: Project name.
271
- :param writer_application_name: Writer application name.
272
- :param stream_uri: Stream URI for pushing results.
273
- :param name: Name of the PushToMonitoringWriter
274
- instance default to PushToMonitoringWriter.
275
- """
276
- self.project = project
277
- self.application_name_to_push = writer_application_name
278
- self.stream_uri = stream_uri or get_stream_path(
279
- project=self.project, function_name=self.application_name_to_push
280
- )
281
- self.output_stream = None
282
- self.name = name or "PushToMonitoringWriter"
283
-
284
- def do(self, event: tuple[list[ModelMonitoringApplicationResult], dict]) -> None:
285
- """
286
- Push application results to the monitoring writer stream.
287
-
288
- :param event: Monitoring result(s) to push and the original event from the controller.
289
- """
290
- self._lazy_init()
291
- application_results, application_event = event
292
- metadata = {
293
- mm_constant.WriterEvent.APPLICATION_NAME: application_event[
294
- mm_constant.ApplicationEvent.APPLICATION_NAME
295
- ],
296
- mm_constant.WriterEvent.ENDPOINT_ID: application_event[
297
- mm_constant.ApplicationEvent.ENDPOINT_ID
298
- ],
299
- mm_constant.WriterEvent.START_INFER_TIME: application_event[
300
- mm_constant.ApplicationEvent.START_INFER_TIME
301
- ],
302
- mm_constant.WriterEvent.END_INFER_TIME: application_event[
303
- mm_constant.ApplicationEvent.END_INFER_TIME
304
- ],
305
- mm_constant.WriterEvent.CURRENT_STATS: json.dumps(
306
- application_event[mm_constant.ApplicationEvent.CURRENT_STATS]
307
- ),
308
- }
309
- for result in application_results:
310
- data = result.to_dict()
311
- data.update(metadata)
312
- logger.info(f"Pushing data = {data} \n to stream = {self.stream_uri}")
313
- self.output_stream.push([data])
314
-
315
- def _lazy_init(self):
316
- if self.output_stream is None:
317
- self.output_stream = get_stream_pusher(
318
- self.stream_uri,
319
- )
15
+ # TODO : delete this file in 1.9.0
16
+ from mlrun.model_monitoring.applications import ( # noqa: F401
17
+ ModelMonitoringApplicationBase,
18
+ ModelMonitoringApplicationResult,
19
+ )
@@ -11,3 +11,14 @@
11
11
  # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
12
  # See the License for the specific language governing permissions and
13
13
  # limitations under the License.
14
+ #
15
+
16
+ from .base import ModelMonitoringApplicationBase, ModelMonitoringApplicationBaseV2
17
+ from .context import MonitoringApplicationContext
18
+ from .evidently_base import (
19
+ _HAS_EVIDENTLY,
20
+ SUPPORTED_EVIDENTLY_VERSION,
21
+ EvidentlyModelMonitoringApplicationBase,
22
+ EvidentlyModelMonitoringApplicationBaseV2,
23
+ )
24
+ from .results import ModelMonitoringApplicationMetric, ModelMonitoringApplicationResult