mlrun 1.7.0rc14__py3-none-any.whl → 1.7.0rc15__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of mlrun might be problematic. Click here for more details.
- mlrun/__main__.py +0 -105
- mlrun/artifacts/__init__.py +1 -2
- mlrun/artifacts/base.py +8 -250
- mlrun/artifacts/dataset.py +1 -190
- mlrun/artifacts/manager.py +2 -41
- mlrun/artifacts/model.py +1 -140
- mlrun/artifacts/plots.py +1 -375
- mlrun/common/schemas/model_monitoring/__init__.py +4 -0
- mlrun/common/schemas/model_monitoring/constants.py +24 -3
- mlrun/common/schemas/model_monitoring/model_endpoints.py +13 -1
- mlrun/config.py +3 -3
- mlrun/data_types/to_pandas.py +4 -4
- mlrun/datastore/base.py +41 -9
- mlrun/datastore/datastore_profile.py +50 -3
- mlrun/datastore/inmem.py +2 -2
- mlrun/datastore/sources.py +43 -2
- mlrun/datastore/store_resources.py +2 -6
- mlrun/datastore/targets.py +106 -39
- mlrun/db/httpdb.py +4 -4
- mlrun/feature_store/__init__.py +0 -2
- mlrun/feature_store/api.py +12 -47
- mlrun/feature_store/feature_set.py +9 -0
- mlrun/feature_store/retrieval/base.py +9 -4
- mlrun/feature_store/retrieval/conversion.py +4 -4
- mlrun/feature_store/retrieval/dask_merger.py +2 -0
- mlrun/feature_store/retrieval/job.py +2 -0
- mlrun/feature_store/retrieval/local_merger.py +2 -0
- mlrun/feature_store/retrieval/spark_merger.py +5 -0
- mlrun/frameworks/_dl_common/loggers/tensorboard_logger.py +5 -10
- mlrun/kfpops.py +5 -10
- mlrun/launcher/base.py +1 -1
- mlrun/launcher/client.py +1 -1
- mlrun/lists.py +2 -2
- mlrun/model.py +18 -9
- mlrun/model_monitoring/api.py +41 -18
- mlrun/model_monitoring/application.py +5 -305
- mlrun/model_monitoring/applications/__init__.py +11 -0
- mlrun/model_monitoring/applications/_application_steps.py +158 -0
- mlrun/model_monitoring/applications/base.py +282 -0
- mlrun/model_monitoring/applications/context.py +214 -0
- mlrun/model_monitoring/applications/evidently_base.py +211 -0
- mlrun/model_monitoring/applications/histogram_data_drift.py +92 -77
- mlrun/model_monitoring/applications/results.py +99 -0
- mlrun/model_monitoring/controller.py +3 -1
- mlrun/model_monitoring/db/stores/sqldb/models/base.py +7 -6
- mlrun/model_monitoring/db/stores/sqldb/sql_store.py +1 -1
- mlrun/model_monitoring/db/stores/v3io_kv/kv_store.py +67 -4
- mlrun/model_monitoring/evidently_application.py +6 -118
- mlrun/model_monitoring/helpers.py +1 -1
- mlrun/model_monitoring/model_endpoint.py +3 -2
- mlrun/model_monitoring/stream_processing.py +2 -3
- mlrun/model_monitoring/writer.py +69 -39
- mlrun/platforms/iguazio.py +2 -2
- mlrun/projects/project.py +18 -31
- mlrun/render.py +2 -10
- mlrun/run.py +1 -3
- mlrun/runtimes/__init__.py +3 -3
- mlrun/runtimes/base.py +3 -3
- mlrun/runtimes/funcdoc.py +0 -28
- mlrun/runtimes/local.py +1 -1
- mlrun/runtimes/mpijob/__init__.py +0 -20
- mlrun/runtimes/mpijob/v1.py +1 -1
- mlrun/runtimes/nuclio/function.py +1 -1
- mlrun/runtimes/utils.py +1 -1
- mlrun/utils/helpers.py +27 -40
- mlrun/utils/notifications/notification/slack.py +4 -2
- mlrun/utils/notifications/notification_pusher.py +133 -14
- mlrun/utils/version/version.json +2 -2
- {mlrun-1.7.0rc14.dist-info → mlrun-1.7.0rc15.dist-info}/METADATA +2 -2
- {mlrun-1.7.0rc14.dist-info → mlrun-1.7.0rc15.dist-info}/RECORD +75 -71
- mlrun/runtimes/mpijob/v1alpha1.py +0 -29
- /mlrun/{runtimes → common/runtimes}/constants.py +0 -0
- {mlrun-1.7.0rc14.dist-info → mlrun-1.7.0rc15.dist-info}/LICENSE +0 -0
- {mlrun-1.7.0rc14.dist-info → mlrun-1.7.0rc15.dist-info}/WHEEL +0 -0
- {mlrun-1.7.0rc14.dist-info → mlrun-1.7.0rc15.dist-info}/entry_points.txt +0 -0
- {mlrun-1.7.0rc14.dist-info → mlrun-1.7.0rc15.dist-info}/top_level.txt +0 -0
mlrun/launcher/client.py
CHANGED
|
@@ -71,7 +71,7 @@ class ClientBaseLauncher(launcher.BaseLauncher, abc.ABC):
|
|
|
71
71
|
):
|
|
72
72
|
run.metadata.labels["kind"] = runtime.kind
|
|
73
73
|
mlrun.runtimes.utils.enrich_run_labels(
|
|
74
|
-
run.metadata.labels, [mlrun.runtimes.constants.RunLabels.owner]
|
|
74
|
+
run.metadata.labels, [mlrun.common.runtimes.constants.RunLabels.owner]
|
|
75
75
|
)
|
|
76
76
|
if run.spec.output_path:
|
|
77
77
|
run.spec.output_path = run.spec.output_path.replace(
|
mlrun/lists.py
CHANGED
|
@@ -21,7 +21,7 @@ import mlrun.frameworks
|
|
|
21
21
|
from .artifacts import Artifact, dict_to_artifact
|
|
22
22
|
from .config import config
|
|
23
23
|
from .render import artifacts_to_html, runs_to_html
|
|
24
|
-
from .utils import flatten, get_artifact_target, get_in
|
|
24
|
+
from .utils import flatten, get_artifact_target, get_in
|
|
25
25
|
|
|
26
26
|
list_header = [
|
|
27
27
|
"project",
|
|
@@ -184,7 +184,7 @@ class ArtifactList(list):
|
|
|
184
184
|
"uri": ["uri", "uri"],
|
|
185
185
|
}
|
|
186
186
|
for artifact in self:
|
|
187
|
-
fields_index =
|
|
187
|
+
fields_index = 1
|
|
188
188
|
row = [get_in(artifact, v[fields_index], "") for k, v in head.items()]
|
|
189
189
|
artifact_uri = dict_to_artifact(artifact).uri
|
|
190
190
|
last_index = len(row) - 1
|
mlrun/model.py
CHANGED
|
@@ -33,7 +33,6 @@ from .utils import (
|
|
|
33
33
|
dict_to_json,
|
|
34
34
|
dict_to_yaml,
|
|
35
35
|
get_artifact_target,
|
|
36
|
-
is_legacy_artifact,
|
|
37
36
|
logger,
|
|
38
37
|
template_artifact_path,
|
|
39
38
|
)
|
|
@@ -1435,11 +1434,14 @@ class RunObject(RunTemplate):
|
|
|
1435
1434
|
unknown_error = ""
|
|
1436
1435
|
if (
|
|
1437
1436
|
self.status.state
|
|
1438
|
-
in mlrun.runtimes.constants.RunStates.abortion_states()
|
|
1437
|
+
in mlrun.common.runtimes.constants.RunStates.abortion_states()
|
|
1439
1438
|
):
|
|
1440
1439
|
unknown_error = "Run was aborted"
|
|
1441
1440
|
|
|
1442
|
-
elif
|
|
1441
|
+
elif (
|
|
1442
|
+
self.status.state
|
|
1443
|
+
in mlrun.common.runtimes.constants.RunStates.error_states()
|
|
1444
|
+
):
|
|
1443
1445
|
unknown_error = "Unknown error"
|
|
1444
1446
|
|
|
1445
1447
|
return (
|
|
@@ -1477,7 +1479,7 @@ class RunObject(RunTemplate):
|
|
|
1477
1479
|
outputs = {k: v for k, v in self.status.results.items()}
|
|
1478
1480
|
if self.status.artifacts:
|
|
1479
1481
|
for a in self.status.artifacts:
|
|
1480
|
-
key = a["
|
|
1482
|
+
key = a["metadata"]["key"]
|
|
1481
1483
|
outputs[key] = get_artifact_target(a, self.metadata.project)
|
|
1482
1484
|
return outputs
|
|
1483
1485
|
|
|
@@ -1520,7 +1522,10 @@ class RunObject(RunTemplate):
|
|
|
1520
1522
|
|
|
1521
1523
|
def state(self):
|
|
1522
1524
|
"""current run state"""
|
|
1523
|
-
if
|
|
1525
|
+
if (
|
|
1526
|
+
self.status.state
|
|
1527
|
+
in mlrun.common.runtimes.constants.RunStates.terminal_states()
|
|
1528
|
+
):
|
|
1524
1529
|
return self.status.state
|
|
1525
1530
|
self.refresh()
|
|
1526
1531
|
return self.status.state or "unknown"
|
|
@@ -1582,7 +1587,7 @@ class RunObject(RunTemplate):
|
|
|
1582
1587
|
last_pull_log_time = None
|
|
1583
1588
|
logs_enabled = show_logs is not False
|
|
1584
1589
|
state = self.state()
|
|
1585
|
-
if state not in mlrun.runtimes.constants.RunStates.terminal_states():
|
|
1590
|
+
if state not in mlrun.common.runtimes.constants.RunStates.terminal_states():
|
|
1586
1591
|
logger.info(
|
|
1587
1592
|
f"run {self.metadata.name} is not completed yet, waiting for it to complete",
|
|
1588
1593
|
current_state=state,
|
|
@@ -1592,7 +1597,8 @@ class RunObject(RunTemplate):
|
|
|
1592
1597
|
if (
|
|
1593
1598
|
logs_enabled
|
|
1594
1599
|
and logs_interval
|
|
1595
|
-
and state
|
|
1600
|
+
and state
|
|
1601
|
+
not in mlrun.common.runtimes.constants.RunStates.terminal_states()
|
|
1596
1602
|
and (
|
|
1597
1603
|
last_pull_log_time is None
|
|
1598
1604
|
or (datetime.now() - last_pull_log_time).seconds > logs_interval
|
|
@@ -1601,7 +1607,7 @@ class RunObject(RunTemplate):
|
|
|
1601
1607
|
last_pull_log_time = datetime.now()
|
|
1602
1608
|
state, offset = self.logs(watch=False, offset=offset)
|
|
1603
1609
|
|
|
1604
|
-
if state in mlrun.runtimes.constants.RunStates.terminal_states():
|
|
1610
|
+
if state in mlrun.common.runtimes.constants.RunStates.terminal_states():
|
|
1605
1611
|
if logs_enabled and logs_interval:
|
|
1606
1612
|
self.logs(watch=False, offset=offset)
|
|
1607
1613
|
break
|
|
@@ -1613,7 +1619,10 @@ class RunObject(RunTemplate):
|
|
|
1613
1619
|
)
|
|
1614
1620
|
if logs_enabled and not logs_interval:
|
|
1615
1621
|
self.logs(watch=False)
|
|
1616
|
-
if
|
|
1622
|
+
if (
|
|
1623
|
+
raise_on_failure
|
|
1624
|
+
and state != mlrun.common.runtimes.constants.RunStates.completed
|
|
1625
|
+
):
|
|
1617
1626
|
raise mlrun.errors.MLRunRuntimeError(
|
|
1618
1627
|
f"Task {self.metadata.name} did not complete (state={state})"
|
|
1619
1628
|
)
|
mlrun/model_monitoring/api.py
CHANGED
|
@@ -22,9 +22,10 @@ import pandas as pd
|
|
|
22
22
|
|
|
23
23
|
import mlrun.artifacts
|
|
24
24
|
import mlrun.common.helpers
|
|
25
|
-
import mlrun.common.schemas.model_monitoring.constants as
|
|
25
|
+
import mlrun.common.schemas.model_monitoring.constants as mm_constants
|
|
26
26
|
import mlrun.feature_store
|
|
27
27
|
import mlrun.model_monitoring.application
|
|
28
|
+
import mlrun.model_monitoring.applications as mm_app
|
|
28
29
|
import mlrun.serving
|
|
29
30
|
from mlrun.data_types.infer import InferOptions, get_df_stats
|
|
30
31
|
from mlrun.utils import datetime_now, logger
|
|
@@ -48,7 +49,7 @@ def get_or_create_model_endpoint(
|
|
|
48
49
|
sample_set_statistics: dict[str, typing.Any] = None,
|
|
49
50
|
drift_threshold: float = None,
|
|
50
51
|
possible_drift_threshold: float = None,
|
|
51
|
-
monitoring_mode:
|
|
52
|
+
monitoring_mode: mm_constants.ModelMonitoringMode = mm_constants.ModelMonitoringMode.disabled,
|
|
52
53
|
db_session=None,
|
|
53
54
|
) -> ModelEndpoint:
|
|
54
55
|
"""
|
|
@@ -128,7 +129,7 @@ def record_results(
|
|
|
128
129
|
context: typing.Optional[mlrun.MLClientCtx] = None,
|
|
129
130
|
infer_results_df: typing.Optional[pd.DataFrame] = None,
|
|
130
131
|
sample_set_statistics: typing.Optional[dict[str, typing.Any]] = None,
|
|
131
|
-
monitoring_mode:
|
|
132
|
+
monitoring_mode: mm_constants.ModelMonitoringMode = mm_constants.ModelMonitoringMode.enabled,
|
|
132
133
|
# Deprecated arguments:
|
|
133
134
|
drift_threshold: typing.Optional[float] = None,
|
|
134
135
|
possible_drift_threshold: typing.Optional[float] = None,
|
|
@@ -282,7 +283,7 @@ def _model_endpoint_validations(
|
|
|
282
283
|
# drift and possible drift thresholds
|
|
283
284
|
if drift_threshold:
|
|
284
285
|
current_drift_threshold = model_endpoint.spec.monitor_configuration.get(
|
|
285
|
-
|
|
286
|
+
mm_constants.EventFieldType.DRIFT_DETECTED_THRESHOLD,
|
|
286
287
|
mlrun.mlconf.model_endpoint_monitoring.drift_thresholds.default.drift_detected,
|
|
287
288
|
)
|
|
288
289
|
if current_drift_threshold != drift_threshold:
|
|
@@ -293,7 +294,7 @@ def _model_endpoint_validations(
|
|
|
293
294
|
|
|
294
295
|
if possible_drift_threshold:
|
|
295
296
|
current_possible_drift_threshold = model_endpoint.spec.monitor_configuration.get(
|
|
296
|
-
|
|
297
|
+
mm_constants.EventFieldType.POSSIBLE_DRIFT_THRESHOLD,
|
|
297
298
|
mlrun.mlconf.model_endpoint_monitoring.drift_thresholds.default.possible_drift,
|
|
298
299
|
)
|
|
299
300
|
if current_possible_drift_threshold != possible_drift_threshold:
|
|
@@ -332,14 +333,14 @@ def write_monitoring_df(
|
|
|
332
333
|
)
|
|
333
334
|
|
|
334
335
|
# Modify the DataFrame to the required structure that will be used later by the monitoring batch job
|
|
335
|
-
if
|
|
336
|
+
if mm_constants.EventFieldType.TIMESTAMP not in infer_results_df.columns:
|
|
336
337
|
# Initialize timestamp column with the current time
|
|
337
|
-
infer_results_df[
|
|
338
|
+
infer_results_df[mm_constants.EventFieldType.TIMESTAMP] = infer_datetime
|
|
338
339
|
|
|
339
340
|
# `endpoint_id` is the monitoring feature set entity and therefore it should be defined as the df index before
|
|
340
341
|
# the ingest process
|
|
341
|
-
infer_results_df[
|
|
342
|
-
infer_results_df.set_index(
|
|
342
|
+
infer_results_df[mm_constants.EventFieldType.ENDPOINT_ID] = endpoint_id
|
|
343
|
+
infer_results_df.set_index(mm_constants.EventFieldType.ENDPOINT_ID, inplace=True)
|
|
343
344
|
|
|
344
345
|
monitoring_feature_set.ingest(source=infer_results_df, overwrite=False)
|
|
345
346
|
|
|
@@ -355,7 +356,7 @@ def _generate_model_endpoint(
|
|
|
355
356
|
sample_set_statistics: dict[str, typing.Any],
|
|
356
357
|
drift_threshold: float,
|
|
357
358
|
possible_drift_threshold: float,
|
|
358
|
-
monitoring_mode:
|
|
359
|
+
monitoring_mode: mm_constants.ModelMonitoringMode = mm_constants.ModelMonitoringMode.disabled,
|
|
359
360
|
) -> ModelEndpoint:
|
|
360
361
|
"""
|
|
361
362
|
Write a new model endpoint record.
|
|
@@ -394,11 +395,11 @@ def _generate_model_endpoint(
|
|
|
394
395
|
model_endpoint.spec.model_class = "drift-analysis"
|
|
395
396
|
if drift_threshold:
|
|
396
397
|
model_endpoint.spec.monitor_configuration[
|
|
397
|
-
|
|
398
|
+
mm_constants.EventFieldType.DRIFT_DETECTED_THRESHOLD
|
|
398
399
|
] = drift_threshold
|
|
399
400
|
if possible_drift_threshold:
|
|
400
401
|
model_endpoint.spec.monitor_configuration[
|
|
401
|
-
|
|
402
|
+
mm_constants.EventFieldType.POSSIBLE_DRIFT_THRESHOLD
|
|
402
403
|
] = possible_drift_threshold
|
|
403
404
|
|
|
404
405
|
model_endpoint.spec.monitoring_mode = monitoring_mode
|
|
@@ -589,7 +590,10 @@ def _create_model_monitoring_function_base(
|
|
|
589
590
|
project: str,
|
|
590
591
|
func: typing.Union[str, None] = None,
|
|
591
592
|
application_class: typing.Union[
|
|
592
|
-
str,
|
|
593
|
+
str,
|
|
594
|
+
mlrun.model_monitoring.application.ModelMonitoringApplicationBase,
|
|
595
|
+
mm_app.ModelMonitoringApplicationBaseV2,
|
|
596
|
+
None,
|
|
593
597
|
] = None,
|
|
594
598
|
name: typing.Optional[str] = None,
|
|
595
599
|
image: typing.Optional[str] = None,
|
|
@@ -602,6 +606,20 @@ def _create_model_monitoring_function_base(
|
|
|
602
606
|
Note: this is an internal API only.
|
|
603
607
|
This function does not set the labels or mounts v3io.
|
|
604
608
|
"""
|
|
609
|
+
if isinstance(
|
|
610
|
+
application_class,
|
|
611
|
+
mlrun.model_monitoring.application.ModelMonitoringApplicationBase,
|
|
612
|
+
):
|
|
613
|
+
warnings.warn(
|
|
614
|
+
"The `ModelMonitoringApplicationBase` class is deprecated from version 1.7.0, "
|
|
615
|
+
"please use `ModelMonitoringApplicationBaseV2`. It will be removed in 1.9.0.",
|
|
616
|
+
FutureWarning,
|
|
617
|
+
)
|
|
618
|
+
if name in mm_constants.MonitoringFunctionNames.list():
|
|
619
|
+
raise mlrun.errors.MLRunInvalidArgumentError(
|
|
620
|
+
f"An application cannot have the following names: "
|
|
621
|
+
f"{mm_constants.MonitoringFunctionNames.list()}"
|
|
622
|
+
)
|
|
605
623
|
if func is None:
|
|
606
624
|
func = ""
|
|
607
625
|
func_obj = typing.cast(
|
|
@@ -618,14 +636,19 @@ def _create_model_monitoring_function_base(
|
|
|
618
636
|
),
|
|
619
637
|
)
|
|
620
638
|
graph = func_obj.set_topology(mlrun.serving.states.StepKinds.flow)
|
|
639
|
+
prepare_step = graph.to(
|
|
640
|
+
class_name="mlrun.model_monitoring.applications._application_steps._PrepareMonitoringEvent",
|
|
641
|
+
name="PrepareMonitoringEvent",
|
|
642
|
+
application_name=name,
|
|
643
|
+
)
|
|
621
644
|
if isinstance(application_class, str):
|
|
622
|
-
|
|
645
|
+
app_step = prepare_step.to(class_name=application_class, **application_kwargs)
|
|
623
646
|
else:
|
|
624
|
-
|
|
625
|
-
|
|
626
|
-
class_name="mlrun.model_monitoring.
|
|
647
|
+
app_step = prepare_step.to(class_name=application_class)
|
|
648
|
+
app_step.to(
|
|
649
|
+
class_name="mlrun.model_monitoring.applications._application_steps._PushToMonitoringWriter",
|
|
627
650
|
name="PushToMonitoringWriter",
|
|
628
651
|
project=project,
|
|
629
|
-
writer_application_name=
|
|
652
|
+
writer_application_name=mm_constants.MonitoringFunctionNames.WRITER,
|
|
630
653
|
).respond()
|
|
631
654
|
return func_obj
|
mlrun/model_monitoring/application.py
CHANGED
|
@@ -12,308 +12,8 @@
|
|
|
12
12
|
# See the License for the specific language governing permissions and
|
|
13
13
|
# limitations under the License.
|
|
14
14
|
|
|
15
|
-
|
|
16
|
-
import
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
import numpy as np
|
|
22
|
-
import pandas as pd
|
|
23
|
-
|
|
24
|
-
import mlrun.common.helpers
|
|
25
|
-
import mlrun.common.model_monitoring.helpers
|
|
26
|
-
import mlrun.common.schemas.model_monitoring.constants as mm_constant
|
|
27
|
-
import mlrun.utils.v3io_clients
|
|
28
|
-
from mlrun.datastore import get_stream_pusher
|
|
29
|
-
from mlrun.datastore.targets import ParquetTarget
|
|
30
|
-
from mlrun.model_monitoring.helpers import get_stream_path
|
|
31
|
-
from mlrun.serving.utils import StepToDict
|
|
32
|
-
from mlrun.utils import logger
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
@dataclasses.dataclass
|
|
36
|
-
class ModelMonitoringApplicationResult:
|
|
37
|
-
"""
|
|
38
|
-
Class representing the result of a custom model monitoring application.
|
|
39
|
-
|
|
40
|
-
:param name: (str) Name of the application result. This name must be
|
|
41
|
-
unique for each metric in a single application
|
|
42
|
-
(name must be of the format [a-zA-Z_][a-zA-Z0-9_]*).
|
|
43
|
-
:param value: (float) Value of the application result.
|
|
44
|
-
:param kind: (ResultKindApp) Kind of application result.
|
|
45
|
-
:param status: (ResultStatusApp) Status of the application result.
|
|
46
|
-
:param extra_data: (dict) Extra data associated with the application result.
|
|
47
|
-
"""
|
|
48
|
-
|
|
49
|
-
name: str
|
|
50
|
-
value: float
|
|
51
|
-
kind: mm_constant.ResultKindApp
|
|
52
|
-
status: mm_constant.ResultStatusApp
|
|
53
|
-
extra_data: dict = dataclasses.field(default_factory=dict)
|
|
54
|
-
|
|
55
|
-
def __post_init__(self):
|
|
56
|
-
pat = re.compile(r"[a-zA-Z_][a-zA-Z0-9_]*")
|
|
57
|
-
if not re.fullmatch(pat, self.name):
|
|
58
|
-
raise mlrun.errors.MLRunInvalidArgumentError(
|
|
59
|
-
"Attribute name must be of the format [a-zA-Z_][a-zA-Z0-9_]*"
|
|
60
|
-
)
|
|
61
|
-
|
|
62
|
-
def to_dict(self):
|
|
63
|
-
"""
|
|
64
|
-
Convert the object to a dictionary format suitable for writing.
|
|
65
|
-
|
|
66
|
-
:returns: (dict) Dictionary representation of the result.
|
|
67
|
-
"""
|
|
68
|
-
return {
|
|
69
|
-
mm_constant.WriterEvent.RESULT_NAME: self.name,
|
|
70
|
-
mm_constant.WriterEvent.RESULT_VALUE: self.value,
|
|
71
|
-
mm_constant.WriterEvent.RESULT_KIND: self.kind,
|
|
72
|
-
mm_constant.WriterEvent.RESULT_STATUS: self.status,
|
|
73
|
-
mm_constant.WriterEvent.RESULT_EXTRA_DATA: json.dumps(self.extra_data),
|
|
74
|
-
}
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
class ModelMonitoringApplicationBase(StepToDict, ABC):
|
|
78
|
-
"""
|
|
79
|
-
A base class for a model monitoring application.
|
|
80
|
-
Inherit from this class to create a custom model monitoring application.
|
|
81
|
-
|
|
82
|
-
example for very simple custom application::
|
|
83
|
-
# mlrun: start-code
|
|
84
|
-
class MyApp(ApplicationBase):
|
|
85
|
-
def do_tracking(
|
|
86
|
-
self,
|
|
87
|
-
sample_df_stats: mlrun.common.model_monitoring.helpers.FeatureStats,
|
|
88
|
-
feature_stats: mlrun.common.model_monitoring.helpers.FeatureStats,
|
|
89
|
-
start_infer_time: pd.Timestamp,
|
|
90
|
-
end_infer_time: pd.Timestamp,
|
|
91
|
-
schedule_time: pd.Timestamp,
|
|
92
|
-
latest_request: pd.Timestamp,
|
|
93
|
-
endpoint_id: str,
|
|
94
|
-
output_stream_uri: str,
|
|
95
|
-
) -> ModelMonitoringApplicationResult:
|
|
96
|
-
self.context.log_artifact(
|
|
97
|
-
TableArtifact(
|
|
98
|
-
"sample_df_stats", df=self.dict_to_histogram(sample_df_stats)
|
|
99
|
-
)
|
|
100
|
-
)
|
|
101
|
-
return ModelMonitoringApplicationResult(
|
|
102
|
-
name="data_drift_test",
|
|
103
|
-
value=0.5,
|
|
104
|
-
kind=mm_constant.ResultKindApp.data_drift,
|
|
105
|
-
status=mm_constant.ResultStatusApp.detected,
|
|
106
|
-
)
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
# mlrun: end-code
|
|
110
|
-
"""
|
|
111
|
-
|
|
112
|
-
kind = "monitoring_application"
|
|
113
|
-
|
|
114
|
-
def do(
|
|
115
|
-
self, event: dict[str, Any]
|
|
116
|
-
) -> tuple[list[ModelMonitoringApplicationResult], dict]:
|
|
117
|
-
"""
|
|
118
|
-
Process the monitoring event and return application results.
|
|
119
|
-
|
|
120
|
-
:param event: (dict) The monitoring event to process.
|
|
121
|
-
:returns: (list[ModelMonitoringApplicationResult], dict) The application results
|
|
122
|
-
and the original event for the application.
|
|
123
|
-
"""
|
|
124
|
-
resolved_event = self._resolve_event(event)
|
|
125
|
-
if not (
|
|
126
|
-
hasattr(self, "context") and isinstance(self.context, mlrun.MLClientCtx)
|
|
127
|
-
):
|
|
128
|
-
self._lazy_init(app_name=resolved_event[0])
|
|
129
|
-
results = self.do_tracking(*resolved_event)
|
|
130
|
-
results = results if isinstance(results, list) else [results]
|
|
131
|
-
return results, event
|
|
132
|
-
|
|
133
|
-
def _lazy_init(self, app_name: str):
|
|
134
|
-
self.context = cast(
|
|
135
|
-
mlrun.MLClientCtx, self._create_context_for_logging(app_name=app_name)
|
|
136
|
-
)
|
|
137
|
-
|
|
138
|
-
@abstractmethod
|
|
139
|
-
def do_tracking(
|
|
140
|
-
self,
|
|
141
|
-
application_name: str,
|
|
142
|
-
sample_df_stats: mlrun.common.model_monitoring.helpers.FeatureStats,
|
|
143
|
-
feature_stats: mlrun.common.model_monitoring.helpers.FeatureStats,
|
|
144
|
-
sample_df: pd.DataFrame,
|
|
145
|
-
start_infer_time: pd.Timestamp,
|
|
146
|
-
end_infer_time: pd.Timestamp,
|
|
147
|
-
latest_request: pd.Timestamp,
|
|
148
|
-
endpoint_id: str,
|
|
149
|
-
output_stream_uri: str,
|
|
150
|
-
) -> Union[
|
|
151
|
-
ModelMonitoringApplicationResult, list[ModelMonitoringApplicationResult]
|
|
152
|
-
]:
|
|
153
|
-
"""
|
|
154
|
-
Implement this method with your custom monitoring logic.
|
|
155
|
-
|
|
156
|
-
:param application_name: (str) the app name
|
|
157
|
-
:param sample_df_stats: (FeatureStats) The new sample distribution dictionary.
|
|
158
|
-
:param feature_stats: (FeatureStats) The train sample distribution dictionary.
|
|
159
|
-
:param sample_df: (pd.DataFrame) The new sample DataFrame.
|
|
160
|
-
:param start_infer_time: (pd.Timestamp) Start time of the monitoring schedule.
|
|
161
|
-
:param end_infer_time: (pd.Timestamp) End time of the monitoring schedule.
|
|
162
|
-
:param latest_request: (pd.Timestamp) Timestamp of the latest request on this endpoint_id.
|
|
163
|
-
:param endpoint_id: (str) ID of the monitored model endpoint
|
|
164
|
-
:param output_stream_uri: (str) URI of the output stream for results
|
|
165
|
-
|
|
166
|
-
:returns: (ModelMonitoringApplicationResult) or
|
|
167
|
-
(list[ModelMonitoringApplicationResult]) of the application results.
|
|
168
|
-
"""
|
|
169
|
-
raise NotImplementedError
|
|
170
|
-
|
|
171
|
-
@classmethod
|
|
172
|
-
def _resolve_event(
|
|
173
|
-
cls,
|
|
174
|
-
event: dict[str, Any],
|
|
175
|
-
) -> tuple[
|
|
176
|
-
str,
|
|
177
|
-
mlrun.common.model_monitoring.helpers.FeatureStats,
|
|
178
|
-
mlrun.common.model_monitoring.helpers.FeatureStats,
|
|
179
|
-
pd.DataFrame,
|
|
180
|
-
pd.Timestamp,
|
|
181
|
-
pd.Timestamp,
|
|
182
|
-
pd.Timestamp,
|
|
183
|
-
str,
|
|
184
|
-
str,
|
|
185
|
-
]:
|
|
186
|
-
"""
|
|
187
|
-
Converting the event into a single tuple that will be used for passing the event arguments to the running
|
|
188
|
-
application
|
|
189
|
-
|
|
190
|
-
:param event: dictionary with all the incoming data
|
|
191
|
-
|
|
192
|
-
:return: A tuple of:
|
|
193
|
-
[0] = (str) application name
|
|
194
|
-
[1] = (dict) current input statistics
|
|
195
|
-
[2] = (dict) train statistics
|
|
196
|
-
[3] = (pd.DataFrame) current input data
|
|
197
|
-
[4] = (pd.Timestamp) start time of the monitoring schedule
|
|
198
|
-
[5] = (pd.Timestamp) end time of the monitoring schedule
|
|
199
|
-
[6] = (pd.Timestamp) timestamp of the latest request
|
|
200
|
-
[7] = (str) endpoint id
|
|
201
|
-
[8] = (str) output stream uri
|
|
202
|
-
"""
|
|
203
|
-
start_time = pd.Timestamp(event[mm_constant.ApplicationEvent.START_INFER_TIME])
|
|
204
|
-
end_time = pd.Timestamp(event[mm_constant.ApplicationEvent.END_INFER_TIME])
|
|
205
|
-
return (
|
|
206
|
-
event[mm_constant.ApplicationEvent.APPLICATION_NAME],
|
|
207
|
-
json.loads(event[mm_constant.ApplicationEvent.CURRENT_STATS]),
|
|
208
|
-
json.loads(event[mm_constant.ApplicationEvent.FEATURE_STATS]),
|
|
209
|
-
ParquetTarget(
|
|
210
|
-
path=event[mm_constant.ApplicationEvent.SAMPLE_PARQUET_PATH]
|
|
211
|
-
).as_df(
|
|
212
|
-
start_time=start_time,
|
|
213
|
-
end_time=end_time,
|
|
214
|
-
time_column=mm_constant.FeatureSetFeatures.time_stamp(),
|
|
215
|
-
),
|
|
216
|
-
start_time,
|
|
217
|
-
end_time,
|
|
218
|
-
pd.Timestamp(event[mm_constant.ApplicationEvent.LAST_REQUEST]),
|
|
219
|
-
event[mm_constant.ApplicationEvent.ENDPOINT_ID],
|
|
220
|
-
event[mm_constant.ApplicationEvent.OUTPUT_STREAM_URI],
|
|
221
|
-
)
|
|
222
|
-
|
|
223
|
-
@staticmethod
|
|
224
|
-
def _create_context_for_logging(app_name: str):
|
|
225
|
-
context = mlrun.get_or_create_ctx(
|
|
226
|
-
f"{app_name}-logger",
|
|
227
|
-
upload_artifacts=True,
|
|
228
|
-
labels={"workflow": "model-monitoring-app-logger"},
|
|
229
|
-
)
|
|
230
|
-
return context
|
|
231
|
-
|
|
232
|
-
@staticmethod
|
|
233
|
-
def dict_to_histogram(
|
|
234
|
-
histogram_dict: mlrun.common.model_monitoring.helpers.FeatureStats,
|
|
235
|
-
) -> pd.DataFrame:
|
|
236
|
-
"""
|
|
237
|
-
Convert histogram dictionary to pandas DataFrame with feature histograms as columns
|
|
238
|
-
|
|
239
|
-
:param histogram_dict: Histogram dictionary
|
|
240
|
-
|
|
241
|
-
:returns: Histogram dataframe
|
|
242
|
-
"""
|
|
243
|
-
|
|
244
|
-
# Create a dictionary with feature histograms as values
|
|
245
|
-
histograms = {}
|
|
246
|
-
for feature, stats in histogram_dict.items():
|
|
247
|
-
if "hist" in stats:
|
|
248
|
-
# Normalize to probability distribution of each feature
|
|
249
|
-
histograms[feature] = np.array(stats["hist"][0]) / stats["count"]
|
|
250
|
-
|
|
251
|
-
# Convert the dictionary to pandas DataFrame
|
|
252
|
-
histograms = pd.DataFrame(histograms)
|
|
253
|
-
|
|
254
|
-
return histograms
|
|
255
|
-
|
|
256
|
-
|
|
257
|
-
class PushToMonitoringWriter(StepToDict):
|
|
258
|
-
kind = "monitoring_application_stream_pusher"
|
|
259
|
-
|
|
260
|
-
def __init__(
|
|
261
|
-
self,
|
|
262
|
-
project: Optional[str] = None,
|
|
263
|
-
writer_application_name: Optional[str] = None,
|
|
264
|
-
stream_uri: Optional[str] = None,
|
|
265
|
-
name: Optional[str] = None,
|
|
266
|
-
):
|
|
267
|
-
"""
|
|
268
|
-
Class for pushing application results to the monitoring writer stream.
|
|
269
|
-
|
|
270
|
-
:param project: Project name.
|
|
271
|
-
:param writer_application_name: Writer application name.
|
|
272
|
-
:param stream_uri: Stream URI for pushing results.
|
|
273
|
-
:param name: Name of the PushToMonitoringWriter
|
|
274
|
-
instance default to PushToMonitoringWriter.
|
|
275
|
-
"""
|
|
276
|
-
self.project = project
|
|
277
|
-
self.application_name_to_push = writer_application_name
|
|
278
|
-
self.stream_uri = stream_uri or get_stream_path(
|
|
279
|
-
project=self.project, function_name=self.application_name_to_push
|
|
280
|
-
)
|
|
281
|
-
self.output_stream = None
|
|
282
|
-
self.name = name or "PushToMonitoringWriter"
|
|
283
|
-
|
|
284
|
-
def do(self, event: tuple[list[ModelMonitoringApplicationResult], dict]) -> None:
|
|
285
|
-
"""
|
|
286
|
-
Push application results to the monitoring writer stream.
|
|
287
|
-
|
|
288
|
-
:param event: Monitoring result(s) to push and the original event from the controller.
|
|
289
|
-
"""
|
|
290
|
-
self._lazy_init()
|
|
291
|
-
application_results, application_event = event
|
|
292
|
-
metadata = {
|
|
293
|
-
mm_constant.WriterEvent.APPLICATION_NAME: application_event[
|
|
294
|
-
mm_constant.ApplicationEvent.APPLICATION_NAME
|
|
295
|
-
],
|
|
296
|
-
mm_constant.WriterEvent.ENDPOINT_ID: application_event[
|
|
297
|
-
mm_constant.ApplicationEvent.ENDPOINT_ID
|
|
298
|
-
],
|
|
299
|
-
mm_constant.WriterEvent.START_INFER_TIME: application_event[
|
|
300
|
-
mm_constant.ApplicationEvent.START_INFER_TIME
|
|
301
|
-
],
|
|
302
|
-
mm_constant.WriterEvent.END_INFER_TIME: application_event[
|
|
303
|
-
mm_constant.ApplicationEvent.END_INFER_TIME
|
|
304
|
-
],
|
|
305
|
-
mm_constant.WriterEvent.CURRENT_STATS: json.dumps(
|
|
306
|
-
application_event[mm_constant.ApplicationEvent.CURRENT_STATS]
|
|
307
|
-
),
|
|
308
|
-
}
|
|
309
|
-
for result in application_results:
|
|
310
|
-
data = result.to_dict()
|
|
311
|
-
data.update(metadata)
|
|
312
|
-
logger.info(f"Pushing data = {data} \n to stream = {self.stream_uri}")
|
|
313
|
-
self.output_stream.push([data])
|
|
314
|
-
|
|
315
|
-
def _lazy_init(self):
|
|
316
|
-
if self.output_stream is None:
|
|
317
|
-
self.output_stream = get_stream_pusher(
|
|
318
|
-
self.stream_uri,
|
|
319
|
-
)
|
|
15
|
+
# TODO : delete this file in 1.9.0
|
|
16
|
+
from mlrun.model_monitoring.applications import ( # noqa: F401
|
|
17
|
+
ModelMonitoringApplicationBase,
|
|
18
|
+
ModelMonitoringApplicationResult,
|
|
19
|
+
)
|
mlrun/model_monitoring/applications/__init__.py
CHANGED
|
@@ -11,3 +11,14 @@
|
|
|
11
11
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
12
|
# See the License for the specific language governing permissions and
|
|
13
13
|
# limitations under the License.
|
|
14
|
+
#
|
|
15
|
+
|
|
16
|
+
from .base import ModelMonitoringApplicationBase, ModelMonitoringApplicationBaseV2
|
|
17
|
+
from .context import MonitoringApplicationContext
|
|
18
|
+
from .evidently_base import (
|
|
19
|
+
_HAS_EVIDENTLY,
|
|
20
|
+
SUPPORTED_EVIDENTLY_VERSION,
|
|
21
|
+
EvidentlyModelMonitoringApplicationBase,
|
|
22
|
+
EvidentlyModelMonitoringApplicationBaseV2,
|
|
23
|
+
)
|
|
24
|
+
from .results import ModelMonitoringApplicationMetric, ModelMonitoringApplicationResult
|