mlrun 1.7.2rc3__py3-none-any.whl → 1.8.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of mlrun might be problematic. Click here for more details.
- mlrun/__init__.py +26 -22
- mlrun/__main__.py +15 -16
- mlrun/alerts/alert.py +150 -15
- mlrun/api/schemas/__init__.py +1 -9
- mlrun/artifacts/__init__.py +2 -3
- mlrun/artifacts/base.py +62 -19
- mlrun/artifacts/dataset.py +17 -17
- mlrun/artifacts/document.py +454 -0
- mlrun/artifacts/manager.py +28 -18
- mlrun/artifacts/model.py +91 -59
- mlrun/artifacts/plots.py +2 -2
- mlrun/common/constants.py +8 -0
- mlrun/common/formatters/__init__.py +1 -0
- mlrun/common/formatters/artifact.py +1 -1
- mlrun/common/formatters/feature_set.py +2 -0
- mlrun/common/formatters/function.py +1 -0
- mlrun/{model_monitoring/db/stores/v3io_kv/__init__.py → common/formatters/model_endpoint.py} +17 -0
- mlrun/common/formatters/pipeline.py +1 -2
- mlrun/common/formatters/project.py +9 -0
- mlrun/common/model_monitoring/__init__.py +0 -5
- mlrun/common/model_monitoring/helpers.py +12 -62
- mlrun/common/runtimes/constants.py +25 -4
- mlrun/common/schemas/__init__.py +9 -5
- mlrun/common/schemas/alert.py +114 -19
- mlrun/common/schemas/api_gateway.py +3 -3
- mlrun/common/schemas/artifact.py +22 -9
- mlrun/common/schemas/auth.py +8 -4
- mlrun/common/schemas/background_task.py +7 -7
- mlrun/common/schemas/client_spec.py +4 -4
- mlrun/common/schemas/clusterization_spec.py +2 -2
- mlrun/common/schemas/common.py +53 -3
- mlrun/common/schemas/constants.py +15 -0
- mlrun/common/schemas/datastore_profile.py +1 -1
- mlrun/common/schemas/feature_store.py +9 -9
- mlrun/common/schemas/frontend_spec.py +4 -4
- mlrun/common/schemas/function.py +10 -10
- mlrun/common/schemas/hub.py +1 -1
- mlrun/common/schemas/k8s.py +3 -3
- mlrun/common/schemas/memory_reports.py +3 -3
- mlrun/common/schemas/model_monitoring/__init__.py +4 -8
- mlrun/common/schemas/model_monitoring/constants.py +127 -46
- mlrun/common/schemas/model_monitoring/grafana.py +18 -12
- mlrun/common/schemas/model_monitoring/model_endpoints.py +154 -160
- mlrun/common/schemas/notification.py +24 -3
- mlrun/common/schemas/object.py +1 -1
- mlrun/common/schemas/pagination.py +4 -4
- mlrun/common/schemas/partition.py +142 -0
- mlrun/common/schemas/pipeline.py +3 -3
- mlrun/common/schemas/project.py +26 -18
- mlrun/common/schemas/runs.py +3 -3
- mlrun/common/schemas/runtime_resource.py +5 -5
- mlrun/common/schemas/schedule.py +1 -1
- mlrun/common/schemas/secret.py +1 -1
- mlrun/{model_monitoring/db/stores/sqldb/__init__.py → common/schemas/serving.py} +10 -1
- mlrun/common/schemas/tag.py +3 -3
- mlrun/common/schemas/workflow.py +6 -5
- mlrun/common/types.py +1 -0
- mlrun/config.py +157 -89
- mlrun/data_types/__init__.py +5 -3
- mlrun/data_types/infer.py +13 -3
- mlrun/data_types/spark.py +2 -1
- mlrun/datastore/__init__.py +59 -18
- mlrun/datastore/alibaba_oss.py +4 -1
- mlrun/datastore/azure_blob.py +4 -1
- mlrun/datastore/base.py +19 -24
- mlrun/datastore/datastore.py +10 -4
- mlrun/datastore/datastore_profile.py +178 -45
- mlrun/datastore/dbfs_store.py +4 -1
- mlrun/datastore/filestore.py +4 -1
- mlrun/datastore/google_cloud_storage.py +4 -1
- mlrun/datastore/hdfs.py +4 -1
- mlrun/datastore/inmem.py +4 -1
- mlrun/datastore/redis.py +4 -1
- mlrun/datastore/s3.py +14 -3
- mlrun/datastore/sources.py +89 -92
- mlrun/datastore/store_resources.py +7 -4
- mlrun/datastore/storeytargets.py +51 -16
- mlrun/datastore/targets.py +38 -31
- mlrun/datastore/utils.py +87 -4
- mlrun/datastore/v3io.py +4 -1
- mlrun/datastore/vectorstore.py +291 -0
- mlrun/datastore/wasbfs/fs.py +13 -12
- mlrun/db/base.py +286 -100
- mlrun/db/httpdb.py +1562 -490
- mlrun/db/nopdb.py +250 -83
- mlrun/errors.py +6 -2
- mlrun/execution.py +194 -50
- mlrun/feature_store/__init__.py +2 -10
- mlrun/feature_store/api.py +20 -458
- mlrun/feature_store/common.py +9 -9
- mlrun/feature_store/feature_set.py +20 -18
- mlrun/feature_store/feature_vector.py +105 -479
- mlrun/feature_store/feature_vector_utils.py +466 -0
- mlrun/feature_store/retrieval/base.py +15 -11
- mlrun/feature_store/retrieval/job.py +2 -1
- mlrun/feature_store/retrieval/storey_merger.py +1 -1
- mlrun/feature_store/steps.py +3 -3
- mlrun/features.py +30 -13
- mlrun/frameworks/__init__.py +1 -2
- mlrun/frameworks/_common/__init__.py +1 -2
- mlrun/frameworks/_common/artifacts_library.py +2 -2
- mlrun/frameworks/_common/mlrun_interface.py +10 -6
- mlrun/frameworks/_common/model_handler.py +31 -31
- mlrun/frameworks/_common/producer.py +3 -1
- mlrun/frameworks/_dl_common/__init__.py +1 -2
- mlrun/frameworks/_dl_common/loggers/__init__.py +1 -2
- mlrun/frameworks/_dl_common/loggers/mlrun_logger.py +4 -4
- mlrun/frameworks/_dl_common/loggers/tensorboard_logger.py +3 -3
- mlrun/frameworks/_ml_common/__init__.py +1 -2
- mlrun/frameworks/_ml_common/loggers/__init__.py +1 -2
- mlrun/frameworks/_ml_common/model_handler.py +21 -21
- mlrun/frameworks/_ml_common/plans/__init__.py +1 -2
- mlrun/frameworks/_ml_common/plans/confusion_matrix_plan.py +3 -1
- mlrun/frameworks/_ml_common/plans/dataset_plan.py +3 -3
- mlrun/frameworks/_ml_common/plans/roc_curve_plan.py +4 -4
- mlrun/frameworks/auto_mlrun/__init__.py +1 -2
- mlrun/frameworks/auto_mlrun/auto_mlrun.py +22 -15
- mlrun/frameworks/huggingface/__init__.py +1 -2
- mlrun/frameworks/huggingface/model_server.py +9 -9
- mlrun/frameworks/lgbm/__init__.py +47 -44
- mlrun/frameworks/lgbm/callbacks/__init__.py +1 -2
- mlrun/frameworks/lgbm/callbacks/logging_callback.py +4 -2
- mlrun/frameworks/lgbm/callbacks/mlrun_logging_callback.py +4 -2
- mlrun/frameworks/lgbm/mlrun_interfaces/__init__.py +1 -2
- mlrun/frameworks/lgbm/mlrun_interfaces/mlrun_interface.py +5 -5
- mlrun/frameworks/lgbm/model_handler.py +15 -11
- mlrun/frameworks/lgbm/model_server.py +11 -7
- mlrun/frameworks/lgbm/utils.py +2 -2
- mlrun/frameworks/onnx/__init__.py +1 -2
- mlrun/frameworks/onnx/dataset.py +3 -3
- mlrun/frameworks/onnx/mlrun_interface.py +2 -2
- mlrun/frameworks/onnx/model_handler.py +7 -5
- mlrun/frameworks/onnx/model_server.py +8 -6
- mlrun/frameworks/parallel_coordinates.py +11 -11
- mlrun/frameworks/pytorch/__init__.py +22 -23
- mlrun/frameworks/pytorch/callbacks/__init__.py +1 -2
- mlrun/frameworks/pytorch/callbacks/callback.py +2 -1
- mlrun/frameworks/pytorch/callbacks/logging_callback.py +15 -8
- mlrun/frameworks/pytorch/callbacks/mlrun_logging_callback.py +19 -12
- mlrun/frameworks/pytorch/callbacks/tensorboard_logging_callback.py +22 -15
- mlrun/frameworks/pytorch/callbacks_handler.py +36 -30
- mlrun/frameworks/pytorch/mlrun_interface.py +17 -17
- mlrun/frameworks/pytorch/model_handler.py +21 -17
- mlrun/frameworks/pytorch/model_server.py +13 -9
- mlrun/frameworks/sklearn/__init__.py +19 -18
- mlrun/frameworks/sklearn/estimator.py +2 -2
- mlrun/frameworks/sklearn/metric.py +3 -3
- mlrun/frameworks/sklearn/metrics_library.py +8 -6
- mlrun/frameworks/sklearn/mlrun_interface.py +3 -2
- mlrun/frameworks/sklearn/model_handler.py +4 -3
- mlrun/frameworks/tf_keras/__init__.py +11 -12
- mlrun/frameworks/tf_keras/callbacks/__init__.py +1 -2
- mlrun/frameworks/tf_keras/callbacks/logging_callback.py +17 -14
- mlrun/frameworks/tf_keras/callbacks/mlrun_logging_callback.py +15 -12
- mlrun/frameworks/tf_keras/callbacks/tensorboard_logging_callback.py +21 -18
- mlrun/frameworks/tf_keras/model_handler.py +17 -13
- mlrun/frameworks/tf_keras/model_server.py +12 -8
- mlrun/frameworks/xgboost/__init__.py +19 -18
- mlrun/frameworks/xgboost/model_handler.py +13 -9
- mlrun/k8s_utils.py +2 -5
- mlrun/launcher/base.py +3 -4
- mlrun/launcher/client.py +2 -2
- mlrun/launcher/local.py +6 -2
- mlrun/launcher/remote.py +1 -1
- mlrun/lists.py +8 -4
- mlrun/model.py +132 -46
- mlrun/model_monitoring/__init__.py +3 -5
- mlrun/model_monitoring/api.py +113 -98
- mlrun/model_monitoring/applications/__init__.py +0 -5
- mlrun/model_monitoring/applications/_application_steps.py +81 -50
- mlrun/model_monitoring/applications/base.py +467 -14
- mlrun/model_monitoring/applications/context.py +212 -134
- mlrun/model_monitoring/{db/stores/base → applications/evidently}/__init__.py +6 -2
- mlrun/model_monitoring/applications/evidently/base.py +146 -0
- mlrun/model_monitoring/applications/histogram_data_drift.py +89 -56
- mlrun/model_monitoring/applications/results.py +67 -15
- mlrun/model_monitoring/controller.py +701 -315
- mlrun/model_monitoring/db/__init__.py +0 -2
- mlrun/model_monitoring/db/_schedules.py +242 -0
- mlrun/model_monitoring/db/_stats.py +189 -0
- mlrun/model_monitoring/db/tsdb/__init__.py +33 -22
- mlrun/model_monitoring/db/tsdb/base.py +243 -49
- mlrun/model_monitoring/db/tsdb/tdengine/schemas.py +76 -36
- mlrun/model_monitoring/db/tsdb/tdengine/stream_graph_steps.py +33 -0
- mlrun/model_monitoring/db/tsdb/tdengine/tdengine_connection.py +213 -0
- mlrun/model_monitoring/db/tsdb/tdengine/tdengine_connector.py +534 -88
- mlrun/model_monitoring/db/tsdb/v3io/stream_graph_steps.py +1 -0
- mlrun/model_monitoring/db/tsdb/v3io/v3io_connector.py +436 -106
- mlrun/model_monitoring/helpers.py +356 -114
- mlrun/model_monitoring/stream_processing.py +190 -345
- mlrun/model_monitoring/tracking_policy.py +11 -4
- mlrun/model_monitoring/writer.py +49 -90
- mlrun/package/__init__.py +3 -6
- mlrun/package/context_handler.py +2 -2
- mlrun/package/packager.py +12 -9
- mlrun/package/packagers/__init__.py +0 -2
- mlrun/package/packagers/default_packager.py +14 -11
- mlrun/package/packagers/numpy_packagers.py +16 -7
- mlrun/package/packagers/pandas_packagers.py +18 -18
- mlrun/package/packagers/python_standard_library_packagers.py +25 -11
- mlrun/package/packagers_manager.py +35 -32
- mlrun/package/utils/__init__.py +0 -3
- mlrun/package/utils/_pickler.py +6 -6
- mlrun/platforms/__init__.py +47 -16
- mlrun/platforms/iguazio.py +4 -1
- mlrun/projects/operations.py +30 -30
- mlrun/projects/pipelines.py +116 -47
- mlrun/projects/project.py +1292 -329
- mlrun/render.py +5 -9
- mlrun/run.py +57 -14
- mlrun/runtimes/__init__.py +1 -3
- mlrun/runtimes/base.py +30 -22
- mlrun/runtimes/daskjob.py +9 -9
- mlrun/runtimes/databricks_job/databricks_runtime.py +6 -5
- mlrun/runtimes/function_reference.py +5 -2
- mlrun/runtimes/generators.py +3 -2
- mlrun/runtimes/kubejob.py +6 -7
- mlrun/runtimes/mounts.py +574 -0
- mlrun/runtimes/mpijob/__init__.py +0 -2
- mlrun/runtimes/mpijob/abstract.py +7 -6
- mlrun/runtimes/nuclio/api_gateway.py +7 -7
- mlrun/runtimes/nuclio/application/application.py +11 -13
- mlrun/runtimes/nuclio/application/reverse_proxy.go +66 -64
- mlrun/runtimes/nuclio/function.py +127 -70
- mlrun/runtimes/nuclio/serving.py +105 -37
- mlrun/runtimes/pod.py +159 -54
- mlrun/runtimes/remotesparkjob.py +3 -2
- mlrun/runtimes/sparkjob/__init__.py +0 -2
- mlrun/runtimes/sparkjob/spark3job.py +22 -12
- mlrun/runtimes/utils.py +7 -6
- mlrun/secrets.py +2 -2
- mlrun/serving/__init__.py +8 -0
- mlrun/serving/merger.py +7 -5
- mlrun/serving/remote.py +35 -22
- mlrun/serving/routers.py +186 -240
- mlrun/serving/server.py +41 -10
- mlrun/serving/states.py +432 -118
- mlrun/serving/utils.py +13 -2
- mlrun/serving/v1_serving.py +3 -2
- mlrun/serving/v2_serving.py +161 -203
- mlrun/track/__init__.py +1 -1
- mlrun/track/tracker.py +2 -2
- mlrun/track/trackers/mlflow_tracker.py +6 -5
- mlrun/utils/async_http.py +35 -22
- mlrun/utils/clones.py +7 -4
- mlrun/utils/helpers.py +511 -58
- mlrun/utils/logger.py +119 -13
- mlrun/utils/notifications/notification/__init__.py +22 -19
- mlrun/utils/notifications/notification/base.py +39 -15
- mlrun/utils/notifications/notification/console.py +6 -6
- mlrun/utils/notifications/notification/git.py +11 -11
- mlrun/utils/notifications/notification/ipython.py +10 -9
- mlrun/utils/notifications/notification/mail.py +176 -0
- mlrun/utils/notifications/notification/slack.py +16 -8
- mlrun/utils/notifications/notification/webhook.py +24 -8
- mlrun/utils/notifications/notification_pusher.py +191 -200
- mlrun/utils/regex.py +12 -2
- mlrun/utils/version/version.json +2 -2
- {mlrun-1.7.2rc3.dist-info → mlrun-1.8.0.dist-info}/METADATA +81 -54
- mlrun-1.8.0.dist-info/RECORD +351 -0
- {mlrun-1.7.2rc3.dist-info → mlrun-1.8.0.dist-info}/WHEEL +1 -1
- mlrun/model_monitoring/applications/evidently_base.py +0 -137
- mlrun/model_monitoring/db/stores/__init__.py +0 -136
- mlrun/model_monitoring/db/stores/base/store.py +0 -213
- mlrun/model_monitoring/db/stores/sqldb/models/__init__.py +0 -71
- mlrun/model_monitoring/db/stores/sqldb/models/base.py +0 -190
- mlrun/model_monitoring/db/stores/sqldb/models/mysql.py +0 -103
- mlrun/model_monitoring/db/stores/sqldb/models/sqlite.py +0 -40
- mlrun/model_monitoring/db/stores/sqldb/sql_store.py +0 -659
- mlrun/model_monitoring/db/stores/v3io_kv/kv_store.py +0 -726
- mlrun/model_monitoring/model_endpoint.py +0 -118
- mlrun-1.7.2rc3.dist-info/RECORD +0 -351
- {mlrun-1.7.2rc3.dist-info → mlrun-1.8.0.dist-info}/entry_points.txt +0 -0
- {mlrun-1.7.2rc3.dist-info → mlrun-1.8.0.dist-info/licenses}/LICENSE +0 -0
- {mlrun-1.7.2rc3.dist-info → mlrun-1.8.0.dist-info}/top_level.txt +0 -0
|
@@ -13,65 +13,150 @@
|
|
|
13
13
|
# limitations under the License.
|
|
14
14
|
|
|
15
15
|
import datetime
|
|
16
|
-
import typing
|
|
16
|
+
import functools
|
|
17
|
+
import os
|
|
18
|
+
from fnmatch import fnmatchcase
|
|
19
|
+
from typing import TYPE_CHECKING, Callable, Optional, TypedDict, Union, cast
|
|
17
20
|
|
|
18
21
|
import numpy as np
|
|
19
22
|
import pandas as pd
|
|
20
23
|
|
|
21
|
-
if typing.TYPE_CHECKING:
|
|
22
|
-
from mlrun.db.base import RunDBInterface
|
|
23
|
-
from mlrun.projects import MlrunProject
|
|
24
|
-
|
|
25
24
|
import mlrun
|
|
26
25
|
import mlrun.artifacts
|
|
27
26
|
import mlrun.common.model_monitoring.helpers
|
|
28
27
|
import mlrun.common.schemas.model_monitoring.constants as mm_constants
|
|
29
28
|
import mlrun.data_types.infer
|
|
29
|
+
import mlrun.datastore.datastore_profile
|
|
30
30
|
import mlrun.model_monitoring
|
|
31
|
+
import mlrun.platforms.iguazio
|
|
32
|
+
import mlrun.utils.helpers
|
|
33
|
+
from mlrun.common.schemas import ModelEndpoint
|
|
31
34
|
from mlrun.common.schemas.model_monitoring.model_endpoints import (
|
|
32
35
|
ModelEndpointMonitoringMetric,
|
|
33
|
-
|
|
36
|
+
compose_full_name,
|
|
34
37
|
)
|
|
35
|
-
from mlrun.model_monitoring.model_endpoint import ModelEndpoint
|
|
36
38
|
from mlrun.utils import logger
|
|
37
39
|
|
|
40
|
+
if TYPE_CHECKING:
|
|
41
|
+
from mlrun.datastore import DataItem
|
|
42
|
+
from mlrun.db.base import RunDBInterface
|
|
43
|
+
from mlrun.projects import MlrunProject
|
|
38
44
|
|
|
39
|
-
|
|
45
|
+
|
|
46
|
+
class _BatchDict(TypedDict):
|
|
40
47
|
minutes: int
|
|
41
48
|
hours: int
|
|
42
49
|
days: int
|
|
43
50
|
|
|
44
51
|
|
|
52
|
+
def _is_results_regex_match(
|
|
53
|
+
existing_result_name: Optional[str],
|
|
54
|
+
result_name_filters: Optional[list[str]],
|
|
55
|
+
) -> bool:
|
|
56
|
+
if existing_result_name.count(".") != 3 or any(
|
|
57
|
+
part == "" for part in existing_result_name.split(".")
|
|
58
|
+
):
|
|
59
|
+
logger.warning(
|
|
60
|
+
f"_is_results_regex_match: existing_result_name illegal, will be ignored."
|
|
61
|
+
f" existing_result_name: {existing_result_name}"
|
|
62
|
+
)
|
|
63
|
+
return False
|
|
64
|
+
existing_result_name = ".".join(existing_result_name.split(".")[i] for i in [1, 3])
|
|
65
|
+
for result_name_filter in result_name_filters:
|
|
66
|
+
if fnmatchcase(existing_result_name, result_name_filter):
|
|
67
|
+
return True
|
|
68
|
+
return False
|
|
69
|
+
|
|
70
|
+
|
|
71
|
+
def filter_results_by_regex(
|
|
72
|
+
existing_result_names: Optional[list[str]] = None,
|
|
73
|
+
result_name_filters: Optional[list[str]] = None,
|
|
74
|
+
) -> list[str]:
|
|
75
|
+
"""
|
|
76
|
+
Filter a list of existing result names by a list of filters.
|
|
77
|
+
|
|
78
|
+
This function returns only the results that match the filters provided. If no filters are given,
|
|
79
|
+
it returns all results. Invalid inputs are ignored.
|
|
80
|
+
|
|
81
|
+
:param existing_result_names: List of existing results' fully qualified names (FQNs)
|
|
82
|
+
in the format: endpoint_id.app_name.type.name.
|
|
83
|
+
Example: mep1.app1.result.metric1
|
|
84
|
+
:param result_name_filters: List of filters in the format: app.result_name.
|
|
85
|
+
Wildcards can be used, such as app.result* or *.result
|
|
86
|
+
|
|
87
|
+
:return: List of FQNs of the matching results
|
|
88
|
+
"""
|
|
89
|
+
|
|
90
|
+
if not result_name_filters:
|
|
91
|
+
return existing_result_names
|
|
92
|
+
|
|
93
|
+
if not existing_result_names:
|
|
94
|
+
return []
|
|
95
|
+
|
|
96
|
+
# filters validations
|
|
97
|
+
validated_filters = []
|
|
98
|
+
for result_name_filter in result_name_filters:
|
|
99
|
+
if result_name_filter.count(".") != 1:
|
|
100
|
+
logger.warning(
|
|
101
|
+
f"filter_results_by_regex: result_name_filter illegal, will be ignored."
|
|
102
|
+
f"Filter: {result_name_filter}"
|
|
103
|
+
)
|
|
104
|
+
else:
|
|
105
|
+
validated_filters.append(result_name_filter)
|
|
106
|
+
filtered_metrics_names = []
|
|
107
|
+
for existing_result_name in existing_result_names:
|
|
108
|
+
if _is_results_regex_match(
|
|
109
|
+
existing_result_name=existing_result_name,
|
|
110
|
+
result_name_filters=validated_filters,
|
|
111
|
+
):
|
|
112
|
+
filtered_metrics_names.append(existing_result_name)
|
|
113
|
+
return list(set(filtered_metrics_names))
|
|
114
|
+
|
|
115
|
+
|
|
45
116
|
def get_stream_path(
|
|
46
117
|
project: str,
|
|
47
118
|
function_name: str = mm_constants.MonitoringFunctionNames.STREAM,
|
|
48
|
-
stream_uri: typing.Optional[str] = None,
|
|
119
|
+
stream_uri: Optional[str] = None,
|
|
120
|
+
secret_provider: Optional[Callable[[str], str]] = None,
|
|
121
|
+
profile: Optional[mlrun.datastore.datastore_profile.DatastoreProfile] = None,
|
|
49
122
|
) -> str:
|
|
50
123
|
"""
|
|
51
124
|
Get stream path from the project secret. If wasn't set, take it from the system configurations
|
|
52
125
|
|
|
53
126
|
:param project: Project name.
|
|
54
127
|
:param function_name: Application name. Default is model_monitoring_stream.
|
|
55
|
-
:param stream_uri: Stream URI. If provided, it will be used instead of the one from the project secret.
|
|
56
|
-
|
|
128
|
+
:param stream_uri: Stream URI. If provided, it will be used instead of the one from the project's secret.
|
|
129
|
+
:param secret_provider: Optional secret provider to get the connection string secret.
|
|
130
|
+
If not set, the env vars are used.
|
|
131
|
+
:param profile: Optional datastore profile of the stream (V3IO/KafkaSource profile).
|
|
57
132
|
:return: Monitoring stream path to the relevant application.
|
|
58
133
|
"""
|
|
59
134
|
|
|
60
|
-
|
|
61
|
-
|
|
135
|
+
profile = profile or _get_stream_profile(
|
|
136
|
+
project=project, secret_provider=secret_provider
|
|
62
137
|
)
|
|
63
138
|
|
|
64
|
-
if
|
|
139
|
+
if isinstance(profile, mlrun.datastore.datastore_profile.DatastoreProfileV3io):
|
|
65
140
|
stream_uri = mlrun.mlconf.get_model_monitoring_file_target_path(
|
|
66
141
|
project=project,
|
|
67
142
|
kind=mm_constants.FileTargetKind.STREAM,
|
|
68
143
|
target="online",
|
|
69
144
|
function_name=function_name,
|
|
70
145
|
)
|
|
146
|
+
return stream_uri.replace("v3io://", f"ds://{profile.name}")
|
|
71
147
|
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
)
|
|
148
|
+
elif isinstance(
|
|
149
|
+
profile, mlrun.datastore.datastore_profile.DatastoreProfileKafkaSource
|
|
150
|
+
):
|
|
151
|
+
topic = mlrun.common.model_monitoring.helpers.get_kafka_topic(
|
|
152
|
+
project=project, function_name=function_name
|
|
153
|
+
)
|
|
154
|
+
return f"ds://{profile.name}/{topic}"
|
|
155
|
+
else:
|
|
156
|
+
raise mlrun.errors.MLRunValueError(
|
|
157
|
+
f"Received an unexpected stream profile type: {type(profile)}\n"
|
|
158
|
+
"Expects `DatastoreProfileV3io` or `DatastoreProfileKafkaSource`."
|
|
159
|
+
)
|
|
75
160
|
|
|
76
161
|
|
|
77
162
|
def get_monitoring_parquet_path(
|
|
@@ -98,37 +183,198 @@ def get_monitoring_parquet_path(
|
|
|
98
183
|
return parquet_path
|
|
99
184
|
|
|
100
185
|
|
|
101
|
-
def
|
|
102
|
-
|
|
103
|
-
|
|
186
|
+
def get_monitoring_stats_directory_path(
|
|
187
|
+
project: str,
|
|
188
|
+
kind: str = mm_constants.FileTargetKind.STATS,
|
|
189
|
+
) -> str:
|
|
190
|
+
"""
|
|
191
|
+
Get model monitoring stats target for the current project and kind. The stats target path is based on the
|
|
192
|
+
project artifact path. If project artifact path is not defined, the stats target path will be based on MLRun
|
|
193
|
+
artifact path.
|
|
194
|
+
:param project: Project object.
|
|
195
|
+
:param kind: indicate the kind of the stats path
|
|
196
|
+
:return: Monitoring stats target path.
|
|
197
|
+
"""
|
|
198
|
+
stats_path = mlrun.mlconf.get_model_monitoring_file_target_path(
|
|
199
|
+
project=project,
|
|
200
|
+
kind=kind,
|
|
201
|
+
)
|
|
202
|
+
return stats_path
|
|
203
|
+
|
|
204
|
+
|
|
205
|
+
def _get_monitoring_current_stats_file_path(project: str, endpoint_id: str) -> str:
|
|
206
|
+
return os.path.join(
|
|
207
|
+
get_monitoring_stats_directory_path(project),
|
|
208
|
+
f"{endpoint_id}_current_stats.json",
|
|
209
|
+
)
|
|
104
210
|
|
|
105
|
-
:param secret_provider: An optional secret provider to get the connection string secret.
|
|
106
211
|
|
|
107
|
-
|
|
212
|
+
def _get_monitoring_drift_measures_file_path(project: str, endpoint_id: str) -> str:
|
|
213
|
+
return os.path.join(
|
|
214
|
+
get_monitoring_stats_directory_path(project),
|
|
215
|
+
f"{endpoint_id}_drift_measures.json",
|
|
216
|
+
)
|
|
217
|
+
|
|
108
218
|
|
|
219
|
+
def get_monitoring_current_stats_data(project: str, endpoint_id: str) -> "DataItem":
|
|
109
220
|
"""
|
|
221
|
+
getter for data item of current stats for project and endpoint
|
|
222
|
+
:param project: project name str
|
|
223
|
+
:param endpoint_id: endpoint id str
|
|
224
|
+
:return: DataItem
|
|
225
|
+
"""
|
|
226
|
+
return mlrun.datastore.store_manager.object(
|
|
227
|
+
_get_monitoring_current_stats_file_path(
|
|
228
|
+
project=project, endpoint_id=endpoint_id
|
|
229
|
+
)
|
|
230
|
+
)
|
|
231
|
+
|
|
110
232
|
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
233
|
+
def get_monitoring_drift_measures_data(project: str, endpoint_id: str) -> "DataItem":
|
|
234
|
+
"""
|
|
235
|
+
getter for data item of drift measures for project and endpoint
|
|
236
|
+
:param project: project name str
|
|
237
|
+
:param endpoint_id: endpoint id str
|
|
238
|
+
:return: DataItem
|
|
239
|
+
"""
|
|
240
|
+
return mlrun.datastore.store_manager.object(
|
|
241
|
+
_get_monitoring_drift_measures_file_path(
|
|
242
|
+
project=project, endpoint_id=endpoint_id
|
|
243
|
+
)
|
|
114
244
|
)
|
|
115
245
|
|
|
116
246
|
|
|
117
|
-
def
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
:param secret_provider: An optional secret provider to get the connection string secret.
|
|
123
|
-
:return: Valid TSDB connection string.
|
|
247
|
+
def _get_profile(
|
|
248
|
+
project: str,
|
|
249
|
+
secret_provider: Optional[Callable[[str], str]],
|
|
250
|
+
profile_name_key: str,
|
|
251
|
+
) -> mlrun.datastore.datastore_profile.DatastoreProfile:
|
|
124
252
|
"""
|
|
253
|
+
Get the datastore profile from the project name and secret provider, where the profile's name
|
|
254
|
+
is saved as a secret named `profile_name_key`.
|
|
125
255
|
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
256
|
+
:param project: The project name.
|
|
257
|
+
:param secret_provider: Secret provider to get the secrets from, or `None` for env vars.
|
|
258
|
+
:param profile_name_key: The profile name key in the secret store.
|
|
259
|
+
:return: Datastore profile.
|
|
260
|
+
"""
|
|
261
|
+
profile_name = mlrun.get_secret_or_env(
|
|
262
|
+
key=profile_name_key, secret_provider=secret_provider
|
|
263
|
+
)
|
|
264
|
+
if not profile_name:
|
|
265
|
+
raise mlrun.errors.MLRunNotFoundError(
|
|
266
|
+
f"Not found `{profile_name_key}` profile name for project '{project}'"
|
|
267
|
+
)
|
|
268
|
+
return mlrun.datastore.datastore_profile.datastore_profile_read(
|
|
269
|
+
url=f"ds://{profile_name}", project_name=project, secrets=secret_provider
|
|
129
270
|
)
|
|
130
271
|
|
|
131
272
|
|
|
273
|
+
_get_tsdb_profile = functools.partial(
|
|
274
|
+
_get_profile, profile_name_key=mm_constants.ProjectSecretKeys.TSDB_PROFILE_NAME
|
|
275
|
+
)
|
|
276
|
+
_get_stream_profile = functools.partial(
|
|
277
|
+
_get_profile, profile_name_key=mm_constants.ProjectSecretKeys.STREAM_PROFILE_NAME
|
|
278
|
+
)
|
|
279
|
+
|
|
280
|
+
|
|
281
|
+
def _get_v3io_output_stream(
|
|
282
|
+
*,
|
|
283
|
+
v3io_profile: mlrun.datastore.datastore_profile.DatastoreProfileV3io,
|
|
284
|
+
project: str,
|
|
285
|
+
function_name: str,
|
|
286
|
+
v3io_access_key: Optional[str],
|
|
287
|
+
mock: bool = False,
|
|
288
|
+
) -> mlrun.platforms.iguazio.OutputStream:
|
|
289
|
+
stream_uri = mlrun.mlconf.get_model_monitoring_file_target_path(
|
|
290
|
+
project=project,
|
|
291
|
+
kind=mm_constants.FileTargetKind.STREAM,
|
|
292
|
+
target="online",
|
|
293
|
+
function_name=function_name,
|
|
294
|
+
)
|
|
295
|
+
endpoint, stream_path = mlrun.platforms.iguazio.parse_path(stream_uri)
|
|
296
|
+
return mlrun.platforms.iguazio.OutputStream(
|
|
297
|
+
stream_path,
|
|
298
|
+
endpoint=endpoint,
|
|
299
|
+
access_key=v3io_access_key or v3io_profile.v3io_access_key,
|
|
300
|
+
mock=mock,
|
|
301
|
+
)
|
|
302
|
+
|
|
303
|
+
|
|
304
|
+
def _get_kafka_output_stream(
|
|
305
|
+
*,
|
|
306
|
+
kafka_profile: mlrun.datastore.datastore_profile.DatastoreProfileKafkaSource,
|
|
307
|
+
project: str,
|
|
308
|
+
function_name: str,
|
|
309
|
+
mock: bool = False,
|
|
310
|
+
) -> mlrun.platforms.iguazio.KafkaOutputStream:
|
|
311
|
+
topic = mlrun.common.model_monitoring.helpers.get_kafka_topic(
|
|
312
|
+
project=project, function_name=function_name
|
|
313
|
+
)
|
|
314
|
+
attributes = kafka_profile.attributes()
|
|
315
|
+
producer_options = mlrun.datastore.utils.KafkaParameters(attributes).producer()
|
|
316
|
+
|
|
317
|
+
return mlrun.platforms.iguazio.KafkaOutputStream(
|
|
318
|
+
brokers=kafka_profile.brokers,
|
|
319
|
+
topic=topic,
|
|
320
|
+
producer_options=producer_options,
|
|
321
|
+
mock=mock,
|
|
322
|
+
)
|
|
323
|
+
|
|
324
|
+
|
|
325
|
+
def get_output_stream(
|
|
326
|
+
project: str,
|
|
327
|
+
function_name: str = mm_constants.MonitoringFunctionNames.STREAM,
|
|
328
|
+
secret_provider: Optional[Callable[[str], str]] = None,
|
|
329
|
+
profile: Optional[mlrun.datastore.datastore_profile.DatastoreProfile] = None,
|
|
330
|
+
v3io_access_key: Optional[str] = None,
|
|
331
|
+
mock: bool = False,
|
|
332
|
+
) -> Union[
|
|
333
|
+
mlrun.platforms.iguazio.OutputStream, mlrun.platforms.iguazio.KafkaOutputStream
|
|
334
|
+
]:
|
|
335
|
+
"""
|
|
336
|
+
Get stream path from the project secret. If wasn't set, take it from the system configurations
|
|
337
|
+
|
|
338
|
+
:param project: Project name.
|
|
339
|
+
:param function_name: Application name. Default is model_monitoring_stream.
|
|
340
|
+
:param secret_provider: Optional secret provider to get the connection string secret.
|
|
341
|
+
If not set, the env vars are used.
|
|
342
|
+
:param profile: Optional datastore profile of the stream (V3IO/KafkaSource profile).
|
|
343
|
+
:param v3io_access_key: Optional V3IO access key.
|
|
344
|
+
:param mock: Should the output stream be mocked or not.
|
|
345
|
+
:return: Monitoring stream path to the relevant application.
|
|
346
|
+
"""
|
|
347
|
+
|
|
348
|
+
profile = profile or _get_stream_profile(
|
|
349
|
+
project=project, secret_provider=secret_provider
|
|
350
|
+
)
|
|
351
|
+
|
|
352
|
+
if isinstance(profile, mlrun.datastore.datastore_profile.DatastoreProfileV3io):
|
|
353
|
+
return _get_v3io_output_stream(
|
|
354
|
+
v3io_profile=profile,
|
|
355
|
+
project=project,
|
|
356
|
+
function_name=function_name,
|
|
357
|
+
v3io_access_key=v3io_access_key,
|
|
358
|
+
mock=mock,
|
|
359
|
+
)
|
|
360
|
+
|
|
361
|
+
elif isinstance(
|
|
362
|
+
profile, mlrun.datastore.datastore_profile.DatastoreProfileKafkaSource
|
|
363
|
+
):
|
|
364
|
+
return _get_kafka_output_stream(
|
|
365
|
+
kafka_profile=profile,
|
|
366
|
+
project=project,
|
|
367
|
+
function_name=function_name,
|
|
368
|
+
mock=mock,
|
|
369
|
+
)
|
|
370
|
+
|
|
371
|
+
else:
|
|
372
|
+
raise mlrun.errors.MLRunValueError(
|
|
373
|
+
f"Received an unexpected stream profile type: {type(profile)}\n"
|
|
374
|
+
"Expects `DatastoreProfileV3io` or `DatastoreProfileKafkaSource`."
|
|
375
|
+
)
|
|
376
|
+
|
|
377
|
+
|
|
132
378
|
def batch_dict2timedelta(batch_dict: _BatchDict) -> datetime.timedelta:
|
|
133
379
|
"""
|
|
134
380
|
Convert a batch dictionary to timedelta.
|
|
@@ -186,51 +432,23 @@ def update_model_endpoint_last_request(
|
|
|
186
432
|
:param current_request: current request time
|
|
187
433
|
:param db: DB interface.
|
|
188
434
|
"""
|
|
189
|
-
is_model_server_endpoint = model_endpoint.spec.stream_path != ""
|
|
190
|
-
if is_model_server_endpoint:
|
|
191
|
-
current_request = current_request.isoformat()
|
|
192
|
-
logger.info(
|
|
193
|
-
"Update model endpoint last request time (EP with serving)",
|
|
194
|
-
project=project,
|
|
195
|
-
endpoint_id=model_endpoint.metadata.uid,
|
|
196
|
-
last_request=model_endpoint.status.last_request,
|
|
197
|
-
current_request=current_request,
|
|
198
|
-
)
|
|
199
|
-
db.patch_model_endpoint(
|
|
200
|
-
project=project,
|
|
201
|
-
endpoint_id=model_endpoint.metadata.uid,
|
|
202
|
-
attributes={mm_constants.EventFieldType.LAST_REQUEST: current_request},
|
|
203
|
-
)
|
|
204
|
-
else: # model endpoint without any serving function - close the window "manually"
|
|
205
|
-
try:
|
|
206
|
-
time_window = _get_monitoring_time_window_from_controller_run(project, db)
|
|
207
|
-
except mlrun.errors.MLRunNotFoundError:
|
|
208
|
-
logger.warn(
|
|
209
|
-
"Not bumping model endpoint last request time - the monitoring controller isn't deployed yet.\n"
|
|
210
|
-
"Call `project.enable_model_monitoring()` first."
|
|
211
|
-
)
|
|
212
|
-
return
|
|
213
435
|
|
|
214
|
-
|
|
215
|
-
|
|
216
|
-
|
|
217
|
-
|
|
218
|
-
|
|
219
|
-
|
|
220
|
-
|
|
221
|
-
|
|
222
|
-
|
|
223
|
-
|
|
224
|
-
|
|
225
|
-
|
|
226
|
-
|
|
227
|
-
|
|
228
|
-
|
|
229
|
-
|
|
230
|
-
project=project,
|
|
231
|
-
endpoint_id=model_endpoint.metadata.uid,
|
|
232
|
-
attributes={mm_constants.EventFieldType.LAST_REQUEST: bumped_last_request},
|
|
233
|
-
)
|
|
436
|
+
logger.info(
|
|
437
|
+
"Update model endpoint last request time (EP with serving)",
|
|
438
|
+
project=project,
|
|
439
|
+
endpoint_id=model_endpoint.metadata.uid,
|
|
440
|
+
name=model_endpoint.metadata.name,
|
|
441
|
+
function_name=model_endpoint.spec.function_name,
|
|
442
|
+
last_request=model_endpoint.status.last_request,
|
|
443
|
+
current_request=current_request,
|
|
444
|
+
)
|
|
445
|
+
db.patch_model_endpoint(
|
|
446
|
+
project=project,
|
|
447
|
+
endpoint_id=model_endpoint.metadata.uid,
|
|
448
|
+
name=model_endpoint.metadata.name,
|
|
449
|
+
function_name=model_endpoint.spec.function_name,
|
|
450
|
+
attributes={mm_constants.EventFieldType.LAST_REQUEST: current_request},
|
|
451
|
+
)
|
|
234
452
|
|
|
235
453
|
|
|
236
454
|
def calculate_inputs_statistics(
|
|
@@ -270,23 +488,28 @@ def calculate_inputs_statistics(
|
|
|
270
488
|
return inputs_statistics
|
|
271
489
|
|
|
272
490
|
|
|
273
|
-
def get_endpoint_record(
|
|
274
|
-
project: str,
|
|
275
|
-
endpoint_id: str,
|
|
276
|
-
secret_provider: typing.Optional[typing.Callable[[str], str]] = None,
|
|
277
|
-
) -> dict[str, typing.Any]:
|
|
278
|
-
model_endpoint_store = mlrun.model_monitoring.get_store_object(
|
|
279
|
-
project=project, secret_provider=secret_provider
|
|
280
|
-
)
|
|
281
|
-
return model_endpoint_store.get_model_endpoint(endpoint_id=endpoint_id)
|
|
282
|
-
|
|
283
|
-
|
|
284
491
|
def get_result_instance_fqn(
|
|
285
492
|
model_endpoint_id: str, app_name: str, result_name: str
|
|
286
493
|
) -> str:
|
|
287
494
|
return f"{model_endpoint_id}.{app_name}.result.{result_name}"
|
|
288
495
|
|
|
289
496
|
|
|
497
|
+
def get_alert_name_from_result_fqn(result_fqn: str):
|
|
498
|
+
"""
|
|
499
|
+
:param result_fqn: current get_result_instance_fqn format: `{model_endpoint_id}.{app_name}.result.{result_name}`
|
|
500
|
+
|
|
501
|
+
:return: shorter fqn without forbidden alert characters.
|
|
502
|
+
"""
|
|
503
|
+
if result_fqn.count(".") != 3 or result_fqn.split(".")[2] != "result":
|
|
504
|
+
raise mlrun.errors.MLRunValueError(
|
|
505
|
+
f"result_fqn: {result_fqn} is not in the correct format: {{model_endpoint_id}}.{{app_name}}."
|
|
506
|
+
f"result.{{result_name}}"
|
|
507
|
+
)
|
|
508
|
+
# Name format cannot contain "."
|
|
509
|
+
# The third component is always `result`, so it is not necessary for checking uniqueness.
|
|
510
|
+
return "_".join(result_fqn.split(".")[i] for i in [0, 1, 3])
|
|
511
|
+
|
|
512
|
+
|
|
290
513
|
def get_default_result_instance_fqn(model_endpoint_id: str) -> str:
|
|
291
514
|
return get_result_instance_fqn(
|
|
292
515
|
model_endpoint_id,
|
|
@@ -296,7 +519,7 @@ def get_default_result_instance_fqn(model_endpoint_id: str) -> str:
|
|
|
296
519
|
|
|
297
520
|
|
|
298
521
|
def get_invocations_fqn(project: str) -> str:
|
|
299
|
-
return
|
|
522
|
+
return compose_full_name(
|
|
300
523
|
project=project,
|
|
301
524
|
app=mm_constants.SpecialApps.MLRUN_INFRA,
|
|
302
525
|
name=mm_constants.PredictionsQueryConstants.INVOCATIONS,
|
|
@@ -320,33 +543,52 @@ def get_invocations_metric(project: str) -> ModelEndpointMonitoringMetric:
|
|
|
320
543
|
)
|
|
321
544
|
|
|
322
545
|
|
|
323
|
-
def
|
|
324
|
-
|
|
325
|
-
|
|
326
|
-
|
|
327
|
-
|
|
328
|
-
|
|
329
|
-
|
|
330
|
-
|
|
546
|
+
def _get_monitoring_schedules_folder_path(project: str) -> str:
|
|
547
|
+
return cast(
|
|
548
|
+
str,
|
|
549
|
+
mlrun.mlconf.get_model_monitoring_file_target_path(
|
|
550
|
+
project=project, kind=mm_constants.FileTargetKind.MONITORING_SCHEDULES
|
|
551
|
+
),
|
|
552
|
+
)
|
|
553
|
+
|
|
554
|
+
|
|
555
|
+
def _get_monitoring_schedules_file_endpoint_path(
|
|
556
|
+
*, project: str, endpoint_id: str
|
|
557
|
+
) -> str:
|
|
558
|
+
return os.path.join(
|
|
559
|
+
_get_monitoring_schedules_folder_path(project), f"{endpoint_id}.json"
|
|
560
|
+
)
|
|
331
561
|
|
|
332
|
-
:param model_endpoint: An object representing the model endpoint that will be enriched with the model uri.
|
|
333
|
-
:param model_obj: An object representing the model artifact.
|
|
334
562
|
|
|
335
|
-
|
|
563
|
+
def get_monitoring_schedules_endpoint_data(
|
|
564
|
+
*, project: str, endpoint_id: str
|
|
565
|
+
) -> "DataItem":
|
|
336
566
|
"""
|
|
337
|
-
|
|
338
|
-
|
|
339
|
-
|
|
340
|
-
|
|
567
|
+
Get the model monitoring schedules' data item of the project's model endpoint.
|
|
568
|
+
"""
|
|
569
|
+
return mlrun.datastore.store_manager.object(
|
|
570
|
+
_get_monitoring_schedules_file_endpoint_path(
|
|
571
|
+
project=project, endpoint_id=endpoint_id
|
|
572
|
+
)
|
|
341
573
|
)
|
|
342
574
|
|
|
343
|
-
|
|
344
|
-
|
|
345
|
-
|
|
346
|
-
|
|
347
|
-
|
|
348
|
-
|
|
575
|
+
|
|
576
|
+
def get_monitoring_schedules_chief_data(
|
|
577
|
+
*,
|
|
578
|
+
project: str,
|
|
579
|
+
) -> "DataItem":
|
|
580
|
+
"""
|
|
581
|
+
Get the model monitoring schedules' data item of the project's model endpoint.
|
|
582
|
+
"""
|
|
583
|
+
return mlrun.datastore.store_manager.object(
|
|
584
|
+
_get_monitoring_schedules_file_chief_path(project=project)
|
|
349
585
|
)
|
|
350
|
-
|
|
351
|
-
|
|
586
|
+
|
|
587
|
+
|
|
588
|
+
def _get_monitoring_schedules_file_chief_path(
|
|
589
|
+
*,
|
|
590
|
+
project: str,
|
|
591
|
+
) -> str:
|
|
592
|
+
return os.path.join(
|
|
593
|
+
_get_monitoring_schedules_folder_path(project), f"{project}.json"
|
|
352
594
|
)
|