mlrun 1.7.0rc4__py3-none-any.whl → 1.7.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of mlrun might be problematic. Click here for more details.
- mlrun/__init__.py +11 -1
- mlrun/__main__.py +39 -121
- mlrun/{datastore/helpers.py → alerts/__init__.py} +2 -5
- mlrun/alerts/alert.py +248 -0
- mlrun/api/schemas/__init__.py +4 -3
- mlrun/artifacts/__init__.py +8 -3
- mlrun/artifacts/base.py +39 -254
- mlrun/artifacts/dataset.py +9 -190
- mlrun/artifacts/manager.py +73 -46
- mlrun/artifacts/model.py +30 -158
- mlrun/artifacts/plots.py +23 -380
- mlrun/common/constants.py +73 -1
- mlrun/common/db/sql_session.py +3 -2
- mlrun/common/formatters/__init__.py +21 -0
- mlrun/common/formatters/artifact.py +46 -0
- mlrun/common/formatters/base.py +113 -0
- mlrun/common/formatters/feature_set.py +44 -0
- mlrun/common/formatters/function.py +46 -0
- mlrun/common/formatters/pipeline.py +53 -0
- mlrun/common/formatters/project.py +51 -0
- mlrun/common/formatters/run.py +29 -0
- mlrun/common/helpers.py +11 -1
- mlrun/{runtimes → common/runtimes}/constants.py +32 -4
- mlrun/common/schemas/__init__.py +31 -4
- mlrun/common/schemas/alert.py +202 -0
- mlrun/common/schemas/api_gateway.py +196 -0
- mlrun/common/schemas/artifact.py +28 -1
- mlrun/common/schemas/auth.py +13 -2
- mlrun/common/schemas/client_spec.py +2 -1
- mlrun/common/schemas/common.py +7 -4
- mlrun/common/schemas/constants.py +3 -0
- mlrun/common/schemas/feature_store.py +58 -28
- mlrun/common/schemas/frontend_spec.py +8 -0
- mlrun/common/schemas/function.py +11 -0
- mlrun/common/schemas/hub.py +7 -9
- mlrun/common/schemas/model_monitoring/__init__.py +21 -4
- mlrun/common/schemas/model_monitoring/constants.py +136 -42
- mlrun/common/schemas/model_monitoring/grafana.py +9 -5
- mlrun/common/schemas/model_monitoring/model_endpoints.py +89 -41
- mlrun/common/schemas/notification.py +69 -12
- mlrun/{runtimes/mpijob/v1alpha1.py → common/schemas/pagination.py} +10 -13
- mlrun/common/schemas/pipeline.py +7 -0
- mlrun/common/schemas/project.py +67 -16
- mlrun/common/schemas/runs.py +17 -0
- mlrun/common/schemas/schedule.py +1 -1
- mlrun/common/schemas/workflow.py +10 -2
- mlrun/common/types.py +14 -1
- mlrun/config.py +233 -58
- mlrun/data_types/data_types.py +11 -1
- mlrun/data_types/spark.py +5 -4
- mlrun/data_types/to_pandas.py +75 -34
- mlrun/datastore/__init__.py +8 -10
- mlrun/datastore/alibaba_oss.py +131 -0
- mlrun/datastore/azure_blob.py +131 -43
- mlrun/datastore/base.py +107 -47
- mlrun/datastore/datastore.py +17 -7
- mlrun/datastore/datastore_profile.py +91 -7
- mlrun/datastore/dbfs_store.py +3 -7
- mlrun/datastore/filestore.py +1 -3
- mlrun/datastore/google_cloud_storage.py +92 -32
- mlrun/datastore/hdfs.py +5 -0
- mlrun/datastore/inmem.py +6 -3
- mlrun/datastore/redis.py +3 -2
- mlrun/datastore/s3.py +30 -12
- mlrun/datastore/snowflake_utils.py +45 -0
- mlrun/datastore/sources.py +274 -59
- mlrun/datastore/spark_utils.py +30 -0
- mlrun/datastore/store_resources.py +9 -7
- mlrun/datastore/storeytargets.py +151 -0
- mlrun/datastore/targets.py +387 -119
- mlrun/datastore/utils.py +68 -5
- mlrun/datastore/v3io.py +28 -50
- mlrun/db/auth_utils.py +152 -0
- mlrun/db/base.py +245 -20
- mlrun/db/factory.py +1 -4
- mlrun/db/httpdb.py +909 -231
- mlrun/db/nopdb.py +279 -14
- mlrun/errors.py +35 -5
- mlrun/execution.py +111 -38
- mlrun/feature_store/__init__.py +0 -2
- mlrun/feature_store/api.py +46 -53
- mlrun/feature_store/common.py +6 -11
- mlrun/feature_store/feature_set.py +48 -23
- mlrun/feature_store/feature_vector.py +13 -2
- mlrun/feature_store/ingestion.py +7 -6
- mlrun/feature_store/retrieval/base.py +9 -4
- mlrun/feature_store/retrieval/dask_merger.py +2 -0
- mlrun/feature_store/retrieval/job.py +13 -4
- mlrun/feature_store/retrieval/local_merger.py +2 -0
- mlrun/feature_store/retrieval/spark_merger.py +24 -32
- mlrun/feature_store/steps.py +38 -19
- mlrun/features.py +6 -14
- mlrun/frameworks/_common/plan.py +3 -3
- mlrun/frameworks/_dl_common/loggers/tensorboard_logger.py +7 -12
- mlrun/frameworks/_ml_common/plan.py +1 -1
- mlrun/frameworks/auto_mlrun/auto_mlrun.py +2 -2
- mlrun/frameworks/lgbm/__init__.py +1 -1
- mlrun/frameworks/lgbm/callbacks/callback.py +2 -4
- mlrun/frameworks/lgbm/model_handler.py +1 -1
- mlrun/frameworks/parallel_coordinates.py +4 -4
- mlrun/frameworks/pytorch/__init__.py +2 -2
- mlrun/frameworks/sklearn/__init__.py +1 -1
- mlrun/frameworks/sklearn/mlrun_interface.py +13 -3
- mlrun/frameworks/tf_keras/__init__.py +5 -2
- mlrun/frameworks/tf_keras/callbacks/logging_callback.py +1 -1
- mlrun/frameworks/tf_keras/mlrun_interface.py +2 -2
- mlrun/frameworks/xgboost/__init__.py +1 -1
- mlrun/k8s_utils.py +57 -12
- mlrun/launcher/__init__.py +1 -1
- mlrun/launcher/base.py +6 -5
- mlrun/launcher/client.py +13 -11
- mlrun/launcher/factory.py +1 -1
- mlrun/launcher/local.py +15 -5
- mlrun/launcher/remote.py +10 -3
- mlrun/lists.py +6 -2
- mlrun/model.py +297 -48
- mlrun/model_monitoring/__init__.py +1 -1
- mlrun/model_monitoring/api.py +152 -357
- mlrun/model_monitoring/applications/__init__.py +10 -0
- mlrun/model_monitoring/applications/_application_steps.py +190 -0
- mlrun/model_monitoring/applications/base.py +108 -0
- mlrun/model_monitoring/applications/context.py +341 -0
- mlrun/model_monitoring/{evidently_application.py → applications/evidently_base.py} +27 -22
- mlrun/model_monitoring/applications/histogram_data_drift.py +227 -91
- mlrun/model_monitoring/applications/results.py +99 -0
- mlrun/model_monitoring/controller.py +130 -303
- mlrun/model_monitoring/{stores/models/sqlite.py → db/__init__.py} +5 -10
- mlrun/model_monitoring/db/stores/__init__.py +136 -0
- mlrun/model_monitoring/db/stores/base/__init__.py +15 -0
- mlrun/model_monitoring/db/stores/base/store.py +213 -0
- mlrun/model_monitoring/db/stores/sqldb/__init__.py +13 -0
- mlrun/model_monitoring/db/stores/sqldb/models/__init__.py +71 -0
- mlrun/model_monitoring/db/stores/sqldb/models/base.py +190 -0
- mlrun/model_monitoring/db/stores/sqldb/models/mysql.py +103 -0
- mlrun/model_monitoring/{stores/models/mysql.py → db/stores/sqldb/models/sqlite.py} +19 -13
- mlrun/model_monitoring/db/stores/sqldb/sql_store.py +659 -0
- mlrun/model_monitoring/db/stores/v3io_kv/__init__.py +13 -0
- mlrun/model_monitoring/db/stores/v3io_kv/kv_store.py +726 -0
- mlrun/model_monitoring/db/tsdb/__init__.py +105 -0
- mlrun/model_monitoring/db/tsdb/base.py +448 -0
- mlrun/model_monitoring/db/tsdb/helpers.py +30 -0
- mlrun/model_monitoring/db/tsdb/tdengine/__init__.py +15 -0
- mlrun/model_monitoring/db/tsdb/tdengine/schemas.py +298 -0
- mlrun/model_monitoring/db/tsdb/tdengine/stream_graph_steps.py +42 -0
- mlrun/model_monitoring/db/tsdb/tdengine/tdengine_connector.py +522 -0
- mlrun/model_monitoring/db/tsdb/v3io/__init__.py +15 -0
- mlrun/model_monitoring/db/tsdb/v3io/stream_graph_steps.py +158 -0
- mlrun/model_monitoring/db/tsdb/v3io/v3io_connector.py +849 -0
- mlrun/model_monitoring/features_drift_table.py +34 -22
- mlrun/model_monitoring/helpers.py +177 -39
- mlrun/model_monitoring/model_endpoint.py +3 -2
- mlrun/model_monitoring/stream_processing.py +165 -398
- mlrun/model_monitoring/tracking_policy.py +7 -1
- mlrun/model_monitoring/writer.py +161 -125
- mlrun/package/packagers/default_packager.py +2 -2
- mlrun/package/packagers_manager.py +1 -0
- mlrun/package/utils/_formatter.py +2 -2
- mlrun/platforms/__init__.py +11 -10
- mlrun/platforms/iguazio.py +67 -228
- mlrun/projects/__init__.py +6 -1
- mlrun/projects/operations.py +47 -20
- mlrun/projects/pipelines.py +396 -249
- mlrun/projects/project.py +1176 -406
- mlrun/render.py +28 -22
- mlrun/run.py +208 -181
- mlrun/runtimes/__init__.py +76 -11
- mlrun/runtimes/base.py +54 -24
- mlrun/runtimes/daskjob.py +9 -2
- mlrun/runtimes/databricks_job/databricks_runtime.py +1 -0
- mlrun/runtimes/databricks_job/databricks_wrapper.py +1 -1
- mlrun/runtimes/funcdoc.py +1 -29
- mlrun/runtimes/kubejob.py +34 -128
- mlrun/runtimes/local.py +39 -10
- mlrun/runtimes/mpijob/__init__.py +0 -20
- mlrun/runtimes/mpijob/abstract.py +8 -8
- mlrun/runtimes/mpijob/v1.py +1 -1
- mlrun/runtimes/nuclio/__init__.py +1 -0
- mlrun/runtimes/nuclio/api_gateway.py +769 -0
- mlrun/runtimes/nuclio/application/__init__.py +15 -0
- mlrun/runtimes/nuclio/application/application.py +758 -0
- mlrun/runtimes/nuclio/application/reverse_proxy.go +95 -0
- mlrun/runtimes/nuclio/function.py +188 -68
- mlrun/runtimes/nuclio/serving.py +57 -60
- mlrun/runtimes/pod.py +191 -58
- mlrun/runtimes/remotesparkjob.py +11 -8
- mlrun/runtimes/sparkjob/spark3job.py +17 -18
- mlrun/runtimes/utils.py +40 -73
- mlrun/secrets.py +6 -2
- mlrun/serving/__init__.py +8 -1
- mlrun/serving/remote.py +2 -3
- mlrun/serving/routers.py +89 -64
- mlrun/serving/server.py +54 -26
- mlrun/serving/states.py +187 -56
- mlrun/serving/utils.py +19 -11
- mlrun/serving/v2_serving.py +136 -63
- mlrun/track/tracker.py +2 -1
- mlrun/track/trackers/mlflow_tracker.py +5 -0
- mlrun/utils/async_http.py +26 -6
- mlrun/utils/db.py +18 -0
- mlrun/utils/helpers.py +375 -105
- mlrun/utils/http.py +2 -2
- mlrun/utils/logger.py +75 -9
- mlrun/utils/notifications/notification/__init__.py +14 -10
- mlrun/utils/notifications/notification/base.py +48 -0
- mlrun/utils/notifications/notification/console.py +2 -0
- mlrun/utils/notifications/notification/git.py +24 -1
- mlrun/utils/notifications/notification/ipython.py +2 -0
- mlrun/utils/notifications/notification/slack.py +96 -21
- mlrun/utils/notifications/notification/webhook.py +63 -2
- mlrun/utils/notifications/notification_pusher.py +146 -16
- mlrun/utils/regex.py +9 -0
- mlrun/utils/retryer.py +3 -2
- mlrun/utils/v3io_clients.py +2 -3
- mlrun/utils/version/version.json +2 -2
- mlrun-1.7.2.dist-info/METADATA +390 -0
- mlrun-1.7.2.dist-info/RECORD +351 -0
- {mlrun-1.7.0rc4.dist-info → mlrun-1.7.2.dist-info}/WHEEL +1 -1
- mlrun/feature_store/retrieval/conversion.py +0 -271
- mlrun/kfpops.py +0 -868
- mlrun/model_monitoring/application.py +0 -310
- mlrun/model_monitoring/batch.py +0 -974
- mlrun/model_monitoring/controller_handler.py +0 -37
- mlrun/model_monitoring/prometheus.py +0 -216
- mlrun/model_monitoring/stores/__init__.py +0 -111
- mlrun/model_monitoring/stores/kv_model_endpoint_store.py +0 -574
- mlrun/model_monitoring/stores/model_endpoint_store.py +0 -145
- mlrun/model_monitoring/stores/models/__init__.py +0 -27
- mlrun/model_monitoring/stores/models/base.py +0 -84
- mlrun/model_monitoring/stores/sql_model_endpoint_store.py +0 -382
- mlrun/platforms/other.py +0 -305
- mlrun-1.7.0rc4.dist-info/METADATA +0 -269
- mlrun-1.7.0rc4.dist-info/RECORD +0 -321
- {mlrun-1.7.0rc4.dist-info → mlrun-1.7.2.dist-info}/LICENSE +0 -0
- {mlrun-1.7.0rc4.dist-info → mlrun-1.7.2.dist-info}/entry_points.txt +0 -0
- {mlrun-1.7.0rc4.dist-info → mlrun-1.7.2.dist-info}/top_level.txt +0 -0
|
@@ -1,574 +0,0 @@
|
|
|
1
|
-
# Copyright 2023 Iguazio
|
|
2
|
-
#
|
|
3
|
-
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
-
# you may not use this file except in compliance with the License.
|
|
5
|
-
# You may obtain a copy of the License at
|
|
6
|
-
#
|
|
7
|
-
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
-
#
|
|
9
|
-
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
-
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
-
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
-
# See the License for the specific language governing permissions and
|
|
13
|
-
# limitations under the License.
|
|
14
|
-
#
|
|
15
|
-
|
|
16
|
-
import json
|
|
17
|
-
import os
|
|
18
|
-
import typing
|
|
19
|
-
|
|
20
|
-
import v3io.dataplane
|
|
21
|
-
import v3io_frames
|
|
22
|
-
|
|
23
|
-
import mlrun.common.model_monitoring.helpers
|
|
24
|
-
import mlrun.common.schemas.model_monitoring
|
|
25
|
-
import mlrun.utils.v3io_clients
|
|
26
|
-
from mlrun.utils import logger
|
|
27
|
-
|
|
28
|
-
from .model_endpoint_store import ModelEndpointStore
|
|
29
|
-
|
|
30
|
-
# Model endpoint fields that are kept as binary data in the KV table: they are encoded
# right before a record is written and decoded right after a record is read.
fields_to_encode_decode = [
    mlrun.common.schemas.model_monitoring.EventFieldType.FEATURE_STATS,
    mlrun.common.schemas.model_monitoring.EventFieldType.CURRENT_STATS,
]
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
class KVModelEndpointStore(ModelEndpointStore):
|
|
38
|
-
"""
|
|
39
|
-
Handles the DB operations when the DB target is from type KV. For the KV operations, we use an instance of V3IO
|
|
40
|
-
client and usually the KV table can be found under v3io:///users/pipelines/project-name/model-endpoints/endpoints/.
|
|
41
|
-
"""
|
|
42
|
-
|
|
43
|
-
def __init__(self, project: str, access_key: str):
    """
    Create the V3IO client and resolve the location of the KV table for the given project.

    :param project:    The name of the project. Forwarded to the parent store initializer.
    :param access_key: V3IO access key. When it is empty, the value of the `V3IO_ACCESS_KEY` environment
                       variable is used instead.
    """
    super().__init__(project=project)

    # An explicitly provided key wins, otherwise fall back to the environment variable
    if not access_key:
        access_key = os.environ.get("V3IO_ACCESS_KEY")
    self.access_key = access_key

    # V3IO client instance that serves all the KV operations of this store
    self.client = mlrun.utils.v3io_clients.get_v3io_client(
        endpoint=mlrun.mlconf.v3io_api,
        access_key=self.access_key,
    )

    # Resolve the KV table path and its container once, at construction time
    table_path, container = self._get_path_and_container()
    self.path = table_path
    self.container = container
|
|
52
|
-
|
|
53
|
-
def write_model_endpoint(self, endpoint: dict[str, typing.Any]):
    """
    Store a new model endpoint record in the KV table, using the endpoint `uid` as the record key.

    Note that the fields listed in `fields_to_encode_decode` are encoded in place, i.e. the provided
    dictionary itself is modified.

    :param endpoint: model endpoint dictionary that will be written into the DB.
    """

    # Only the encodable fields that actually appear in this record need a conversion
    fields_present = [name for name in fields_to_encode_decode if name in endpoint]
    for name in fields_present:
        # Encode to binary data
        endpoint[name] = self._encode_field(endpoint[name])

    record_key = endpoint[mlrun.common.schemas.model_monitoring.EventFieldType.UID]
    self.client.kv.put(
        container=self.container,
        table_path=self.path,
        key=record_key,
        attributes=endpoint,
    )

    # Generate the KV schema file in case it does not exist yet
    self._infer_kv_schema()
|
|
73
|
-
|
|
74
|
-
def update_model_endpoint(
    self, endpoint_id: str, attributes: dict[str, typing.Any]
):
    """
    Update the KV record of a model endpoint with the given attributes.

    Note that the fields listed in `fields_to_encode_decode` are encoded in place, i.e. the provided
    dictionary itself is modified.

    :param endpoint_id: The unique id of the model endpoint.
    :param attributes:  Dictionary of attributes that will be used to update the model endpoint. Note that the
                        keys of the attributes dictionary should exist in the KV table.
    """

    # Only the encodable fields that are part of this update need a conversion
    fields_present = [name for name in fields_to_encode_decode if name in attributes]
    for name in fields_present:
        # Encode to binary data
        attributes[name] = self._encode_field(attributes[name])

    kv_api = self.client.kv
    kv_api.update(
        container=self.container,
        table_path=self.path,
        key=endpoint_id,
        attributes=attributes,
    )
|
|
97
|
-
|
|
98
|
-
def delete_model_endpoint(
    self,
    endpoint_id: str,
):
    """
    Remove the KV record that is stored under the given model endpoint id.

    :param endpoint_id: The unique id of the model endpoint.
    """

    kv_api = self.client.kv
    kv_api.delete(container=self.container, table_path=self.path, key=endpoint_id)
|
|
113
|
-
|
|
114
|
-
def get_model_endpoint(
    self,
    endpoint_id: str,
) -> dict[str, typing.Any]:
    """
    Get a single model endpoint record.

    :param endpoint_id: The unique id of the model endpoint.

    :return: A model endpoint record as a dictionary. The fields listed in `fields_to_encode_decode` are
             decoded before the record is returned.

    :raise MLRunNotFoundError: If the endpoint was not found.
    """

    # Getting the raw data from the KV table. The request never raises on a bad status, so a missing
    # record is detected below through the empty item.
    response = self.client.kv.get(
        container=self.container,
        table_path=self.path,
        key=endpoint_id,
        raise_for_status=v3io.dataplane.RaiseForStatus.never,
        access_key=self.access_key,
    )
    endpoint = response.output.item

    # Check for a missing record before touching it, so that an absent item is always reported as
    # "not found" and never fails earlier on the membership tests of the decoding loop
    if not endpoint:
        raise mlrun.errors.MLRunNotFoundError(f"Endpoint {endpoint_id} not found")

    for field in fields_to_encode_decode:
        if field in endpoint:
            # Decode binary data
            endpoint[field] = self._decode_field(endpoint[field])

    # For backwards compatibility: replace null values for `error_count` and `metrics`
    self.validate_old_schema_fields(endpoint=endpoint)

    return endpoint
|
|
150
|
-
|
|
151
|
-
def _get_path_and_container(self):
    """
    Resolve the KV table path and its container from the model monitoring configuration.

    :return: A tuple of the table path and the container, in that order.
    """
    # Full store prefix of the endpoints table of this project
    store_prefix = mlrun.mlconf.model_endpoint_monitoring.store_prefixes.default.format(
        project=self.project,
        kind=mlrun.common.schemas.ModelMonitoringStoreKinds.ENDPOINTS,
    )

    # The parsed prefix has three parts; the first one is not needed here
    parsed_prefix = (
        mlrun.common.model_monitoring.helpers.parse_model_endpoint_store_prefix(
            store_prefix
        )
    )
    _, container, path = parsed_prefix
    return path, container
|
|
165
|
-
|
|
166
|
-
def list_model_endpoints(
    self,
    model: str = None,
    function: str = None,
    labels: list[str] = None,
    top_level: bool = None,
    uids: list = None,
) -> list[dict[str, typing.Any]]:
    """
    List the model endpoints of the project as dictionaries. The result can be narrowed down by model,
    function, labels or top level. Without any filter, all the model endpoints of the project are listed.

    :param model:     The name of the model to filter by.
    :param function:  The name of the function to filter by.
    :param labels:    A list of labels to filter by. Label filters work by either filtering a specific value
                      of a label (i.e. list("key=value")) or by looking for the existence of a given
                      key (i.e. "key").
    :param top_level: If True will return only routers and endpoint that are NOT children of any router.
    :param uids:      List of model endpoint unique ids to include in the result. When provided, these ids
                      are used as is instead of the ids found by the KV query.

    :return: A list of model endpoint dictionaries. An empty list is returned when the KV query fails.
    """

    # Retrieve the raw data from the KV table. Building the filter expression is part of the guarded
    # section, so a failure there is logged and results in an empty list as well.
    try:
        items = self.client.kv.new_cursor(
            container=self.container,
            table_path=self.path,
            filter_expression=self._build_kv_cursor_filter_expression(
                self.project,
                function,
                model,
                labels,
                top_level,
            ),
            raise_for_status=v3io.dataplane.RaiseForStatus.never,
        ).all()
    except Exception as exc:
        logger.warning(
            "Failed retrieving raw data from kv table",
            exc=mlrun.errors.err_to_str(exc),
        )
        return []

    # Collect the unique ids out of the retrieved records, unless the caller provided them
    if uids is None:
        uid_field = mlrun.common.schemas.model_monitoring.EventFieldType.UID
        # Kept for backwards compatibility - in old versions the key column was named endpoint_id
        legacy_field = mlrun.common.schemas.model_monitoring.EventFieldType.ENDPOINT_ID
        uids = [
            item[uid_field] if uid_field in item else item[legacy_field]
            for item in items
        ]

    # Fetch the full record of each relevant model endpoint
    return [self.get_model_endpoint(endpoint_id=uid) for uid in uids]
|
|
241
|
-
|
|
242
|
-
def delete_model_endpoints_resources(self, endpoints: list[dict[str, typing.Any]]):
    """
    Delete all model endpoints resources in both KV and the time series DB.

    The provided endpoints are deleted first, then every record that is still left in the KV table is
    removed, and finally the TSDB table and the TSDB directory of the project are deleted.

    :param endpoints: A list of model endpoints flattened dictionaries.
    """

    uid_field = mlrun.common.schemas.model_monitoring.EventFieldType.UID
    # Kept for backwards compatibility - in old versions the key column was named endpoint_id
    legacy_field = mlrun.common.schemas.model_monitoring.EventFieldType.ENDPOINT_ID

    # Delete model endpoint record from KV table
    for endpoint_dict in endpoints:
        if uid_field in endpoint_dict:
            endpoint_id = endpoint_dict[uid_field]
        else:
            endpoint_id = endpoint_dict[legacy_field]
        self.delete_model_endpoint(
            endpoint_id,
        )

    # Delete the remaining records in the KV
    all_records = self.client.kv.new_cursor(
        container=self.container,
        table_path=self.path,
        raise_for_status=v3io.dataplane.RaiseForStatus.never,
    ).all()

    # Resolve all the record names up front, before any of them is deleted
    record_names = [record["__name"] for record in all_records]

    # Cleanup KV
    for record_name in record_names:
        self.client.kv.delete(
            container=self.container,
            table_path=self.path,
            key=record_name,
            raise_for_status=v3io.dataplane.RaiseForStatus.never,
        )

    # Cleanup TSDB
    frames = self._get_frames_client()

    # Generate the required tsdb paths
    tsdb_path, filtered_path = self._generate_tsdb_paths()

    # Delete time series DB resources
    try:
        frames.delete(
            backend=mlrun.common.schemas.model_monitoring.TimeSeriesTarget.TSDB,
            table=filtered_path,
        )
    except v3io_frames.errors.DeleteError as e:
        # A missing TSDB schema file is not reported, any other failure is logged
        if "No TSDB schema file found" not in str(e):
            logger.warning(
                f"Failed to delete TSDB table '{filtered_path}'",
                err=mlrun.errors.err_to_str(e),
            )

    # Final cleanup of tsdb path.
    # `str.replace` returns a new string, so the result has to be assigned back - otherwise the
    # normalization of the path has no effect at all.
    tsdb_path = tsdb_path.replace("://u", ":///u")
    store, _, _ = mlrun.store_manager.get_or_create_store(tsdb_path)
    store.rm(tsdb_path, recursive=True)
|
|
307
|
-
|
|
308
|
-
def get_endpoint_real_time_metrics(
    self,
    endpoint_id: str,
    metrics: list[str],
    start: str = "now-1h",
    end: str = "now",
    access_key: str = None,
) -> dict[str, list[tuple[str, float]]]:
    """
    Getting metrics from the time series DB. There are pre-defined metrics for model endpoints such as
    `predictions_per_second` and `latency_avg_5m` but also custom metrics defined by the user.

    :param endpoint_id: The unique id of the model endpoint.
    :param metrics:     A list of real-time metrics to return for the model endpoint.
    :param start:       The start time of the metrics. Can be represented by a string containing an RFC 3339
                        time, a Unix timestamp in milliseconds, a relative time (`'now'` or
                        `'now-[0-9]+[mhd]'`, where `m` = minutes, `h` = hours, and `'d'` = days), or 0 for the
                        earliest time.
    :param end:         The end time of the metrics. Can be represented by a string containing an RFC 3339
                        time, a Unix timestamp in milliseconds, a relative time (`'now'` or
                        `'now-[0-9]+[mhd]'`, where `m` = minutes, `h` = hours, and `'d'` = days), or 0 for the
                        earliest time.
    :param access_key:  V3IO access key that will be used for generating Frames client object. If not
                        provided, the access key is taken from `mlrun.mlconf.get_v3io_access_key()`.

    :return: A dictionary of metrics in which the key is a metric name and the value is a list of tuples that
             includes timestamps and the values. Metrics that are missing from the retrieved data are left
             out, and an empty dictionary is returned when reading from the TSDB fails.

    :raise MLRunInvalidArgumentError: If no metric names were provided.
    """

    # Initialize access key
    access_key = access_key or mlrun.mlconf.get_v3io_access_key()

    if not metrics:
        raise mlrun.errors.MLRunInvalidArgumentError(
            "Metric names must be provided"
        )

    # Initialize metrics mapping dictionary
    metrics_mapping = {}

    # Getting the path for the time series DB
    events_path = (
        mlrun.mlconf.model_endpoint_monitoring.store_prefixes.default.format(
            project=self.project,
            kind=mlrun.common.schemas.ModelMonitoringStoreKinds.EVENTS,
        )
    )
    (
        _,
        container,
        events_path,
    ) = mlrun.common.model_monitoring.helpers.parse_model_endpoint_store_prefix(
        events_path
    )

    # Retrieve the raw data from the time series DB based on the provided metrics and time ranges
    frames_client = mlrun.utils.v3io_clients.get_frames_client(
        token=access_key,
        address=mlrun.mlconf.v3io_framesd,
        container=container,
    )

    try:
        data = frames_client.read(
            backend=mlrun.common.schemas.model_monitoring.TimeSeriesTarget.TSDB,
            table=events_path,
            columns=["endpoint_id", *metrics],
            filter=f"endpoint_id=='{endpoint_id}'",
            start=start,
            end=end,
        )

        # Fill the metrics mapping dictionary with the metric name and values
        data_dict = data.to_dict()
        for metric in metrics:
            metric_data = data_dict.get(metric)
            if metric_data is None:
                continue

            metrics_mapping[metric] = [
                (str(timestamp), value) for timestamp, value in metric_data.items()
            ]

    except v3io_frames.errors.ReadError as err:
        # Best effort: a read failure is logged together with its cause, and whatever was collected
        # so far is returned
        logger.warning(
            "Failed to read tsdb",
            endpoint=endpoint_id,
            err=mlrun.errors.err_to_str(err),
        )

    return metrics_mapping
|
|
396
|
-
|
|
397
|
-
def _generate_tsdb_paths(self) -> tuple[str, str]:
    """Build the two TSDB paths that are needed for deleting the TSDB resources

    :return: A tuple of:
         [0] = Short path to the TSDB resources
         [1] = Filtered path to TSDB events without schema and container
    """
    helpers = mlrun.common.model_monitoring.helpers

    # Full path for the time series DB events
    events_full_path = (
        mlrun.mlconf.model_endpoint_monitoring.store_prefixes.default.format(
            project=self.project,
            kind=mlrun.common.schemas.ModelMonitoringStoreKinds.EVENTS,
        )
    )

    # Main directory with the TSDB resources
    tsdb_path = helpers.parse_model_endpoint_project_prefix(
        events_full_path, self.project
    )

    # The frames object requires a path without schema and container, which is the third
    # part of the parsed store prefix
    parsed_prefix = helpers.parse_model_endpoint_store_prefix(events_full_path)
    _, _, filtered_path = parsed_prefix

    return tsdb_path, filtered_path
|
|
427
|
-
|
|
428
|
-
def _infer_kv_schema(self):
    """
    Generate the KV schema file of the table, unless it is already there. This schema is being used by the
    Grafana dashboards.
    """

    # Look for the schema record of the table
    schema_records = self.client.kv.new_cursor(
        container=self.container,
        table_path=self.path,
        filter_expression='__name==".#schema"',
    ).all()

    if schema_records:
        # The schema file already exists, nothing to do
        return

    logger.info("Generate a new V3IO KV schema file", kv_table_path=self.path)
    frames_client = self._get_frames_client()
    frames_client.execute(backend="kv", table=self.path, command="infer_schema")
|
|
443
|
-
|
|
444
|
-
def _get_frames_client(self):
    """Create a frames client for the container of this store, using the access key of this store."""
    get_client = mlrun.utils.v3io_clients.get_frames_client
    return get_client(
        token=self.access_key,
        address=mlrun.mlconf.v3io_framesd,
        container=self.container,
    )
|
|
450
|
-
|
|
451
|
-
@staticmethod
|
|
452
|
-
def _build_kv_cursor_filter_expression(
|
|
453
|
-
project: str,
|
|
454
|
-
function: str = None,
|
|
455
|
-
model: str = None,
|
|
456
|
-
labels: list[str] = None,
|
|
457
|
-
top_level: bool = False,
|
|
458
|
-
) -> str:
|
|
459
|
-
"""
|
|
460
|
-
Convert the provided filters into a valid filter expression. The expected filter expression includes different
|
|
461
|
-
conditions, divided by ' AND '.
|
|
462
|
-
|
|
463
|
-
:param project: The name of the project.
|
|
464
|
-
:param model: The name of the model to filter by.
|
|
465
|
-
:param function: The name of the function to filter by.
|
|
466
|
-
:param labels: A list of labels to filter by. Label filters work by either filtering a specific value of
|
|
467
|
-
a label (i.e. list("key=value")) or by looking for the existence of a given
|
|
468
|
-
key (i.e. "key").
|
|
469
|
-
:param top_level: If True will return only routers and endpoint that are NOT children of any router.
|
|
470
|
-
|
|
471
|
-
:return: A valid filter expression as a string.
|
|
472
|
-
|
|
473
|
-
:raise MLRunInvalidArgumentError: If project value is None.
|
|
474
|
-
"""
|
|
475
|
-
|
|
476
|
-
if not project:
|
|
477
|
-
raise mlrun.errors.MLRunInvalidArgumentError("project can't be empty")
|
|
478
|
-
|
|
479
|
-
# Add project filter
|
|
480
|
-
filter_expression = [f"project=='{project}'"]
|
|
481
|
-
|
|
482
|
-
# Add function and model filters
|
|
483
|
-
if function:
|
|
484
|
-
filter_expression.append(f"function=='{function}'")
|
|
485
|
-
if model:
|
|
486
|
-
filter_expression.append(f"model=='{model}'")
|
|
487
|
-
|
|
488
|
-
# Add labels filters
|
|
489
|
-
if labels:
|
|
490
|
-
for label in labels:
|
|
491
|
-
if not label.startswith("_"):
|
|
492
|
-
label = f"_{label}"
|
|
493
|
-
|
|
494
|
-
if "=" in label:
|
|
495
|
-
lbl, value = list(map(lambda x: x.strip(), label.split("=")))
|
|
496
|
-
filter_expression.append(f"{lbl}=='{value}'")
|
|
497
|
-
else:
|
|
498
|
-
filter_expression.append(f"exists({label})")
|
|
499
|
-
|
|
500
|
-
# Apply top_level filter (remove endpoints that considered a child of a router)
|
|
501
|
-
if top_level:
|
|
502
|
-
filter_expression.append(
|
|
503
|
-
f"(endpoint_type=='{str(mlrun.common.schemas.model_monitoring.EndpointType.NODE_EP.value)}' "
|
|
504
|
-
f"OR endpoint_type=='{str(mlrun.common.schemas.model_monitoring.EndpointType.ROUTER.value)}')"
|
|
505
|
-
)
|
|
506
|
-
|
|
507
|
-
return " AND ".join(filter_expression)
|
|
508
|
-
|
|
509
|
-
@staticmethod
|
|
510
|
-
def validate_old_schema_fields(endpoint: dict):
    """
    Adjust, in place, a model endpoint record that was logged with the schema used before 1.3.0:

    - an `error_count` value of "null" is replaced with "0".
    - a `metrics` value of "null" is replaced with a JSON string of zeroed generic metrics.
    - when the record holds the old `endpoint_id` key, its value is copied into the `uid` key.

    Leaving here for backwards compatibility which related to the model endpoint schema.

    :param endpoint: An endpoint flattened dictionary.
    """
    event_fields = mlrun.common.schemas.model_monitoring.EventFieldType

    # Default value for `error_count` - applies only when the key exists with a "null" value
    if endpoint.get(event_fields.ERROR_COUNT) == "null":
        endpoint[event_fields.ERROR_COUNT] = "0"

    # Default value for `metrics` - applies only when the key exists with a "null" value
    if endpoint.get(event_fields.METRICS) == "null":
        live_stats = mlrun.common.schemas.model_monitoring.EventLiveStats
        default_metrics = {
            mlrun.common.schemas.model_monitoring.EventKeyMetrics.GENERIC: {
                live_stats.LATENCY_AVG_1H: 0,
                live_stats.PREDICTIONS_PER_SECOND: 0,
            }
        }
        endpoint[event_fields.METRICS] = json.dumps(default_metrics)

    # `uid` is the updated key name of the endpoint unique id, fill it from the old `endpoint_id` key
    if event_fields.ENDPOINT_ID in endpoint:
        endpoint[event_fields.UID] = endpoint[event_fields.ENDPOINT_ID]
|
|
559
|
-
|
|
560
|
-
@staticmethod
|
|
561
|
-
def _encode_field(field: typing.Union[str, bytes]) -> bytes:
|
|
562
|
-
"""Encode a provided field. Mainly used when storing data in the KV table."""
|
|
563
|
-
|
|
564
|
-
if isinstance(field, str):
|
|
565
|
-
return field.encode("ascii")
|
|
566
|
-
return field
|
|
567
|
-
|
|
568
|
-
@staticmethod
|
|
569
|
-
def _decode_field(field: typing.Union[str, bytes]) -> str:
|
|
570
|
-
"""Decode a provided field. Mainly used when retrieving data from the KV table."""
|
|
571
|
-
|
|
572
|
-
if isinstance(field, bytes):
|
|
573
|
-
return field.decode()
|
|
574
|
-
return field
|