mlrun 1.3.2rc1__py3-none-any.whl → 1.3.2rc2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of mlrun might be problematic. Click here for more details.

Files changed (93) hide show
  1. mlrun/api/api/deps.py +14 -1
  2. mlrun/api/api/endpoints/frontend_spec.py +0 -2
  3. mlrun/api/api/endpoints/functions.py +15 -27
  4. mlrun/api/api/endpoints/grafana_proxy.py +435 -74
  5. mlrun/api/api/endpoints/healthz.py +5 -18
  6. mlrun/api/api/endpoints/model_endpoints.py +33 -37
  7. mlrun/api/api/utils.py +6 -13
  8. mlrun/api/crud/__init__.py +14 -16
  9. mlrun/api/crud/logs.py +5 -7
  10. mlrun/api/crud/model_monitoring/__init__.py +2 -2
  11. mlrun/api/crud/model_monitoring/model_endpoint_store.py +847 -0
  12. mlrun/api/crud/model_monitoring/model_endpoints.py +105 -328
  13. mlrun/api/crud/pipelines.py +2 -3
  14. mlrun/api/db/sqldb/models/models_mysql.py +52 -19
  15. mlrun/api/db/sqldb/models/models_sqlite.py +52 -19
  16. mlrun/api/db/sqldb/session.py +19 -26
  17. mlrun/api/schemas/__init__.py +2 -0
  18. mlrun/api/schemas/constants.py +0 -13
  19. mlrun/api/schemas/frontend_spec.py +0 -1
  20. mlrun/api/schemas/model_endpoints.py +38 -195
  21. mlrun/api/schemas/schedule.py +2 -2
  22. mlrun/api/utils/clients/log_collector.py +5 -0
  23. mlrun/builder.py +9 -41
  24. mlrun/config.py +1 -76
  25. mlrun/data_types/__init__.py +1 -6
  26. mlrun/data_types/data_types.py +1 -3
  27. mlrun/datastore/__init__.py +2 -9
  28. mlrun/datastore/sources.py +20 -25
  29. mlrun/datastore/store_resources.py +1 -1
  30. mlrun/datastore/targets.py +34 -67
  31. mlrun/datastore/utils.py +4 -26
  32. mlrun/db/base.py +2 -4
  33. mlrun/db/filedb.py +5 -13
  34. mlrun/db/httpdb.py +32 -64
  35. mlrun/db/sqldb.py +2 -4
  36. mlrun/errors.py +0 -5
  37. mlrun/execution.py +0 -2
  38. mlrun/feature_store/api.py +8 -24
  39. mlrun/feature_store/feature_set.py +6 -28
  40. mlrun/feature_store/feature_vector.py +0 -2
  41. mlrun/feature_store/ingestion.py +11 -8
  42. mlrun/feature_store/retrieval/base.py +43 -271
  43. mlrun/feature_store/retrieval/dask_merger.py +153 -55
  44. mlrun/feature_store/retrieval/job.py +3 -12
  45. mlrun/feature_store/retrieval/local_merger.py +130 -48
  46. mlrun/feature_store/retrieval/spark_merger.py +125 -126
  47. mlrun/features.py +2 -7
  48. mlrun/model_monitoring/constants.py +6 -48
  49. mlrun/model_monitoring/helpers.py +35 -118
  50. mlrun/model_monitoring/model_monitoring_batch.py +260 -293
  51. mlrun/model_monitoring/stream_processing_fs.py +253 -220
  52. mlrun/platforms/iguazio.py +0 -33
  53. mlrun/projects/project.py +72 -34
  54. mlrun/runtimes/base.py +0 -5
  55. mlrun/runtimes/daskjob.py +0 -2
  56. mlrun/runtimes/function.py +3 -29
  57. mlrun/runtimes/kubejob.py +15 -39
  58. mlrun/runtimes/local.py +45 -7
  59. mlrun/runtimes/mpijob/abstract.py +0 -2
  60. mlrun/runtimes/mpijob/v1.py +0 -2
  61. mlrun/runtimes/pod.py +0 -2
  62. mlrun/runtimes/remotesparkjob.py +0 -2
  63. mlrun/runtimes/serving.py +0 -6
  64. mlrun/runtimes/sparkjob/abstract.py +2 -39
  65. mlrun/runtimes/sparkjob/spark3job.py +0 -2
  66. mlrun/serving/__init__.py +1 -2
  67. mlrun/serving/routers.py +35 -35
  68. mlrun/serving/server.py +12 -22
  69. mlrun/serving/states.py +30 -162
  70. mlrun/serving/v2_serving.py +10 -13
  71. mlrun/utils/clones.py +1 -1
  72. mlrun/utils/model_monitoring.py +96 -122
  73. mlrun/utils/version/version.json +2 -2
  74. {mlrun-1.3.2rc1.dist-info → mlrun-1.3.2rc2.dist-info}/METADATA +27 -23
  75. {mlrun-1.3.2rc1.dist-info → mlrun-1.3.2rc2.dist-info}/RECORD +79 -92
  76. mlrun/api/crud/model_monitoring/grafana.py +0 -427
  77. mlrun/datastore/spark_udf.py +0 -40
  78. mlrun/model_monitoring/__init__.py +0 -44
  79. mlrun/model_monitoring/common.py +0 -112
  80. mlrun/model_monitoring/model_endpoint.py +0 -141
  81. mlrun/model_monitoring/stores/__init__.py +0 -106
  82. mlrun/model_monitoring/stores/kv_model_endpoint_store.py +0 -448
  83. mlrun/model_monitoring/stores/model_endpoint_store.py +0 -147
  84. mlrun/model_monitoring/stores/models/__init__.py +0 -23
  85. mlrun/model_monitoring/stores/models/base.py +0 -18
  86. mlrun/model_monitoring/stores/models/mysql.py +0 -100
  87. mlrun/model_monitoring/stores/models/sqlite.py +0 -98
  88. mlrun/model_monitoring/stores/sql_model_endpoint_store.py +0 -375
  89. mlrun/utils/db.py +0 -52
  90. {mlrun-1.3.2rc1.dist-info → mlrun-1.3.2rc2.dist-info}/LICENSE +0 -0
  91. {mlrun-1.3.2rc1.dist-info → mlrun-1.3.2rc2.dist-info}/WHEEL +0 -0
  92. {mlrun-1.3.2rc1.dist-info → mlrun-1.3.2rc2.dist-info}/entry_points.txt +0 -0
  93. {mlrun-1.3.2rc1.dist-info → mlrun-1.3.2rc2.dist-info}/top_level.txt +0 -0
@@ -1,141 +0,0 @@
1
- # Copyright 2018 Iguazio
2
- #
3
- # Licensed under the Apache License, Version 2.0 (the "License");
4
- # you may not use this file except in compliance with the License.
5
- # You may obtain a copy of the License at
6
- #
7
- # http://www.apache.org/licenses/LICENSE-2.0
8
- #
9
- # Unless required by applicable law or agreed to in writing, software
10
- # distributed under the License is distributed on an "AS IS" BASIS,
11
- # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
- # See the License for the specific language governing permissions and
13
- # limitations under the License.
14
- #
15
-
16
- from typing import Any, Dict, List, Optional
17
-
18
- import mlrun.model
19
-
20
- from .common import EndpointType, ModelMonitoringMode
21
- from .constants import EventKeyMetrics, EventLiveStats
22
-
23
-
24
class ModelEndpointSpec(mlrun.model.ModelObj):
    """Specification section of a model endpoint object (the user/system supplied definition fields)."""

    def __init__(
        self,
        function_uri: Optional[str] = "",
        model: Optional[str] = "",
        model_class: Optional[str] = "",
        model_uri: Optional[str] = "",
        feature_names: Optional[List[str]] = None,
        label_names: Optional[List[str]] = None,
        stream_path: Optional[str] = "",
        algorithm: Optional[str] = "",
        monitor_configuration: Optional[dict] = None,
        active: Optional[bool] = True,
        monitoring_mode: Optional[ModelMonitoringMode] = ModelMonitoringMode.disabled,
    ):
        """
        Store the provided spec values on the instance.

        :param function_uri:          Function URI in the form `<project_name>/<function_name>:<tag>`.
        :param model:                 Model identifier in the form `<model_name>:<version>`.
        :param model_class:           Stored as-is.
        :param model_uri:             Stored as-is.
        :param feature_names:         List of feature names. A falsy value is replaced by a new empty list.
        :param label_names:           List of label names. A falsy value is replaced by a new empty list.
        :param stream_path:           Stored as-is.
        :param algorithm:             Stored as-is.
        :param monitor_configuration: Monitoring configuration dictionary. A falsy value is replaced by a new
                                      empty dictionary.
        :param active:                Stored as-is, defaults to True.
        :param monitoring_mode:       Stored as-is, defaults to `ModelMonitoringMode.disabled`.
        """
        # Identification of the serving function and of the model
        self.function_uri = function_uri  # <project_name>/<function_name>:<tag>
        self.model = model  # <model_name>:<version>
        self.model_class = model_class
        self.model_uri = model_uri

        # Containers are created per instance, so defaults are never shared between objects
        self.feature_names = feature_names if feature_names else []
        self.label_names = label_names if label_names else []

        self.stream_path = stream_path
        self.algorithm = algorithm
        self.monitor_configuration = (
            monitor_configuration if monitor_configuration else {}
        )
        self.active = active
        self.monitoring_mode = monitoring_mode
50
-
51
-
52
class ModelEndpointStatus(mlrun.model.ModelObj):
    """Status section of a model endpoint object (statistics, drift results, metrics and related fields)."""

    def __init__(
        self,
        feature_stats: Optional[dict] = None,
        current_stats: Optional[dict] = None,
        first_request: Optional[str] = "",
        last_request: Optional[str] = "",
        error_count: Optional[int] = 0,
        drift_status: Optional[str] = "",
        drift_measures: Optional[dict] = None,
        metrics: Optional[Dict[str, Dict[str, Any]]] = None,
        features: Optional[List[Dict[str, Any]]] = None,
        children: Optional[List[str]] = None,
        children_uids: Optional[List[str]] = None,
        endpoint_type: Optional[EndpointType] = EndpointType.NODE_EP.value,
        monitoring_feature_set_uri: Optional[str] = "",
        state: Optional[str] = "",
    ):
        """
        Store the provided status values on the instance.

        Dictionary and list arguments that are falsy are replaced by new empty containers, so instances never
        share mutable defaults.

        :param feature_stats:              Stored as a dictionary (empty when not provided).
        :param current_stats:              Stored as a dictionary (empty when not provided).
        :param first_request:              Stored as-is.
        :param last_request:               Stored as-is.
        :param error_count:                Stored as-is, defaults to 0.
        :param drift_status:               Stored as-is.
        :param drift_measures:             Stored as a dictionary (empty when not provided).
        :param metrics:                    Metrics dictionary. When None, a default dictionary is used that holds a
                                           single `EventKeyMetrics.GENERIC` entry with `EventLiveStats.LATENCY_AVG_1H`
                                           and `EventLiveStats.PREDICTIONS_PER_SECOND` both set to 0.
        :param features:                   Stored as a list (empty when not provided).
        :param children:                   Stored as a list (empty when not provided).
        :param children_uids:              Stored as a list (empty when not provided).
        :param endpoint_type:              Stored as-is, defaults to `EndpointType.NODE_EP.value`.
        :param monitoring_feature_set_uri: Stored as-is.
        :param state:                      Stored as-is.
        """
        self.feature_stats = feature_stats or {}
        self.current_stats = current_stats or {}
        self.first_request = first_request
        self.last_request = last_request
        self.error_count = error_count
        self.drift_status = drift_status
        self.drift_measures = drift_measures or {}
        self.features = features or []
        self.children = children or []
        self.children_uids = children_uids or []
        self.endpoint_type = endpoint_type
        self.monitoring_feature_set_uri = monitoring_feature_set_uri

        # Fall back to zeroed generic metrics only when the caller did not provide any
        if metrics is None:
            metrics = {
                EventKeyMetrics.GENERIC: {
                    EventLiveStats.LATENCY_AVG_1H: 0,
                    EventLiveStats.PREDICTIONS_PER_SECOND: 0,
                }
            }
        # Always assign the attribute - previously a caller-provided `metrics` value was silently dropped and
        # the instance was left without a `metrics` attribute at all
        self.metrics = metrics
        self.state = state
90
-
91
-
92
class ModelEndpoint(mlrun.model.ModelObj):
    """Model endpoint object composed of three sections: metadata, spec and status."""

    kind = "model-endpoint"
    _dict_fields = ["kind", "metadata", "spec", "status"]

    def __init__(self):
        """Create a model endpoint whose status, spec and metadata sections hold their default values."""
        self._status: ModelEndpointStatus = ModelEndpointStatus()
        self._spec: ModelEndpointSpec = ModelEndpointSpec()
        self._metadata: mlrun.model.VersionedObjMetadata = (
            mlrun.model.VersionedObjMetadata()
        )

    @property
    def status(self) -> ModelEndpointStatus:
        """The status section of the model endpoint."""
        return self._status

    @status.setter
    def status(self, status):
        # `_verify_dict` is given the target class so the assigned value ends up as a `ModelEndpointStatus`
        verified_status = self._verify_dict(status, "status", ModelEndpointStatus)
        self._status = verified_status

    @property
    def spec(self) -> ModelEndpointSpec:
        """The spec section of the model endpoint."""
        return self._spec

    @spec.setter
    def spec(self, spec):
        verified_spec = self._verify_dict(spec, "spec", ModelEndpointSpec)
        self._spec = verified_spec

    @property
    def metadata(self) -> mlrun.model.VersionedObjMetadata:
        """The metadata section of the model endpoint."""
        return self._metadata

    @metadata.setter
    def metadata(self, metadata):
        verified_metadata = self._verify_dict(
            metadata, "metadata", mlrun.model.VersionedObjMetadata
        )
        self._metadata = verified_metadata

    @classmethod
    def from_flat_dict(cls, struct=None, fields=None, deprecated_fields: dict = None):
        """
        Build a model endpoint object out of a single flat dictionary.

        The same `struct` is handed to the `from_dict` of each of the three sections (metadata, status and spec),
        so every section is populated from the one flat structure.

        :param struct:            Flat dictionary with the model endpoint values.
        :param fields:            Passed through to each section's `from_dict`.
        :param deprecated_fields: Passed through to each section's `from_dict`.

        :return: A new object of this class.
        """
        # All three sections are parsed with exactly the same arguments
        parse_kwargs = {
            "struct": struct,
            "fields": fields,
            "deprecated_fields": deprecated_fields,
        }

        endpoint = cls()
        endpoint._metadata = mlrun.model.VersionedObjMetadata().from_dict(
            **parse_kwargs
        )
        endpoint._status = ModelEndpointStatus().from_dict(**parse_kwargs)
        endpoint._spec = ModelEndpointSpec().from_dict(**parse_kwargs)
        return endpoint
@@ -1,106 +0,0 @@
1
- # Copyright 2018 Iguazio
2
- #
3
- # Licensed under the Apache License, Version 2.0 (the "License");
4
- # you may not use this file except in compliance with the License.
5
- # You may obtain a copy of the License at
6
- #
7
- # http://www.apache.org/licenses/LICENSE-2.0
8
- #
9
- # Unless required by applicable law or agreed to in writing, software
10
- # distributed under the License is distributed on an "AS IS" BASIS,
11
- # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
- # See the License for the specific language governing permissions and
13
- # limitations under the License.
14
-
15
- # flake8: noqa - this is until we take care of the F401 violations with respect to __all__ & sphinx
16
-
17
- import enum
18
- import typing
19
-
20
- import mlrun
21
-
22
- from .model_endpoint_store import ModelEndpointStore
23
-
24
-
25
class ModelEndpointStoreType(enum.Enum):
    """Enum class to handle the different store type values for saving a model endpoint record."""

    v3io_nosql = "v3io-nosql"
    SQL = "sql"

    def to_endpoint_store(
        self,
        project: str,
        access_key: str = None,
        endpoint_store_connection: str = None,
    ) -> ModelEndpointStore:
        """
        Return a ModelEndpointStore object based on the provided enum value.

        :param project:                   The name of the project.
        :param access_key:                Access key with permission to the DB table. Note that if access key is None
                                          and the endpoint target is from type KV then the access key will be
                                          retrieved using `mlrun.mlconf.get_v3io_access_key()`.
        :param endpoint_store_connection: A valid connection string for model endpoint target. Contains several
                                          key-value pairs that required for the database connection.
                                          e.g. A root user with password 1234, tries to connect a schema called
                                          mlrun within a local MySQL DB instance:
                                          'mysql+pymysql://root:1234@localhost:3306/mlrun'.
                                          When not provided, `get_connection_string(project=project)` is used.

        :return: `ModelEndpointStore` object.

        """

        if self.value == ModelEndpointStoreType.v3io_nosql.value:
            # Imported lazily, only when the KV target is actually requested
            from .kv_model_endpoint_store import KVModelEndpointStore

            # Get V3IO access key from the configuration when it was not provided
            access_key = access_key or mlrun.mlconf.get_v3io_access_key()

            return KVModelEndpointStore(project=project, access_key=access_key)

        # Assuming SQL store target if store type is not KV.
        # Update these lines once there are more than two store target types.
        from mlrun.utils.model_monitoring import get_connection_string

        sql_connection_string = endpoint_store_connection or get_connection_string(
            project=project
        )
        from .sql_model_endpoint_store import SQLModelEndpointStore

        return SQLModelEndpointStore(
            project=project, sql_connection_string=sql_connection_string
        )

    @classmethod
    def _missing_(cls, value: typing.Any):
        """A lookup function to handle an invalid value.

        :param value: Provided enum (invalid) value.

        :raise MLRunInvalidArgumentError: Always, with a message that lists the valid store type values.
        """
        # The enum lookup is done by value, therefore the hint lists the member values (e.g. "v3io-nosql")
        # rather than the member names (e.g. "v3io_nosql")
        valid_values = [member.value for member in cls]
        raise mlrun.errors.MLRunInvalidArgumentError(
            f"{value} is not a valid endpoint store, please choose a valid value: {valid_values}."
        )
85
-
86
-
87
def get_model_endpoint_store(
    project: str, access_key: str = None
) -> ModelEndpointStore:
    """
    Create the model endpoint store target that matches `mlrun.mlconf.model_endpoint_monitoring.store_type`.

    :param project:    The name of the project.
    :param access_key: Access key with permission to the DB table.

    :return: `ModelEndpointStore` object. Using this object, the user can apply different operations on the
             model endpoint record such as write, update, get and delete.
    """

    # Read the configured store type value
    configured_store_type = mlrun.mlconf.model_endpoint_monitoring.store_type

    # Resolve the enum member for that value and convert it into a model endpoint store target object
    return ModelEndpointStoreType(configured_store_type).to_endpoint_store(
        project, access_key
    )
@@ -1,448 +0,0 @@
1
- # Copyright 2018 Iguazio
2
- #
3
- # Licensed under the Apache License, Version 2.0 (the "License");
4
- # you may not use this file except in compliance with the License.
5
- # You may obtain a copy of the License at
6
- #
7
- # http://www.apache.org/licenses/LICENSE-2.0
8
- #
9
- # Unless required by applicable law or agreed to in writing, software
10
- # distributed under the License is distributed on an "AS IS" BASIS,
11
- # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
- # See the License for the specific language governing permissions and
13
- # limitations under the License.
14
- #
15
-
16
- import os
17
- import typing
18
-
19
- import v3io.dataplane
20
- import v3io_frames
21
-
22
- import mlrun
23
- import mlrun.model_monitoring.constants as model_monitoring_constants
24
- import mlrun.utils.model_monitoring
25
- import mlrun.utils.v3io_clients
26
- from mlrun.utils import logger
27
-
28
- from .model_endpoint_store import ModelEndpointStore
29
-
30
-
31
class KVModelEndpointStore(ModelEndpointStore):
    """
    Handles the DB operations when the DB target is from type KV. For the KV operations, we use an instance of V3IO
    client and usually the KV table can be found under v3io:///users/pipelines/project-name/model-endpoints/endpoints/.
    """

    def __init__(self, project: str, access_key: str):
        """
        Initialize the V3IO client and resolve the KV table location.

        :param project:    The name of the project.
        :param access_key: V3IO access key. When empty, the `V3IO_ACCESS_KEY` environment variable is used.
        """
        super().__init__(project=project)
        # Initialize a V3IO client instance
        self.access_key = access_key or os.environ.get("V3IO_ACCESS_KEY")
        self.client = mlrun.utils.v3io_clients.get_v3io_client(
            endpoint=mlrun.mlconf.v3io_api, access_key=self.access_key
        )
        # Get the KV table path and container
        self.path, self.container = self._get_path_and_container()

    def write_model_endpoint(self, endpoint: typing.Dict[str, typing.Any]):
        """
        Create a new endpoint record in the KV table.

        :param endpoint: model endpoint dictionary that will be written into the DB.
        """

        self.client.kv.put(
            container=self.container,
            table_path=self.path,
            key=endpoint[model_monitoring_constants.EventFieldType.UID],
            attributes=endpoint,
        )

    def update_model_endpoint(
        self, endpoint_id: str, attributes: typing.Dict[str, typing.Any]
    ):
        """
        Update a model endpoint record with a given attributes.

        :param endpoint_id: The unique id of the model endpoint.
        :param attributes: Dictionary of attributes that will be used for update the model endpoint. Note that the keys
                           of the attributes dictionary should exist in the KV table.

        """

        self.client.kv.update(
            container=self.container,
            table_path=self.path,
            key=endpoint_id,
            attributes=attributes,
        )

    def delete_model_endpoint(
        self,
        endpoint_id: str,
    ):
        """
        Deletes the KV record of a given model endpoint id.

        :param endpoint_id: The unique id of the model endpoint.
        """

        self.client.kv.delete(
            container=self.container,
            table_path=self.path,
            key=endpoint_id,
        )

    def get_model_endpoint(
        self,
        endpoint_id: str,
    ) -> typing.Dict[str, typing.Any]:
        """
        Get a single model endpoint record.

        :param endpoint_id: The unique id of the model endpoint.

        :return: A model endpoint record as a dictionary.

        :raise MLRunNotFoundError: If the endpoint was not found.
        """

        # Getting the raw data from the KV table. The status is never raised here, a missing record is
        # detected below through the empty item
        response = self.client.kv.get(
            container=self.container,
            table_path=self.path,
            key=endpoint_id,
            raise_for_status=v3io.dataplane.RaiseForStatus.never,
            access_key=self.access_key,
        )
        endpoint = response.output.item

        if not endpoint:
            raise mlrun.errors.MLRunNotFoundError(f"Endpoint {endpoint_id} not found")

        # For backwards compatibility: replace null values for `error_count` and `metrics`
        mlrun.utils.model_monitoring.validate_errors_and_metrics(endpoint=endpoint)

        return endpoint

    def _get_path_and_container(self):
        """Getting path and container based on the model monitoring configurations"""
        path = mlrun.mlconf.model_endpoint_monitoring.store_prefixes.default.format(
            project=self.project,
            kind=model_monitoring_constants.ModelMonitoringStoreKinds.ENDPOINTS,
        )
        (
            _,
            container,
            path,
        ) = mlrun.utils.model_monitoring.parse_model_endpoint_store_prefix(path)
        return path, container

    @staticmethod
    def _extract_endpoint_id(record: typing.Dict[str, typing.Any]) -> str:
        """Return the unique id of a KV record, falling back to the legacy `endpoint_id` key column."""
        if model_monitoring_constants.EventFieldType.UID in record:
            return record[model_monitoring_constants.EventFieldType.UID]
        # This is kept for backwards compatibility - in old versions the key column named endpoint_id
        return record[model_monitoring_constants.EventFieldType.ENDPOINT_ID]

    def list_model_endpoints(
        self,
        model: str = None,
        function: str = None,
        labels: typing.List[str] = None,
        top_level: bool = None,
        uids: typing.List = None,
    ) -> typing.List[typing.Dict[str, typing.Any]]:
        """
        Returns a list of model endpoint dictionaries, supports filtering by model, function, labels or top level.
        By default, when no filters are applied, all available model endpoints for the given project will
        be listed.

        :param model:           The name of the model to filter by.
        :param function:        The name of the function to filter by.
        :param labels:          A list of labels to filter by. Label filters work by either filtering a specific value
                                of a label (i.e. list("key=value")) or by looking for the existence of a given
                                key (i.e. "key").
        :param top_level:       If True will return only routers and endpoint that are NOT children of any router.
        :param uids:            List of model endpoint unique ids to include in the result. When provided, the ids
                                found by the filtered KV query are not used.

        :return: A list of model endpoint dictionaries. An empty list is returned when the KV query fails.
        """

        # Initialize an empty model endpoints list
        endpoint_list = []

        # Retrieve the raw data from the KV table and get the endpoint ids
        try:
            cursor = self.client.kv.new_cursor(
                container=self.container,
                table_path=self.path,
                filter_expression=self._build_kv_cursor_filter_expression(
                    self.project,
                    function,
                    model,
                    labels,
                    top_level,
                ),
                raise_for_status=v3io.dataplane.RaiseForStatus.never,
            )
            items = cursor.all()

        except Exception as exc:
            # Best effort - a failed query results in an empty list rather than an error
            logger.warning("Failed retrieving raw data from kv table", exc=exc)
            return endpoint_list

        # Create a list of model endpoints unique ids
        if uids is None:
            uids = [self._extract_endpoint_id(item) for item in items]

        # Add each relevant model endpoint to the model endpoints list
        for endpoint_id in uids:
            endpoint = self.get_model_endpoint(
                endpoint_id=endpoint_id,
            )
            endpoint_list.append(endpoint)

        return endpoint_list

    def delete_model_endpoints_resources(
        self, endpoints: typing.List[typing.Dict[str, typing.Any]]
    ):
        """
        Delete all model endpoints resources in both KV and the time series DB.

        :param endpoints: A list of model endpoints flattened dictionaries.
        """

        # Delete model endpoint record from KV table
        for endpoint_dict in endpoints:
            self.delete_model_endpoint(
                self._extract_endpoint_id(endpoint_dict),
            )

        # Delete remain records in the KV
        all_records = self.client.kv.new_cursor(
            container=self.container,
            table_path=self.path,
            raise_for_status=v3io.dataplane.RaiseForStatus.never,
        ).all()

        all_records = [r["__name"] for r in all_records]

        # Cleanup KV
        for record in all_records:
            self.client.kv.delete(
                container=self.container,
                table_path=self.path,
                key=record,
                raise_for_status=v3io.dataplane.RaiseForStatus.never,
            )

        # Cleanup TSDB
        frames = mlrun.utils.v3io_clients.get_frames_client(
            token=self.access_key,
            address=mlrun.mlconf.v3io_framesd,
            container=self.container,
        )

        # Generate the required tsdb paths
        tsdb_path, filtered_path = self._generate_tsdb_paths()

        # Delete time series DB resources
        try:
            frames.delete(
                backend=model_monitoring_constants.TimeSeriesTarget.TSDB,
                table=filtered_path,
            )
        except (v3io_frames.errors.DeleteError, v3io_frames.errors.CreateError) as e:
            # Frames might raise an exception if schema file does not exist.
            logger.warning("Failed to delete TSDB schema file:", err=e)

        # Final cleanup of tsdb path. Strings are immutable, so the normalized path must be re-assigned -
        # calling `replace` without keeping its result has no effect
        tsdb_path = tsdb_path.replace("://u", ":///u")
        store, _ = mlrun.store_manager.get_or_create_store(tsdb_path)
        store.rm(tsdb_path, recursive=True)

    def get_endpoint_real_time_metrics(
        self,
        endpoint_id: str,
        metrics: typing.List[str],
        start: str = "now-1h",
        end: str = "now",
        access_key: str = None,
    ) -> typing.Dict[str, typing.List[typing.Tuple[str, float]]]:
        """
        Getting metrics from the time series DB. There are pre-defined metrics for model endpoints such as
        `predictions_per_second` and `latency_avg_5m` but also custom metrics defined by the user.

        :param endpoint_id:      The unique id of the model endpoint.
        :param metrics:          A list of real-time metrics to return for the model endpoint.
        :param start:            The start time of the metrics. Can be represented by a string containing an RFC 3339
                                 time, a Unix timestamp in milliseconds, a relative time (`'now'` or
                                 `'now-[0-9]+[mhd]'`, where `m` = minutes, `h` = hours, and `'d'` = days), or 0 for the
                                 earliest time.
        :param end:              The end time of the metrics. Can be represented by a string containing an RFC 3339
                                 time, a Unix timestamp in milliseconds, a relative time (`'now'` or
                                 `'now-[0-9]+[mhd]'`, where `m` = minutes, `h` = hours, and `'d'` = days), or 0 for the
                                 earliest time.
        :param access_key:       V3IO access key that will be used for generating Frames client object. If not
                                 provided, the access key will be retrieved using
                                 `mlrun.mlconf.get_v3io_access_key()`.

        :return: A dictionary of metrics in which the key is a metric name and the value is a list of tuples that
                 includes timestamps and the values. Metrics that are missing from the result are skipped, and an
                 empty dictionary is returned when reading from the time series DB fails.

        :raise MLRunInvalidArgumentError: If no metric names were provided.
        """

        # Initialize access key
        access_key = access_key or mlrun.mlconf.get_v3io_access_key()

        if not metrics:
            raise mlrun.errors.MLRunInvalidArgumentError(
                "Metric names must be provided"
            )

        # Initialize metrics mapping dictionary
        metrics_mapping = {}

        # Getting the path for the time series DB
        events_path = (
            mlrun.mlconf.model_endpoint_monitoring.store_prefixes.default.format(
                project=self.project,
                kind=model_monitoring_constants.ModelMonitoringStoreKinds.EVENTS,
            )
        )
        (
            _,
            container,
            events_path,
        ) = mlrun.utils.model_monitoring.parse_model_endpoint_store_prefix(events_path)

        # Retrieve the raw data from the time series DB based on the provided metrics and time ranges
        frames_client = mlrun.utils.v3io_clients.get_frames_client(
            token=access_key,
            address=mlrun.mlconf.v3io_framesd,
            container=container,
        )

        try:
            data = frames_client.read(
                backend=model_monitoring_constants.TimeSeriesTarget.TSDB,
                table=events_path,
                columns=["endpoint_id", *metrics],
                filter=f"endpoint_id=='{endpoint_id}'",
                start=start,
                end=end,
            )

            # Fill the metrics mapping dictionary with the metric name and values
            data_dict = data.to_dict()
            for metric in metrics:
                metric_data = data_dict.get(metric)
                if metric_data is None:
                    continue

                metrics_mapping[metric] = [
                    (str(timestamp), value) for timestamp, value in metric_data.items()
                ]

        except v3io_frames.errors.ReadError:
            # Best effort - whatever was collected so far (possibly nothing) is returned
            logger.warning("Failed to read tsdb", endpoint=endpoint_id)

        return metrics_mapping

    def _generate_tsdb_paths(self) -> typing.Tuple[str, str]:
        """Generate a short path to the TSDB resources and a filtered path for the frames object
        :return: A tuple of:
             [0] = Short path to the TSDB resources
             [1] = Filtered path to TSDB events without schema and container
        """
        # Full path for the time series DB events
        full_path = (
            mlrun.mlconf.model_endpoint_monitoring.store_prefixes.default.format(
                project=self.project,
                kind=model_monitoring_constants.ModelMonitoringStoreKinds.EVENTS,
            )
        )

        # Generate the main directory with the TSDB resources
        tsdb_path = mlrun.utils.model_monitoring.parse_model_endpoint_project_prefix(
            full_path, self.project
        )

        # Generate filtered path without schema and container as required by the frames object
        (
            _,
            _,
            filtered_path,
        ) = mlrun.utils.model_monitoring.parse_model_endpoint_store_prefix(full_path)
        return tsdb_path, filtered_path

    @staticmethod
    def _build_kv_cursor_filter_expression(
        project: str,
        function: str = None,
        model: str = None,
        labels: typing.List[str] = None,
        top_level: bool = False,
    ) -> str:
        """
        Convert the provided filters into a valid filter expression. The expected filter expression includes different
        conditions, divided by ' AND '.

        :param project:    The name of the project.
        :param model:      The name of the model to filter by.
        :param function:   The name of the function to filter by.
        :param labels:     A list of labels to filter by. Label filters work by either filtering a specific value of
                           a label (i.e. list("key=value")) or by looking for the existence of a given
                           key (i.e. "key"). Only the first `=` separates the key from the value, so the value
                           itself may contain `=`.
        :param top_level:  If True will return only routers and endpoint that are NOT children of any router.

        :return: A valid filter expression as a string.

        :raise MLRunInvalidArgumentError: If project value is None.
        """

        if not project:
            raise mlrun.errors.MLRunInvalidArgumentError("project can't be empty")

        # Add project filter
        filter_expression = [f"project=='{project}'"]

        # Add function and model filters
        if function:
            filter_expression.append(f"function=='{function}'")
        if model:
            filter_expression.append(f"model=='{model}'")

        # Add labels filters
        if labels:
            for label in labels:
                # Label attributes are looked up with a leading underscore
                if not label.startswith("_"):
                    label = f"_{label}"

                if "=" in label:
                    # Split on the first '=' only, otherwise a value that contains '=' fails the unpacking
                    lbl, value = (part.strip() for part in label.split("=", 1))
                    filter_expression.append(f"{lbl}=='{value}'")
                else:
                    filter_expression.append(f"exists({label})")

        # Apply top_level filter (remove endpoints that considered a child of a router)
        if top_level:
            filter_expression.append(
                f"(endpoint_type=='{str(mlrun.model_monitoring.EndpointType.NODE_EP.value)}' "
                f"OR endpoint_type=='{str(mlrun.model_monitoring.EndpointType.ROUTER.value)}')"
            )

        return " AND ".join(filter_expression)