mlrun-1.7.0rc5-py3-none-any.whl → mlrun-1.7.2-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of mlrun might be problematic. Click here for more details.

Files changed (234)
  1. mlrun/__init__.py +11 -1
  2. mlrun/__main__.py +39 -121
  3. mlrun/{datastore/helpers.py → alerts/__init__.py} +2 -5
  4. mlrun/alerts/alert.py +248 -0
  5. mlrun/api/schemas/__init__.py +4 -3
  6. mlrun/artifacts/__init__.py +8 -3
  7. mlrun/artifacts/base.py +39 -254
  8. mlrun/artifacts/dataset.py +9 -190
  9. mlrun/artifacts/manager.py +73 -46
  10. mlrun/artifacts/model.py +30 -158
  11. mlrun/artifacts/plots.py +23 -380
  12. mlrun/common/constants.py +73 -2
  13. mlrun/common/db/sql_session.py +3 -2
  14. mlrun/common/formatters/__init__.py +21 -0
  15. mlrun/common/formatters/artifact.py +46 -0
  16. mlrun/common/formatters/base.py +113 -0
  17. mlrun/common/formatters/feature_set.py +44 -0
  18. mlrun/common/formatters/function.py +46 -0
  19. mlrun/common/formatters/pipeline.py +53 -0
  20. mlrun/common/formatters/project.py +51 -0
  21. mlrun/common/formatters/run.py +29 -0
  22. mlrun/common/helpers.py +11 -1
  23. mlrun/{runtimes → common/runtimes}/constants.py +32 -4
  24. mlrun/common/schemas/__init__.py +21 -4
  25. mlrun/common/schemas/alert.py +202 -0
  26. mlrun/common/schemas/api_gateway.py +113 -2
  27. mlrun/common/schemas/artifact.py +28 -1
  28. mlrun/common/schemas/auth.py +11 -0
  29. mlrun/common/schemas/client_spec.py +2 -1
  30. mlrun/common/schemas/common.py +7 -4
  31. mlrun/common/schemas/constants.py +3 -0
  32. mlrun/common/schemas/feature_store.py +58 -28
  33. mlrun/common/schemas/frontend_spec.py +8 -0
  34. mlrun/common/schemas/function.py +11 -0
  35. mlrun/common/schemas/hub.py +7 -9
  36. mlrun/common/schemas/model_monitoring/__init__.py +21 -4
  37. mlrun/common/schemas/model_monitoring/constants.py +136 -42
  38. mlrun/common/schemas/model_monitoring/grafana.py +9 -5
  39. mlrun/common/schemas/model_monitoring/model_endpoints.py +89 -41
  40. mlrun/common/schemas/notification.py +69 -12
  41. mlrun/{runtimes/mpijob/v1alpha1.py → common/schemas/pagination.py} +10 -13
  42. mlrun/common/schemas/pipeline.py +7 -0
  43. mlrun/common/schemas/project.py +67 -16
  44. mlrun/common/schemas/runs.py +17 -0
  45. mlrun/common/schemas/schedule.py +1 -1
  46. mlrun/common/schemas/workflow.py +10 -2
  47. mlrun/common/types.py +14 -1
  48. mlrun/config.py +224 -58
  49. mlrun/data_types/data_types.py +11 -1
  50. mlrun/data_types/spark.py +5 -4
  51. mlrun/data_types/to_pandas.py +75 -34
  52. mlrun/datastore/__init__.py +8 -10
  53. mlrun/datastore/alibaba_oss.py +131 -0
  54. mlrun/datastore/azure_blob.py +131 -43
  55. mlrun/datastore/base.py +107 -47
  56. mlrun/datastore/datastore.py +17 -7
  57. mlrun/datastore/datastore_profile.py +91 -7
  58. mlrun/datastore/dbfs_store.py +3 -7
  59. mlrun/datastore/filestore.py +1 -3
  60. mlrun/datastore/google_cloud_storage.py +92 -32
  61. mlrun/datastore/hdfs.py +5 -0
  62. mlrun/datastore/inmem.py +6 -3
  63. mlrun/datastore/redis.py +3 -2
  64. mlrun/datastore/s3.py +30 -12
  65. mlrun/datastore/snowflake_utils.py +45 -0
  66. mlrun/datastore/sources.py +274 -59
  67. mlrun/datastore/spark_utils.py +30 -0
  68. mlrun/datastore/store_resources.py +9 -7
  69. mlrun/datastore/storeytargets.py +151 -0
  70. mlrun/datastore/targets.py +374 -102
  71. mlrun/datastore/utils.py +68 -5
  72. mlrun/datastore/v3io.py +28 -50
  73. mlrun/db/auth_utils.py +152 -0
  74. mlrun/db/base.py +231 -22
  75. mlrun/db/factory.py +1 -4
  76. mlrun/db/httpdb.py +864 -228
  77. mlrun/db/nopdb.py +268 -16
  78. mlrun/errors.py +35 -5
  79. mlrun/execution.py +111 -38
  80. mlrun/feature_store/__init__.py +0 -2
  81. mlrun/feature_store/api.py +46 -53
  82. mlrun/feature_store/common.py +6 -11
  83. mlrun/feature_store/feature_set.py +48 -23
  84. mlrun/feature_store/feature_vector.py +13 -2
  85. mlrun/feature_store/ingestion.py +7 -6
  86. mlrun/feature_store/retrieval/base.py +9 -4
  87. mlrun/feature_store/retrieval/dask_merger.py +2 -0
  88. mlrun/feature_store/retrieval/job.py +13 -4
  89. mlrun/feature_store/retrieval/local_merger.py +2 -0
  90. mlrun/feature_store/retrieval/spark_merger.py +24 -32
  91. mlrun/feature_store/steps.py +38 -19
  92. mlrun/features.py +6 -14
  93. mlrun/frameworks/_common/plan.py +3 -3
  94. mlrun/frameworks/_dl_common/loggers/tensorboard_logger.py +7 -12
  95. mlrun/frameworks/_ml_common/plan.py +1 -1
  96. mlrun/frameworks/auto_mlrun/auto_mlrun.py +2 -2
  97. mlrun/frameworks/lgbm/__init__.py +1 -1
  98. mlrun/frameworks/lgbm/callbacks/callback.py +2 -4
  99. mlrun/frameworks/lgbm/model_handler.py +1 -1
  100. mlrun/frameworks/parallel_coordinates.py +4 -4
  101. mlrun/frameworks/pytorch/__init__.py +2 -2
  102. mlrun/frameworks/sklearn/__init__.py +1 -1
  103. mlrun/frameworks/sklearn/mlrun_interface.py +13 -3
  104. mlrun/frameworks/tf_keras/__init__.py +5 -2
  105. mlrun/frameworks/tf_keras/callbacks/logging_callback.py +1 -1
  106. mlrun/frameworks/tf_keras/mlrun_interface.py +2 -2
  107. mlrun/frameworks/xgboost/__init__.py +1 -1
  108. mlrun/k8s_utils.py +57 -12
  109. mlrun/launcher/__init__.py +1 -1
  110. mlrun/launcher/base.py +6 -5
  111. mlrun/launcher/client.py +13 -11
  112. mlrun/launcher/factory.py +1 -1
  113. mlrun/launcher/local.py +15 -5
  114. mlrun/launcher/remote.py +10 -3
  115. mlrun/lists.py +6 -2
  116. mlrun/model.py +297 -48
  117. mlrun/model_monitoring/__init__.py +1 -1
  118. mlrun/model_monitoring/api.py +152 -357
  119. mlrun/model_monitoring/applications/__init__.py +10 -0
  120. mlrun/model_monitoring/applications/_application_steps.py +190 -0
  121. mlrun/model_monitoring/applications/base.py +108 -0
  122. mlrun/model_monitoring/applications/context.py +341 -0
  123. mlrun/model_monitoring/{evidently_application.py → applications/evidently_base.py} +27 -22
  124. mlrun/model_monitoring/applications/histogram_data_drift.py +227 -91
  125. mlrun/model_monitoring/applications/results.py +99 -0
  126. mlrun/model_monitoring/controller.py +130 -303
  127. mlrun/model_monitoring/{stores/models/sqlite.py → db/__init__.py} +5 -10
  128. mlrun/model_monitoring/db/stores/__init__.py +136 -0
  129. mlrun/model_monitoring/db/stores/base/__init__.py +15 -0
  130. mlrun/model_monitoring/db/stores/base/store.py +213 -0
  131. mlrun/model_monitoring/db/stores/sqldb/__init__.py +13 -0
  132. mlrun/model_monitoring/db/stores/sqldb/models/__init__.py +71 -0
  133. mlrun/model_monitoring/db/stores/sqldb/models/base.py +190 -0
  134. mlrun/model_monitoring/db/stores/sqldb/models/mysql.py +103 -0
  135. mlrun/model_monitoring/{stores/models/mysql.py → db/stores/sqldb/models/sqlite.py} +19 -13
  136. mlrun/model_monitoring/db/stores/sqldb/sql_store.py +659 -0
  137. mlrun/model_monitoring/db/stores/v3io_kv/__init__.py +13 -0
  138. mlrun/model_monitoring/db/stores/v3io_kv/kv_store.py +726 -0
  139. mlrun/model_monitoring/db/tsdb/__init__.py +105 -0
  140. mlrun/model_monitoring/db/tsdb/base.py +448 -0
  141. mlrun/model_monitoring/db/tsdb/helpers.py +30 -0
  142. mlrun/model_monitoring/db/tsdb/tdengine/__init__.py +15 -0
  143. mlrun/model_monitoring/db/tsdb/tdengine/schemas.py +298 -0
  144. mlrun/model_monitoring/db/tsdb/tdengine/stream_graph_steps.py +42 -0
  145. mlrun/model_monitoring/db/tsdb/tdengine/tdengine_connector.py +522 -0
  146. mlrun/model_monitoring/db/tsdb/v3io/__init__.py +15 -0
  147. mlrun/model_monitoring/db/tsdb/v3io/stream_graph_steps.py +158 -0
  148. mlrun/model_monitoring/db/tsdb/v3io/v3io_connector.py +849 -0
  149. mlrun/model_monitoring/features_drift_table.py +34 -22
  150. mlrun/model_monitoring/helpers.py +177 -39
  151. mlrun/model_monitoring/model_endpoint.py +3 -2
  152. mlrun/model_monitoring/stream_processing.py +165 -398
  153. mlrun/model_monitoring/tracking_policy.py +7 -1
  154. mlrun/model_monitoring/writer.py +161 -125
  155. mlrun/package/packagers/default_packager.py +2 -2
  156. mlrun/package/packagers_manager.py +1 -0
  157. mlrun/package/utils/_formatter.py +2 -2
  158. mlrun/platforms/__init__.py +11 -10
  159. mlrun/platforms/iguazio.py +67 -228
  160. mlrun/projects/__init__.py +6 -1
  161. mlrun/projects/operations.py +47 -20
  162. mlrun/projects/pipelines.py +396 -249
  163. mlrun/projects/project.py +1125 -414
  164. mlrun/render.py +28 -22
  165. mlrun/run.py +207 -180
  166. mlrun/runtimes/__init__.py +76 -11
  167. mlrun/runtimes/base.py +40 -14
  168. mlrun/runtimes/daskjob.py +9 -2
  169. mlrun/runtimes/databricks_job/databricks_runtime.py +1 -0
  170. mlrun/runtimes/databricks_job/databricks_wrapper.py +1 -1
  171. mlrun/runtimes/funcdoc.py +1 -29
  172. mlrun/runtimes/kubejob.py +34 -128
  173. mlrun/runtimes/local.py +39 -10
  174. mlrun/runtimes/mpijob/__init__.py +0 -20
  175. mlrun/runtimes/mpijob/abstract.py +8 -8
  176. mlrun/runtimes/mpijob/v1.py +1 -1
  177. mlrun/runtimes/nuclio/api_gateway.py +646 -177
  178. mlrun/runtimes/nuclio/application/__init__.py +15 -0
  179. mlrun/runtimes/nuclio/application/application.py +758 -0
  180. mlrun/runtimes/nuclio/application/reverse_proxy.go +95 -0
  181. mlrun/runtimes/nuclio/function.py +188 -68
  182. mlrun/runtimes/nuclio/serving.py +57 -60
  183. mlrun/runtimes/pod.py +191 -58
  184. mlrun/runtimes/remotesparkjob.py +11 -8
  185. mlrun/runtimes/sparkjob/spark3job.py +17 -18
  186. mlrun/runtimes/utils.py +40 -73
  187. mlrun/secrets.py +6 -2
  188. mlrun/serving/__init__.py +8 -1
  189. mlrun/serving/remote.py +2 -3
  190. mlrun/serving/routers.py +89 -64
  191. mlrun/serving/server.py +54 -26
  192. mlrun/serving/states.py +187 -56
  193. mlrun/serving/utils.py +19 -11
  194. mlrun/serving/v2_serving.py +136 -63
  195. mlrun/track/tracker.py +2 -1
  196. mlrun/track/trackers/mlflow_tracker.py +5 -0
  197. mlrun/utils/async_http.py +26 -6
  198. mlrun/utils/db.py +18 -0
  199. mlrun/utils/helpers.py +375 -105
  200. mlrun/utils/http.py +2 -2
  201. mlrun/utils/logger.py +75 -9
  202. mlrun/utils/notifications/notification/__init__.py +14 -10
  203. mlrun/utils/notifications/notification/base.py +48 -0
  204. mlrun/utils/notifications/notification/console.py +2 -0
  205. mlrun/utils/notifications/notification/git.py +24 -1
  206. mlrun/utils/notifications/notification/ipython.py +2 -0
  207. mlrun/utils/notifications/notification/slack.py +96 -21
  208. mlrun/utils/notifications/notification/webhook.py +63 -2
  209. mlrun/utils/notifications/notification_pusher.py +146 -16
  210. mlrun/utils/regex.py +9 -0
  211. mlrun/utils/retryer.py +3 -2
  212. mlrun/utils/v3io_clients.py +2 -3
  213. mlrun/utils/version/version.json +2 -2
  214. mlrun-1.7.2.dist-info/METADATA +390 -0
  215. mlrun-1.7.2.dist-info/RECORD +351 -0
  216. {mlrun-1.7.0rc5.dist-info → mlrun-1.7.2.dist-info}/WHEEL +1 -1
  217. mlrun/feature_store/retrieval/conversion.py +0 -271
  218. mlrun/kfpops.py +0 -868
  219. mlrun/model_monitoring/application.py +0 -310
  220. mlrun/model_monitoring/batch.py +0 -974
  221. mlrun/model_monitoring/controller_handler.py +0 -37
  222. mlrun/model_monitoring/prometheus.py +0 -216
  223. mlrun/model_monitoring/stores/__init__.py +0 -111
  224. mlrun/model_monitoring/stores/kv_model_endpoint_store.py +0 -574
  225. mlrun/model_monitoring/stores/model_endpoint_store.py +0 -145
  226. mlrun/model_monitoring/stores/models/__init__.py +0 -27
  227. mlrun/model_monitoring/stores/models/base.py +0 -84
  228. mlrun/model_monitoring/stores/sql_model_endpoint_store.py +0 -382
  229. mlrun/platforms/other.py +0 -305
  230. mlrun-1.7.0rc5.dist-info/METADATA +0 -269
  231. mlrun-1.7.0rc5.dist-info/RECORD +0 -323
  232. {mlrun-1.7.0rc5.dist-info → mlrun-1.7.2.dist-info}/LICENSE +0 -0
  233. {mlrun-1.7.0rc5.dist-info → mlrun-1.7.2.dist-info}/entry_points.txt +0 -0
  234. {mlrun-1.7.0rc5.dist-info → mlrun-1.7.2.dist-info}/top_level.txt +0 -0
@@ -1,574 +0,0 @@
1
- # Copyright 2023 Iguazio
2
- #
3
- # Licensed under the Apache License, Version 2.0 (the "License");
4
- # you may not use this file except in compliance with the License.
5
- # You may obtain a copy of the License at
6
- #
7
- # http://www.apache.org/licenses/LICENSE-2.0
8
- #
9
- # Unless required by applicable law or agreed to in writing, software
10
- # distributed under the License is distributed on an "AS IS" BASIS,
11
- # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
- # See the License for the specific language governing permissions and
13
- # limitations under the License.
14
- #
15
-
16
- import json
17
- import os
18
- import typing
19
-
20
- import v3io.dataplane
21
- import v3io_frames
22
-
23
- import mlrun.common.model_monitoring.helpers
24
- import mlrun.common.schemas.model_monitoring
25
- import mlrun.utils.v3io_clients
26
- from mlrun.utils import logger
27
-
28
- from .model_endpoint_store import ModelEndpointStore
29
-
30
# Endpoint attributes that are encoded to bytes before being stored in the KV table
# and decoded back to strings after being retrieved from it
_EventFieldType = mlrun.common.schemas.model_monitoring.EventFieldType
fields_to_encode_decode = [
    _EventFieldType.FEATURE_STATS,
    _EventFieldType.CURRENT_STATS,
]
35
-
36
-
37
class KVModelEndpointStore(ModelEndpointStore):
    """
    Handles the DB operations when the DB target is from type KV. For the KV operations, we use an instance of V3IO
    client and usually the KV table can be found under v3io:///users/pipelines/project-name/model-endpoints/endpoints/.
    """

    def __init__(self, project: str, access_key: str):
        """
        Create the V3IO client and resolve the KV table path and container.

        :param project:    The name of the project.
        :param access_key: V3IO access key. When empty, the `V3IO_ACCESS_KEY` environment variable is used instead.
        """
        super().__init__(project=project)
        # Initialize a V3IO client instance
        self.access_key = access_key or os.environ.get("V3IO_ACCESS_KEY")
        self.client = mlrun.utils.v3io_clients.get_v3io_client(
            endpoint=mlrun.mlconf.v3io_api, access_key=self.access_key
        )
        # Get the KV table path and container
        self.path, self.container = self._get_path_and_container()

    def write_model_endpoint(self, endpoint: dict[str, typing.Any]):
        """
        Create a new endpoint record in the KV table.

        :param endpoint: model endpoint dictionary that will be written into the DB. The dictionary itself is not
                         modified; the encoded values are written from a copy.
        """

        # Encode on a copy so the caller's dictionary does not end up holding bytes values
        record = self._encode_fields(endpoint)

        self.client.kv.put(
            container=self.container,
            table_path=self.path,
            key=record[mlrun.common.schemas.model_monitoring.EventFieldType.UID],
            attributes=record,
        )

        self._infer_kv_schema()

    def update_model_endpoint(
        self, endpoint_id: str, attributes: dict[str, typing.Any]
    ):
        """
        Update a model endpoint record with a given attributes.

        :param endpoint_id: The unique id of the model endpoint.
        :param attributes:  Dictionary of attributes that will be used for update the model endpoint. Note that the
                            keys of the attributes dictionary should exist in the KV table. The dictionary itself is
                            not modified; the encoded values are written from a copy.

        """

        self.client.kv.update(
            container=self.container,
            table_path=self.path,
            key=endpoint_id,
            attributes=self._encode_fields(attributes),
        )

    def delete_model_endpoint(
        self,
        endpoint_id: str,
    ):
        """
        Deletes the KV record of a given model endpoint id.

        :param endpoint_id: The unique id of the model endpoint.
        """

        self.client.kv.delete(
            container=self.container,
            table_path=self.path,
            key=endpoint_id,
        )

    def get_model_endpoint(
        self,
        endpoint_id: str,
    ) -> dict[str, typing.Any]:
        """
        Get a single model endpoint record.

        :param endpoint_id: The unique id of the model endpoint.

        :return: A model endpoint record as a dictionary.

        :raise MLRunNotFoundError: If the endpoint was not found.
        """

        # Getting the raw data from the KV table
        response = self.client.kv.get(
            container=self.container,
            table_path=self.path,
            key=endpoint_id,
            raise_for_status=v3io.dataplane.RaiseForStatus.never,
            access_key=self.access_key,
        )
        endpoint = response.output.item

        # Check for a missing record before touching its content, so that an empty or `None` item is always
        # reported as "not found"
        if not endpoint:
            raise mlrun.errors.MLRunNotFoundError(f"Endpoint {endpoint_id} not found")

        for field in fields_to_encode_decode:
            if field in endpoint:
                # Decode binary data
                endpoint[field] = self._decode_field(endpoint[field])

        # For backwards compatibility: replace null values for `error_count` and `metrics`
        self.validate_old_schema_fields(endpoint=endpoint)

        return endpoint

    def _get_path_and_container(self):
        """
        Getting path and container based on the model monitoring configurations.

        :return: A tuple of the KV table path and the container, in that order.
        """
        full_path = mlrun.mlconf.model_endpoint_monitoring.store_prefixes.default.format(
            project=self.project,
            kind=mlrun.common.schemas.ModelMonitoringStoreKinds.ENDPOINTS,
        )
        (
            _,
            container,
            path,
        ) = mlrun.common.model_monitoring.helpers.parse_model_endpoint_store_prefix(
            full_path
        )
        return path, container

    def list_model_endpoints(
        self,
        model: typing.Optional[str] = None,
        function: typing.Optional[str] = None,
        labels: typing.Optional[list[str]] = None,
        top_level: typing.Optional[bool] = None,
        uids: typing.Optional[list] = None,
    ) -> list[dict[str, typing.Any]]:
        """
        Returns a list of model endpoint dictionaries, supports filtering by model, function, labels or top level.
        By default, when no filters are applied, all available model endpoints for the given project will
        be listed.

        :param model:           The name of the model to filter by.
        :param function:        The name of the function to filter by.
        :param labels:          A list of labels to filter by. Label filters work by either filtering a specific value
                                of a label (i.e. list("key=value")) or by looking for the existence of a given
                                key (i.e. "key").
        :param top_level:       If True will return only routers and endpoint that are NOT children of any router.
        :param uids:            List of model endpoint unique ids to include in the result. When provided, these ids
                                are fetched as is and the ids found by the filtered query are not used.

        :return: A list of model endpoint dictionaries. An empty list is returned if the KV query fails.
        """

        # Retrieve the raw data from the KV table and get the endpoint ids
        try:
            cursor = self.client.kv.new_cursor(
                container=self.container,
                table_path=self.path,
                filter_expression=self._build_kv_cursor_filter_expression(
                    self.project,
                    function,
                    model,
                    labels,
                    top_level,
                ),
                raise_for_status=v3io.dataplane.RaiseForStatus.never,
            )
            items = cursor.all()

        except Exception as exc:
            # Best effort: a failed query is logged and reported as "no endpoints"
            logger.warning(
                "Failed retrieving raw data from kv table",
                exc=mlrun.errors.err_to_str(exc),
            )
            return []

        # Create a list of model endpoints unique ids
        if uids is None:
            uids = [self._get_endpoint_uid(item) for item in items]

        # Fetch each relevant model endpoint
        return [
            self.get_model_endpoint(endpoint_id=endpoint_id) for endpoint_id in uids
        ]

    def delete_model_endpoints_resources(self, endpoints: list[dict[str, typing.Any]]):
        """
        Delete all model endpoints resources in both KV and the time series DB.

        :param endpoints: A list of model endpoints flattened dictionaries.
        """

        # Delete model endpoint record from KV table
        for endpoint_dict in endpoints:
            self.delete_model_endpoint(self._get_endpoint_uid(endpoint_dict))

        # Delete remain records in the KV
        remaining_records = self.client.kv.new_cursor(
            container=self.container,
            table_path=self.path,
            raise_for_status=v3io.dataplane.RaiseForStatus.never,
        ).all()

        # Cleanup KV
        for record in remaining_records:
            self.client.kv.delete(
                container=self.container,
                table_path=self.path,
                key=record["__name"],
                raise_for_status=v3io.dataplane.RaiseForStatus.never,
            )

        # Cleanup TSDB
        frames = self._get_frames_client()

        # Generate the required tsdb paths
        tsdb_path, filtered_path = self._generate_tsdb_paths()

        # Delete time series DB resources
        try:
            frames.delete(
                backend=mlrun.common.schemas.model_monitoring.TimeSeriesTarget.TSDB,
                table=filtered_path,
            )
        except v3io_frames.errors.DeleteError as e:
            # A missing TSDB schema file is not reported; any other failure is logged
            if "No TSDB schema file found" not in str(e):
                logger.warning(
                    f"Failed to delete TSDB table '{filtered_path}'",
                    err=mlrun.errors.err_to_str(e),
                )

        # Final cleanup of tsdb path.
        # `str.replace` returns a new string, so the result must be assigned for the replacement to take effect.
        tsdb_path = tsdb_path.replace("://u", ":///u")
        store, _, _ = mlrun.store_manager.get_or_create_store(tsdb_path)
        store.rm(tsdb_path, recursive=True)

    def get_endpoint_real_time_metrics(
        self,
        endpoint_id: str,
        metrics: list[str],
        start: str = "now-1h",
        end: str = "now",
        access_key: typing.Optional[str] = None,
    ) -> dict[str, list[tuple[str, float]]]:
        """
        Getting metrics from the time series DB. There are pre-defined metrics for model endpoints such as
        `predictions_per_second` and `latency_avg_5m` but also custom metrics defined by the user.

        :param endpoint_id:      The unique id of the model endpoint.
        :param metrics:          A list of real-time metrics to return for the model endpoint.
        :param start:            The start time of the metrics. Can be represented by a string containing an RFC 3339
                                 time, a Unix timestamp in milliseconds, a relative time (`'now'` or
                                 `'now-[0-9]+[mhd]'`, where `m` = minutes, `h` = hours, and `'d'` = days), or 0 for the
                                 earliest time.
        :param end:              The end time of the metrics. Can be represented by a string containing an RFC 3339
                                 time, a Unix timestamp in milliseconds, a relative time (`'now'` or
                                 `'now-[0-9]+[mhd]'`, where `m` = minutes, `h` = hours, and `'d'` = days), or 0 for the
                                 earliest time.
        :param access_key:       V3IO access key that will be used for generating Frames client object. If not
                                 provided, the access key will be retrieved from the environment variables.

        :return: A dictionary of metrics in which the key is a metric name and the value is a list of tuples that
                 includes timestamps and the values. Metrics that are missing from the TSDB result are omitted, and
                 an empty dictionary is returned if reading the TSDB fails.

        :raise MLRunInvalidArgumentError: If no metric names were provided.
        """

        # Initialize access key
        access_key = access_key or mlrun.mlconf.get_v3io_access_key()

        if not metrics:
            raise mlrun.errors.MLRunInvalidArgumentError(
                "Metric names must be provided"
            )

        # Initialize metrics mapping dictionary
        metrics_mapping = {}

        # Getting the path for the time series DB
        events_path = (
            mlrun.mlconf.model_endpoint_monitoring.store_prefixes.default.format(
                project=self.project,
                kind=mlrun.common.schemas.ModelMonitoringStoreKinds.EVENTS,
            )
        )
        (
            _,
            container,
            events_path,
        ) = mlrun.common.model_monitoring.helpers.parse_model_endpoint_store_prefix(
            events_path
        )

        # Retrieve the raw data from the time series DB based on the provided metrics and time ranges
        frames_client = mlrun.utils.v3io_clients.get_frames_client(
            token=access_key,
            address=mlrun.mlconf.v3io_framesd,
            container=container,
        )

        try:
            data = frames_client.read(
                backend=mlrun.common.schemas.model_monitoring.TimeSeriesTarget.TSDB,
                table=events_path,
                columns=["endpoint_id", *metrics],
                filter=f"endpoint_id=='{endpoint_id}'",
                start=start,
                end=end,
            )

            # Fill the metrics mapping dictionary with the metric name and values
            data_dict = data.to_dict()
            for metric in metrics:
                metric_data = data_dict.get(metric)
                if metric_data is None:
                    continue

                metrics_mapping[metric] = [
                    (str(timestamp), value) for timestamp, value in metric_data.items()
                ]

        except v3io_frames.errors.ReadError:
            logger.warning("Failed to read tsdb", endpoint=endpoint_id)

        return metrics_mapping

    def _generate_tsdb_paths(self) -> tuple[str, str]:
        """Generate a short path to the TSDB resources and a filtered path for the frames object
        :return: A tuple of:
             [0] = Short path to the TSDB resources
             [1] = Filtered path to TSDB events without schema and container
        """
        # Full path for the time series DB events
        full_path = (
            mlrun.mlconf.model_endpoint_monitoring.store_prefixes.default.format(
                project=self.project,
                kind=mlrun.common.schemas.ModelMonitoringStoreKinds.EVENTS,
            )
        )

        # Generate the main directory with the TSDB resources
        tsdb_path = (
            mlrun.common.model_monitoring.helpers.parse_model_endpoint_project_prefix(
                full_path, self.project
            )
        )

        # Generate filtered path without schema and container as required by the frames object
        (
            _,
            _,
            filtered_path,
        ) = mlrun.common.model_monitoring.helpers.parse_model_endpoint_store_prefix(
            full_path
        )
        return tsdb_path, filtered_path

    def _infer_kv_schema(self):
        """
        Create KV schema file if not exist. This schema is being used by the Grafana dashboards.
        """

        schema_file = self.client.kv.new_cursor(
            container=self.container,
            table_path=self.path,
            filter_expression='__name==".#schema"',
        )

        if not schema_file.all():
            logger.info("Generate a new V3IO KV schema file", kv_table_path=self.path)
            frames_client = self._get_frames_client()
            frames_client.execute(backend="kv", table=self.path, command="infer_schema")

    def _get_frames_client(self):
        """Create a frames client for this store's container, using the store's access key."""
        return mlrun.utils.v3io_clients.get_frames_client(
            token=self.access_key,
            address=mlrun.mlconf.v3io_framesd,
            container=self.container,
        )

    @staticmethod
    def _get_endpoint_uid(record: dict[str, typing.Any]) -> str:
        """
        Get the unique id of a model endpoint record.

        :param record: A model endpoint record (flattened dictionary).

        :return: The value stored under `uid`, or under `endpoint_id` when the record has no `uid` key.
        """
        event_fields = mlrun.common.schemas.model_monitoring.EventFieldType
        if event_fields.UID in record:
            return record[event_fields.UID]
        # This is kept for backwards compatibility - in old versions the key column named endpoint_id
        return record[event_fields.ENDPOINT_ID]

    @classmethod
    def _encode_fields(cls, attributes: dict[str, typing.Any]) -> dict[str, typing.Any]:
        """
        Return a shallow copy of the given attributes in which every field listed in `fields_to_encode_decode`
        is encoded to binary data. The provided dictionary is left unchanged.

        :param attributes: Model endpoint attributes dictionary.

        :return: A new dictionary with the relevant fields encoded.
        """
        encoded = dict(attributes)
        for field in fields_to_encode_decode:
            if field in encoded:
                # Encode to binary data
                encoded[field] = cls._encode_field(encoded[field])
        return encoded

    @staticmethod
    def _build_kv_cursor_filter_expression(
        project: str,
        function: typing.Optional[str] = None,
        model: typing.Optional[str] = None,
        labels: typing.Optional[list[str]] = None,
        top_level: bool = False,
    ) -> str:
        """
        Convert the provided filters into a valid filter expression. The expected filter expression includes different
        conditions, divided by ' AND '.

        :param project:    The name of the project.
        :param model:      The name of the model to filter by.
        :param function:   The name of the function to filter by.
        :param labels:     A list of labels to filter by. Label filters work by either filtering a specific value of
                           a label (i.e. list("key=value")) or by looking for the existence of a given
                           key (i.e. "key"). Only the first "=" separates the key from the value, so a value may
                           itself contain "=".
        :param top_level:  If True will return only routers and endpoint that are NOT children of any router.

        :return: A valid filter expression as a string.

        :raise MLRunInvalidArgumentError: If project value is None.
        """

        if not project:
            raise mlrun.errors.MLRunInvalidArgumentError("project can't be empty")

        # Add project filter
        filter_expression = [f"project=='{project}'"]

        # Add function and model filters
        if function:
            filter_expression.append(f"function=='{function}'")
        if model:
            filter_expression.append(f"model=='{model}'")

        # Add labels filters
        for label in labels or []:
            # Label attributes are looked up with a leading underscore
            if not label.startswith("_"):
                label = f"_{label}"

            if "=" in label:
                # Split on the first "=" only, otherwise a value that contains "=" fails the unpacking
                lbl, value = (part.strip() for part in label.split("=", 1))
                filter_expression.append(f"{lbl}=='{value}'")
            else:
                filter_expression.append(f"exists({label})")

        # Apply top_level filter (remove endpoints that considered a child of a router)
        if top_level:
            endpoint_type = mlrun.common.schemas.model_monitoring.EndpointType
            filter_expression.append(
                f"(endpoint_type=='{str(endpoint_type.NODE_EP.value)}' "
                f"OR endpoint_type=='{str(endpoint_type.ROUTER.value)}')"
            )

        return " AND ".join(filter_expression)

    @staticmethod
    def validate_old_schema_fields(endpoint: dict):
        """
        Replace default null values for `error_count` and `metrics` for users that logged a model endpoint before 1.3.0.
        In addition, this function also validates that the key name of the endpoint unique id is `uid` and not
        `endpoint_id` that has been used before 1.3.0.

        Leaving here for backwards compatibility which related to the model endpoint schema.

        :param endpoint: An endpoint flattened dictionary. It is updated in place.
        """

        event_fields = mlrun.common.schemas.model_monitoring.EventFieldType
        live_stats = mlrun.common.schemas.model_monitoring.EventLiveStats

        # Validate default value for `error_count`
        # For backwards compatibility reasons, we validate that the model endpoint includes the `error_count` key
        if endpoint.get(event_fields.ERROR_COUNT) == "null":
            endpoint[event_fields.ERROR_COUNT] = "0"

        # Validate default value for `metrics`
        # For backwards compatibility reasons, we validate that the model endpoint includes the `metrics` key
        if endpoint.get(event_fields.METRICS) == "null":
            endpoint[event_fields.METRICS] = json.dumps(
                {
                    mlrun.common.schemas.model_monitoring.EventKeyMetrics.GENERIC: {
                        live_stats.LATENCY_AVG_1H: 0,
                        live_stats.PREDICTIONS_PER_SECOND: 0,
                    }
                }
            )

        # Validate key `uid` instead of `endpoint_id`
        # For backwards compatibility reasons, we replace the `endpoint_id` with `uid` which is the updated key name
        if event_fields.ENDPOINT_ID in endpoint:
            endpoint[event_fields.UID] = endpoint[event_fields.ENDPOINT_ID]

    @staticmethod
    def _encode_field(field: typing.Union[str, bytes]) -> bytes:
        """
        Encode a provided field. Mainly used when storing data in the KV table.

        Strings are encoded as UTF-8, which matches the default codec used by `_decode_field` and yields the same
        bytes as ASCII for ASCII-only text. Values that are not strings are returned as is.
        """

        if isinstance(field, str):
            return field.encode("utf-8")
        return field

    @staticmethod
    def _decode_field(field: typing.Union[str, bytes]) -> str:
        """
        Decode a provided field. Mainly used when retrieving data from the KV table.

        Bytes are decoded with the default codec (UTF-8). Values that are not bytes are returned as is.
        """

        if isinstance(field, bytes):
            return field.decode()
        return field