mlrun-1.7.1rc10-py3-none-any.whl → mlrun-1.8.0rc8-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of mlrun might be problematic.

Files changed (257)
  1. mlrun/__init__.py +23 -21
  2. mlrun/__main__.py +3 -3
  3. mlrun/alerts/alert.py +148 -14
  4. mlrun/artifacts/__init__.py +1 -2
  5. mlrun/artifacts/base.py +46 -12
  6. mlrun/artifacts/dataset.py +16 -16
  7. mlrun/artifacts/document.py +334 -0
  8. mlrun/artifacts/manager.py +15 -13
  9. mlrun/artifacts/model.py +66 -53
  10. mlrun/common/constants.py +7 -0
  11. mlrun/common/formatters/__init__.py +1 -0
  12. mlrun/common/formatters/feature_set.py +1 -0
  13. mlrun/common/formatters/function.py +1 -0
  14. mlrun/{model_monitoring/db/stores/base/__init__.py → common/formatters/model_endpoint.py} +16 -1
  15. mlrun/common/formatters/pipeline.py +1 -2
  16. mlrun/common/formatters/project.py +9 -0
  17. mlrun/common/model_monitoring/__init__.py +0 -5
  18. mlrun/common/model_monitoring/helpers.py +1 -29
  19. mlrun/common/runtimes/constants.py +1 -2
  20. mlrun/common/schemas/__init__.py +6 -2
  21. mlrun/common/schemas/alert.py +111 -19
  22. mlrun/common/schemas/api_gateway.py +3 -3
  23. mlrun/common/schemas/artifact.py +11 -7
  24. mlrun/common/schemas/auth.py +6 -4
  25. mlrun/common/schemas/background_task.py +7 -7
  26. mlrun/common/schemas/client_spec.py +2 -3
  27. mlrun/common/schemas/clusterization_spec.py +2 -2
  28. mlrun/common/schemas/common.py +53 -3
  29. mlrun/common/schemas/constants.py +15 -0
  30. mlrun/common/schemas/datastore_profile.py +1 -1
  31. mlrun/common/schemas/feature_store.py +9 -9
  32. mlrun/common/schemas/frontend_spec.py +4 -4
  33. mlrun/common/schemas/function.py +10 -10
  34. mlrun/common/schemas/hub.py +1 -1
  35. mlrun/common/schemas/k8s.py +3 -3
  36. mlrun/common/schemas/memory_reports.py +3 -3
  37. mlrun/common/schemas/model_monitoring/__init__.py +2 -1
  38. mlrun/common/schemas/model_monitoring/constants.py +66 -14
  39. mlrun/common/schemas/model_monitoring/grafana.py +1 -1
  40. mlrun/common/schemas/model_monitoring/model_endpoints.py +91 -147
  41. mlrun/common/schemas/notification.py +24 -3
  42. mlrun/common/schemas/object.py +1 -1
  43. mlrun/common/schemas/pagination.py +4 -4
  44. mlrun/common/schemas/partition.py +137 -0
  45. mlrun/common/schemas/pipeline.py +2 -2
  46. mlrun/common/schemas/project.py +25 -17
  47. mlrun/common/schemas/runs.py +2 -2
  48. mlrun/common/schemas/runtime_resource.py +5 -5
  49. mlrun/common/schemas/schedule.py +1 -1
  50. mlrun/common/schemas/secret.py +1 -1
  51. mlrun/common/schemas/tag.py +3 -3
  52. mlrun/common/schemas/workflow.py +5 -5
  53. mlrun/config.py +67 -10
  54. mlrun/data_types/__init__.py +0 -2
  55. mlrun/data_types/infer.py +3 -1
  56. mlrun/data_types/spark.py +2 -1
  57. mlrun/datastore/__init__.py +0 -2
  58. mlrun/datastore/alibaba_oss.py +4 -1
  59. mlrun/datastore/azure_blob.py +4 -1
  60. mlrun/datastore/base.py +12 -4
  61. mlrun/datastore/datastore.py +9 -3
  62. mlrun/datastore/datastore_profile.py +79 -20
  63. mlrun/datastore/dbfs_store.py +4 -1
  64. mlrun/datastore/filestore.py +4 -1
  65. mlrun/datastore/google_cloud_storage.py +4 -1
  66. mlrun/datastore/hdfs.py +4 -1
  67. mlrun/datastore/inmem.py +4 -1
  68. mlrun/datastore/redis.py +4 -1
  69. mlrun/datastore/s3.py +4 -1
  70. mlrun/datastore/sources.py +52 -51
  71. mlrun/datastore/store_resources.py +0 -2
  72. mlrun/datastore/targets.py +21 -21
  73. mlrun/datastore/utils.py +2 -2
  74. mlrun/datastore/v3io.py +4 -1
  75. mlrun/datastore/vectorstore.py +194 -0
  76. mlrun/datastore/wasbfs/fs.py +13 -12
  77. mlrun/db/base.py +208 -82
  78. mlrun/db/factory.py +0 -3
  79. mlrun/db/httpdb.py +1237 -386
  80. mlrun/db/nopdb.py +201 -74
  81. mlrun/errors.py +2 -2
  82. mlrun/execution.py +136 -50
  83. mlrun/feature_store/__init__.py +0 -2
  84. mlrun/feature_store/api.py +41 -40
  85. mlrun/feature_store/common.py +9 -9
  86. mlrun/feature_store/feature_set.py +20 -18
  87. mlrun/feature_store/feature_vector.py +27 -24
  88. mlrun/feature_store/retrieval/base.py +14 -9
  89. mlrun/feature_store/retrieval/job.py +2 -1
  90. mlrun/feature_store/steps.py +2 -2
  91. mlrun/features.py +30 -13
  92. mlrun/frameworks/__init__.py +1 -2
  93. mlrun/frameworks/_common/__init__.py +1 -2
  94. mlrun/frameworks/_common/artifacts_library.py +2 -2
  95. mlrun/frameworks/_common/mlrun_interface.py +10 -6
  96. mlrun/frameworks/_common/model_handler.py +29 -27
  97. mlrun/frameworks/_common/producer.py +3 -1
  98. mlrun/frameworks/_dl_common/__init__.py +1 -2
  99. mlrun/frameworks/_dl_common/loggers/__init__.py +1 -2
  100. mlrun/frameworks/_dl_common/loggers/mlrun_logger.py +4 -4
  101. mlrun/frameworks/_dl_common/loggers/tensorboard_logger.py +3 -3
  102. mlrun/frameworks/_ml_common/__init__.py +1 -2
  103. mlrun/frameworks/_ml_common/loggers/__init__.py +1 -2
  104. mlrun/frameworks/_ml_common/model_handler.py +21 -21
  105. mlrun/frameworks/_ml_common/plans/__init__.py +1 -2
  106. mlrun/frameworks/_ml_common/plans/confusion_matrix_plan.py +3 -1
  107. mlrun/frameworks/_ml_common/plans/dataset_plan.py +3 -3
  108. mlrun/frameworks/_ml_common/plans/roc_curve_plan.py +4 -4
  109. mlrun/frameworks/auto_mlrun/__init__.py +1 -2
  110. mlrun/frameworks/auto_mlrun/auto_mlrun.py +22 -15
  111. mlrun/frameworks/huggingface/__init__.py +1 -2
  112. mlrun/frameworks/huggingface/model_server.py +9 -9
  113. mlrun/frameworks/lgbm/__init__.py +47 -44
  114. mlrun/frameworks/lgbm/callbacks/__init__.py +1 -2
  115. mlrun/frameworks/lgbm/callbacks/logging_callback.py +4 -2
  116. mlrun/frameworks/lgbm/callbacks/mlrun_logging_callback.py +4 -2
  117. mlrun/frameworks/lgbm/mlrun_interfaces/__init__.py +1 -2
  118. mlrun/frameworks/lgbm/mlrun_interfaces/mlrun_interface.py +5 -5
  119. mlrun/frameworks/lgbm/model_handler.py +15 -11
  120. mlrun/frameworks/lgbm/model_server.py +11 -7
  121. mlrun/frameworks/lgbm/utils.py +2 -2
  122. mlrun/frameworks/onnx/__init__.py +1 -2
  123. mlrun/frameworks/onnx/dataset.py +3 -3
  124. mlrun/frameworks/onnx/mlrun_interface.py +2 -2
  125. mlrun/frameworks/onnx/model_handler.py +7 -5
  126. mlrun/frameworks/onnx/model_server.py +8 -6
  127. mlrun/frameworks/parallel_coordinates.py +11 -11
  128. mlrun/frameworks/pytorch/__init__.py +22 -23
  129. mlrun/frameworks/pytorch/callbacks/__init__.py +1 -2
  130. mlrun/frameworks/pytorch/callbacks/callback.py +2 -1
  131. mlrun/frameworks/pytorch/callbacks/logging_callback.py +15 -8
  132. mlrun/frameworks/pytorch/callbacks/mlrun_logging_callback.py +19 -12
  133. mlrun/frameworks/pytorch/callbacks/tensorboard_logging_callback.py +22 -15
  134. mlrun/frameworks/pytorch/callbacks_handler.py +36 -30
  135. mlrun/frameworks/pytorch/mlrun_interface.py +17 -17
  136. mlrun/frameworks/pytorch/model_handler.py +21 -17
  137. mlrun/frameworks/pytorch/model_server.py +13 -9
  138. mlrun/frameworks/sklearn/__init__.py +19 -18
  139. mlrun/frameworks/sklearn/estimator.py +2 -2
  140. mlrun/frameworks/sklearn/metric.py +3 -3
  141. mlrun/frameworks/sklearn/metrics_library.py +8 -6
  142. mlrun/frameworks/sklearn/mlrun_interface.py +3 -2
  143. mlrun/frameworks/sklearn/model_handler.py +4 -3
  144. mlrun/frameworks/tf_keras/__init__.py +11 -12
  145. mlrun/frameworks/tf_keras/callbacks/__init__.py +1 -2
  146. mlrun/frameworks/tf_keras/callbacks/logging_callback.py +17 -14
  147. mlrun/frameworks/tf_keras/callbacks/mlrun_logging_callback.py +15 -12
  148. mlrun/frameworks/tf_keras/callbacks/tensorboard_logging_callback.py +21 -18
  149. mlrun/frameworks/tf_keras/model_handler.py +17 -13
  150. mlrun/frameworks/tf_keras/model_server.py +12 -8
  151. mlrun/frameworks/xgboost/__init__.py +19 -18
  152. mlrun/frameworks/xgboost/model_handler.py +13 -9
  153. mlrun/launcher/base.py +3 -4
  154. mlrun/launcher/local.py +1 -1
  155. mlrun/launcher/remote.py +1 -1
  156. mlrun/lists.py +4 -3
  157. mlrun/model.py +117 -46
  158. mlrun/model_monitoring/__init__.py +4 -4
  159. mlrun/model_monitoring/api.py +61 -59
  160. mlrun/model_monitoring/applications/_application_steps.py +17 -17
  161. mlrun/model_monitoring/applications/base.py +165 -6
  162. mlrun/model_monitoring/applications/context.py +88 -37
  163. mlrun/model_monitoring/applications/evidently_base.py +0 -1
  164. mlrun/model_monitoring/applications/histogram_data_drift.py +43 -21
  165. mlrun/model_monitoring/applications/results.py +55 -3
  166. mlrun/model_monitoring/controller.py +207 -239
  167. mlrun/model_monitoring/db/__init__.py +0 -2
  168. mlrun/model_monitoring/db/_schedules.py +156 -0
  169. mlrun/model_monitoring/db/_stats.py +189 -0
  170. mlrun/model_monitoring/db/tsdb/base.py +78 -25
  171. mlrun/model_monitoring/db/tsdb/tdengine/schemas.py +61 -6
  172. mlrun/model_monitoring/db/tsdb/tdengine/stream_graph_steps.py +33 -0
  173. mlrun/model_monitoring/db/tsdb/tdengine/tdengine_connector.py +255 -29
  174. mlrun/model_monitoring/db/tsdb/v3io/stream_graph_steps.py +1 -0
  175. mlrun/model_monitoring/db/tsdb/v3io/v3io_connector.py +78 -17
  176. mlrun/model_monitoring/helpers.py +152 -49
  177. mlrun/model_monitoring/stream_processing.py +99 -283
  178. mlrun/model_monitoring/tracking_policy.py +10 -3
  179. mlrun/model_monitoring/writer.py +48 -36
  180. mlrun/package/__init__.py +3 -6
  181. mlrun/package/context_handler.py +1 -1
  182. mlrun/package/packager.py +12 -9
  183. mlrun/package/packagers/__init__.py +0 -2
  184. mlrun/package/packagers/default_packager.py +14 -11
  185. mlrun/package/packagers/numpy_packagers.py +16 -7
  186. mlrun/package/packagers/pandas_packagers.py +18 -18
  187. mlrun/package/packagers/python_standard_library_packagers.py +25 -11
  188. mlrun/package/packagers_manager.py +31 -14
  189. mlrun/package/utils/__init__.py +0 -3
  190. mlrun/package/utils/_pickler.py +6 -6
  191. mlrun/platforms/__init__.py +47 -16
  192. mlrun/platforms/iguazio.py +4 -1
  193. mlrun/projects/operations.py +27 -27
  194. mlrun/projects/pipelines.py +71 -36
  195. mlrun/projects/project.py +865 -206
  196. mlrun/run.py +53 -10
  197. mlrun/runtimes/__init__.py +1 -3
  198. mlrun/runtimes/base.py +15 -11
  199. mlrun/runtimes/daskjob.py +9 -9
  200. mlrun/runtimes/generators.py +2 -1
  201. mlrun/runtimes/kubejob.py +4 -5
  202. mlrun/runtimes/mounts.py +572 -0
  203. mlrun/runtimes/mpijob/__init__.py +0 -2
  204. mlrun/runtimes/mpijob/abstract.py +7 -6
  205. mlrun/runtimes/nuclio/api_gateway.py +7 -7
  206. mlrun/runtimes/nuclio/application/application.py +11 -11
  207. mlrun/runtimes/nuclio/function.py +19 -17
  208. mlrun/runtimes/nuclio/serving.py +18 -11
  209. mlrun/runtimes/pod.py +154 -45
  210. mlrun/runtimes/remotesparkjob.py +3 -2
  211. mlrun/runtimes/sparkjob/__init__.py +0 -2
  212. mlrun/runtimes/sparkjob/spark3job.py +21 -11
  213. mlrun/runtimes/utils.py +6 -5
  214. mlrun/serving/merger.py +6 -4
  215. mlrun/serving/remote.py +18 -17
  216. mlrun/serving/routers.py +185 -172
  217. mlrun/serving/server.py +7 -1
  218. mlrun/serving/states.py +97 -78
  219. mlrun/serving/utils.py +13 -2
  220. mlrun/serving/v1_serving.py +3 -2
  221. mlrun/serving/v2_serving.py +74 -65
  222. mlrun/track/__init__.py +1 -1
  223. mlrun/track/tracker.py +2 -2
  224. mlrun/track/trackers/mlflow_tracker.py +6 -5
  225. mlrun/utils/async_http.py +1 -1
  226. mlrun/utils/clones.py +1 -1
  227. mlrun/utils/helpers.py +54 -16
  228. mlrun/utils/logger.py +106 -4
  229. mlrun/utils/notifications/notification/__init__.py +22 -19
  230. mlrun/utils/notifications/notification/base.py +33 -14
  231. mlrun/utils/notifications/notification/console.py +6 -6
  232. mlrun/utils/notifications/notification/git.py +11 -11
  233. mlrun/utils/notifications/notification/ipython.py +10 -9
  234. mlrun/utils/notifications/notification/mail.py +176 -0
  235. mlrun/utils/notifications/notification/slack.py +6 -6
  236. mlrun/utils/notifications/notification/webhook.py +6 -6
  237. mlrun/utils/notifications/notification_pusher.py +86 -44
  238. mlrun/utils/regex.py +3 -1
  239. mlrun/utils/version/version.json +2 -2
  240. {mlrun-1.7.1rc10.dist-info → mlrun-1.8.0rc8.dist-info}/METADATA +21 -16
  241. mlrun-1.8.0rc8.dist-info/RECORD +347 -0
  242. mlrun/model_monitoring/db/stores/__init__.py +0 -136
  243. mlrun/model_monitoring/db/stores/base/store.py +0 -213
  244. mlrun/model_monitoring/db/stores/sqldb/__init__.py +0 -13
  245. mlrun/model_monitoring/db/stores/sqldb/models/__init__.py +0 -71
  246. mlrun/model_monitoring/db/stores/sqldb/models/base.py +0 -190
  247. mlrun/model_monitoring/db/stores/sqldb/models/mysql.py +0 -103
  248. mlrun/model_monitoring/db/stores/sqldb/models/sqlite.py +0 -40
  249. mlrun/model_monitoring/db/stores/sqldb/sql_store.py +0 -659
  250. mlrun/model_monitoring/db/stores/v3io_kv/__init__.py +0 -13
  251. mlrun/model_monitoring/db/stores/v3io_kv/kv_store.py +0 -726
  252. mlrun/model_monitoring/model_endpoint.py +0 -118
  253. mlrun-1.7.1rc10.dist-info/RECORD +0 -351
  254. {mlrun-1.7.1rc10.dist-info → mlrun-1.8.0rc8.dist-info}/LICENSE +0 -0
  255. {mlrun-1.7.1rc10.dist-info → mlrun-1.8.0rc8.dist-info}/WHEEL +0 -0
  256. {mlrun-1.7.1rc10.dist-info → mlrun-1.8.0rc8.dist-info}/entry_points.txt +0 -0
  257. {mlrun-1.7.1rc10.dist-info → mlrun-1.8.0rc8.dist-info}/top_level.txt +0 -0
--- a/mlrun/model_monitoring/db/stores/v3io_kv/kv_store.py
+++ /dev/null
@@ -1,726 +0,0 @@
-# Copyright 2023 Iguazio
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-import http
-import json
-import typing
-from dataclasses import dataclass
-from http import HTTPStatus
-
-import v3io.dataplane
-import v3io.dataplane.output
-import v3io.dataplane.response
-from v3io.dataplane import Client as V3IOClient
-
-import mlrun.common.model_monitoring.helpers
-import mlrun.common.schemas.model_monitoring as mm_schemas
-import mlrun.utils.v3io_clients
-from mlrun.model_monitoring.db import StoreBase
-from mlrun.utils import logger
-
-# Fields to encode before storing in the KV table or to decode after retrieving
-fields_to_encode_decode = [
-    mm_schemas.EventFieldType.FEATURE_STATS,
-    mm_schemas.EventFieldType.CURRENT_STATS,
-]
-
-_METRIC_FIELDS: list[str] = [
-    mm_schemas.WriterEvent.APPLICATION_NAME.value,
-    mm_schemas.MetricData.METRIC_NAME.value,
-    mm_schemas.MetricData.METRIC_VALUE.value,
-    mm_schemas.WriterEvent.START_INFER_TIME.value,
-    mm_schemas.WriterEvent.END_INFER_TIME.value,
-]
-
-
-class SchemaField(typing.TypedDict):
-    name: str
-    type: str
-    nullable: bool
-
-
-@dataclass
-class SchemaParams:
-    key: str
-    fields: list[SchemaField]
-
-
-_RESULT_SCHEMA: list[SchemaField] = [
-    SchemaField(
-        name=mm_schemas.ResultData.RESULT_NAME,
-        type=mm_schemas.GrafanaColumnType.STRING,
-        nullable=False,
-    )
-]
-
-_METRIC_SCHEMA: list[SchemaField] = [
-    SchemaField(
-        name=mm_schemas.WriterEvent.APPLICATION_NAME,
-        type=mm_schemas.GrafanaColumnType.STRING,
-        nullable=False,
-    ),
-    SchemaField(
-        name=mm_schemas.MetricData.METRIC_NAME,
-        type=mm_schemas.GrafanaColumnType.STRING,
-        nullable=False,
-    ),
-]
-
-
-_KIND_TO_SCHEMA_PARAMS: dict[mm_schemas.WriterEventKind, SchemaParams] = {
-    mm_schemas.WriterEventKind.RESULT: SchemaParams(
-        key=mm_schemas.WriterEvent.APPLICATION_NAME, fields=_RESULT_SCHEMA
-    ),
-    mm_schemas.WriterEventKind.METRIC: SchemaParams(
-        key="metric_id", fields=_METRIC_SCHEMA
-    ),
-}
-
-_EXCLUDE_SCHEMA_FILTER_EXPRESSION = '__name!=".#schema"'
-
-
-class KVStoreBase(StoreBase):
-    type: typing.ClassVar[str] = "v3io-nosql"
-    """
-    Handles the DB operations when the DB target is from type KV. For the KV operations, we use an instance of V3IO
-    client and usually the KV table can be found under v3io:///users/pipelines/project-name/model-endpoints/endpoints/.
-    """
-
-    def __init__(
-        self,
-        project: str,
-    ) -> None:
-        super().__init__(project=project)
-        self._client = None
-        # Get the KV table path and container
-        self.path, self.container = self._get_path_and_container()
-
-    @property
-    def client(self) -> V3IOClient:
-        if not self._client:
-            self._client = mlrun.utils.v3io_clients.get_v3io_client(
-                endpoint=mlrun.mlconf.v3io_api,
-            )
-        return self._client
-
-    def write_model_endpoint(self, endpoint: dict[str, typing.Any]):
-        """
-        Create a new endpoint record in the KV table.
-
-        :param endpoint: model endpoint dictionary that will be written into the DB.
-        """
-
-        for field in fields_to_encode_decode:
-            if field in endpoint:
-                # Encode to binary data
-                endpoint[field] = self._encode_field(endpoint[field])
-
-        self.client.kv.put(
-            container=self.container,
-            table_path=self.path,
-            key=endpoint[mm_schemas.EventFieldType.UID],
-            attributes=endpoint,
-        )
-
-        self._infer_kv_schema()
-
-    def update_model_endpoint(
-        self, endpoint_id: str, attributes: dict[str, typing.Any]
-    ):
-        """
-        Update a model endpoint record with a given attributes.
-
-        :param endpoint_id: The unique id of the model endpoint.
-        :param attributes: Dictionary of attributes that will be used for update the model endpoint. Note that the keys
-                           of the attributes dictionary should exist in the KV table.
-
-        """
-
-        for field in fields_to_encode_decode:
-            if field in attributes:
-                # Encode to binary data
-                attributes[field] = self._encode_field(attributes[field])
-
-        self.client.kv.update(
-            container=self.container,
-            table_path=self.path,
-            key=endpoint_id,
-            attributes=attributes,
-        )
-
-    def delete_model_endpoint(
-        self,
-        endpoint_id: str,
-    ):
-        """
-        Deletes the KV record of a given model endpoint id.
-
-        :param endpoint_id: The unique id of the model endpoint.
-        """
-
-        self.client.kv.delete(
-            container=self.container,
-            table_path=self.path,
-            key=endpoint_id,
-        )
-
-    def get_model_endpoint(
-        self,
-        endpoint_id: str,
-    ) -> dict[str, typing.Any]:
-        """
-        Get a single model endpoint record.
-
-        :param endpoint_id: The unique id of the model endpoint.
-
-        :return: A model endpoint record as a dictionary.
-
-        :raise MLRunNotFoundError: If the endpoint was not found.
-        """
-
-        # Getting the raw data from the KV table
-        endpoint = self.client.kv.get(
-            container=self.container,
-            table_path=self.path,
-            key=endpoint_id,
-            raise_for_status=v3io.dataplane.RaiseForStatus.never,
-        )
-        endpoint = endpoint.output.item
-
-        for field in fields_to_encode_decode:
-            if field in endpoint:
-                # Decode binary data
-                endpoint[field] = self._decode_field(endpoint[field])
-
-        if not endpoint:
-            raise mlrun.errors.MLRunNotFoundError(f"Endpoint {endpoint_id} not found")
-
-        # For backwards compatability: replace null values for `error_count` and `metrics`
-        self.validate_old_schema_fields(endpoint=endpoint)
-
-        return endpoint
-
-    def _get_path_and_container(self):
-        """Getting path and container based on the model monitoring configurations"""
-        path = mlrun.mlconf.model_endpoint_monitoring.store_prefixes.default.format(
-            project=self.project,
-            kind=mm_schemas.ModelMonitoringStoreKinds.ENDPOINTS,
-        )
-        (
-            _,
-            container,
-            path,
-        ) = mlrun.common.model_monitoring.helpers.parse_model_endpoint_store_prefix(
-            path
-        )
-        return path, container
-
-    def list_model_endpoints(
-        self,
-        model: str = None,
-        function: str = None,
-        labels: list[str] = None,
-        top_level: bool = None,
-        uids: list = None,
-        include_stats: bool = None,
-    ) -> list[dict[str, typing.Any]]:
-        # # Initialize an empty model endpoints list
-        endpoint_list = []
-
-        # Retrieve the raw data from the KV table and get the endpoint ids
-        try:
-            cursor = self.client.kv.new_cursor(
-                container=self.container,
-                table_path=self.path,
-                filter_expression=self._build_kv_cursor_filter_expression(
-                    self.project,
-                    function,
-                    model,
-                    top_level,
-                ),
-                raise_for_status=v3io.dataplane.RaiseForStatus.never,
-            )
-            items = cursor.all()
-
-        except Exception as exc:
-            logger.warning(
-                "Failed retrieving raw data from kv table",
-                exc=mlrun.errors.err_to_str(exc),
-            )
-            return endpoint_list
-        # Create a list of model endpoints unique ids
-        if uids is None:
-            uids = []
-            for item in items:
-                if mm_schemas.EventFieldType.UID not in item:
-                    # This is kept for backwards compatibility - in old versions the key column named endpoint_id
-                    uids.append(item[mm_schemas.EventFieldType.ENDPOINT_ID])
-                else:
-                    uids.append(item[mm_schemas.EventFieldType.UID])
-
-        # Add each relevant model endpoint to the model endpoints list
-        for endpoint_id in uids:
-            endpoint_dict = self.get_model_endpoint(
-                endpoint_id=endpoint_id,
-            )
-            if not include_stats:
-                # Exclude these fields when listing model endpoints to avoid returning too much data (ML-6594)
-                endpoint_dict.pop(mm_schemas.EventFieldType.FEATURE_STATS)
-                endpoint_dict.pop(mm_schemas.EventFieldType.CURRENT_STATS)
-
-            if labels and not self._validate_labels(
-                endpoint_dict=endpoint_dict, labels=labels
-            ):
-                continue
-
-            endpoint_list.append(endpoint_dict)
-
-        return endpoint_list
-
-    def delete_model_endpoints_resources(self):
-        """
-        Delete all model endpoints resources in V3IO KV.
-        """
-        logger.debug(
-            "Deleting model monitoring endpoints resources in V3IO KV",
-            project=self.project,
-        )
-
-        endpoints = self.list_model_endpoints()
-
-        # Delete model endpoint record from KV table
-        for endpoint_dict in endpoints:
-            if mm_schemas.EventFieldType.UID not in endpoint_dict:
-                # This is kept for backwards compatibility - in old versions the key column named endpoint_id
-                endpoint_id = endpoint_dict[mm_schemas.EventFieldType.ENDPOINT_ID]
-            else:
-                endpoint_id = endpoint_dict[mm_schemas.EventFieldType.UID]
-
-            logger.debug(
-                "Deleting model endpoint resources from the V3IO KV table",
-                endpoint_id=endpoint_id,
-                project=self.project,
-            )
-
-            self.delete_model_endpoint(
-                endpoint_id,
-            )
-
-        logger.debug(
-            "Successfully deleted model monitoring endpoints from the V3IO KV table",
-            project=self.project,
-        )
-
-        # Delete remain records in the KV
-        all_records = self.client.kv.new_cursor(
-            container=self.container,
-            table_path=self.path,
-            raise_for_status=v3io.dataplane.RaiseForStatus.never,
-        ).all()
-
-        all_records = [r["__name"] for r in all_records]
-
-        # Cleanup KV
-        for record in all_records:
-            self.client.kv.delete(
-                container=self.container,
-                table_path=self.path,
-                key=record,
-                raise_for_status=v3io.dataplane.RaiseForStatus.never,
-            )
-
-    @staticmethod
-    def _get_results_table_path(endpoint_id: str) -> str:
-        return endpoint_id
-
-    @staticmethod
-    def _get_metrics_table_path(endpoint_id: str) -> str:
-        return f"{endpoint_id}_metrics"
-
-    def write_application_event(
-        self,
-        event: dict[str, typing.Any],
-        kind: mm_schemas.WriterEventKind = mm_schemas.WriterEventKind.RESULT,
-    ) -> None:
-        """
-        Write a new application event in the target table.
-
-        :param event: An event dictionary that represents the application result, should be corresponded to the
-                      schema defined in the :py:class:`~mlrun.common.schemas.model_monitoring.constants.WriterEvent`
-                      object.
-        :param kind: The type of the event, can be either "result" or "metric".
-        """
-
-        container = self.get_v3io_monitoring_apps_container(project_name=self.project)
-        endpoint_id = event.pop(mm_schemas.WriterEvent.ENDPOINT_ID)
-
-        if kind == mm_schemas.WriterEventKind.METRIC:
-            table_path = self._get_metrics_table_path(endpoint_id)
-            key = f"{event[mm_schemas.WriterEvent.APPLICATION_NAME]}.{event[mm_schemas.MetricData.METRIC_NAME]}"
-            attributes = {event_key: event[event_key] for event_key in _METRIC_FIELDS}
-        elif kind == mm_schemas.WriterEventKind.RESULT:
-            table_path = self._get_results_table_path(endpoint_id)
-            key = event.pop(mm_schemas.WriterEvent.APPLICATION_NAME)
-            metric_name = event.pop(mm_schemas.ResultData.RESULT_NAME)
-            attributes = {metric_name: self._encode_field(json.dumps(event))}
-        else:
-            raise ValueError(f"Invalid {kind = }")
-
-        self.client.kv.update(
-            container=container,
-            table_path=table_path,
-            key=key,
-            attributes=attributes,
-        )
-
-        schema_file = self.client.kv.new_cursor(
-            container=container,
-            table_path=table_path,
-            filter_expression='__name==".#schema"',
-        )
-
-        if not schema_file.all():
-            logger.info(
-                "Generating a new V3IO KV schema file",
-                container=container,
-                table_path=table_path,
-            )
-            self._generate_kv_schema(
-                container=container, table_path=table_path, kind=kind
-            )
-        logger.info("Updated V3IO KV successfully", key=key)
-
-    def _generate_kv_schema(
-        self, *, container: str, table_path: str, kind: mm_schemas.WriterEventKind
-    ) -> None:
-        """Generate V3IO KV schema file which will be used by the model monitoring applications dashboard in Grafana."""
-        schema_params = _KIND_TO_SCHEMA_PARAMS[kind]
-        res = self.client.kv.create_schema(
-            container=container,
-            table_path=table_path,
-            key=schema_params.key,
-            fields=schema_params.fields,
-        )
-        if res.status_code != HTTPStatus.OK:
-            raise mlrun.errors.MLRunBadRequestError(
-                f"Couldn't infer schema for endpoint {table_path} which is required for Grafana dashboards"
-            )
-        else:
-            logger.info("Generated V3IO KV schema successfully", table_path=table_path)
-
-    def get_last_analyzed(self, endpoint_id: str, application_name: str) -> int:
-        """
-        Get the last analyzed time for the provided model endpoint and application.
-
-        :param endpoint_id: The unique id of the model endpoint.
-        :param application_name: Registered application name.
-
-        :return: Timestamp as a Unix time.
-        :raise: MLRunNotFoundError if last analyzed value is not found.
-
-        """
-        try:
-            response = self.client.kv.get(
-                container=self._get_monitoring_schedules_container(
-                    project_name=self.project
-                ),
-                table_path=endpoint_id,
-                key=application_name,
-            )
-            return response.output.item[mm_schemas.SchedulingKeys.LAST_ANALYZED]
-        except v3io.dataplane.response.HttpResponseError as err:
-            if err.status_code == http.HTTPStatus.NOT_FOUND:
-                logger.debug("Last analyzed time not found", err=err)
-                raise mlrun.errors.MLRunNotFoundError(
-                    f"No last analyzed value has been found for {application_name} "
-                    f"that processes model endpoint {endpoint_id}",
-                )
-            logger.error("Error while getting last analyzed time", err=err)
-            raise err
-
-    def update_last_analyzed(
-        self, endpoint_id: str, application_name: str, last_analyzed: int
-    ):
-        """
-        Update the last analyzed time for the provided model endpoint and application.
-
-        :param endpoint_id: The unique id of the model endpoint.
-        :param application_name: Registered application name.
-        :param last_analyzed: Timestamp as a Unix time that represents the last analyzed time of a certain
-                              application and model endpoint.
-        """
-        self.client.kv.put(
-            container=self._get_monitoring_schedules_container(
-                project_name=self.project
-            ),
-            table_path=endpoint_id,
-            key=application_name,
-            attributes={mm_schemas.SchedulingKeys.LAST_ANALYZED: last_analyzed},
-        )
-
-    def _generate_tsdb_paths(self) -> tuple[str, str]:
-        """Generate a short path to the TSDB resources and a filtered path for the frames object
-        :return: A tuple of:
-                 [0] = Short path to the TSDB resources
-                 [1] = Filtered path to TSDB events without schema and container
-        """
-        # Full path for the time series DB events
-        full_path = (
-            mlrun.mlconf.model_endpoint_monitoring.store_prefixes.default.format(
-                project=self.project,
-                kind=mm_schemas.ModelMonitoringStoreKinds.EVENTS,
-            )
-        )
-
-        # Generate the main directory with the TSDB resources
-        tsdb_path = (
-            mlrun.common.model_monitoring.helpers.parse_model_endpoint_project_prefix(
-                full_path, self.project
-            )
-        )
-
-        # Generate filtered path without schema and container as required by the frames object
-        (
-            _,
-            _,
-            filtered_path,
-        ) = mlrun.common.model_monitoring.helpers.parse_model_endpoint_store_prefix(
-            full_path
-        )
-        return tsdb_path, filtered_path
-
-    def _infer_kv_schema(self):
-        """
-        Create KV schema file if not exist. This schema is being used by the Grafana dashboards.
-        """
-
-        schema_file = self.client.kv.new_cursor(
-            container=self.container,
-            table_path=self.path,
-            filter_expression='__name==".#schema"',
-        )
-
-        if not schema_file.all():
-            logger.info("Generate a new V3IO KV schema file", kv_table_path=self.path)
-            frames_client = self._get_frames_client()
-            frames_client.execute(backend="kv", table=self.path, command="infer_schema")
-
-    def _get_frames_client(self):
-        return mlrun.utils.v3io_clients.get_frames_client(
-            address=mlrun.mlconf.v3io_framesd,
-            container=self.container,
-        )
-
-    @staticmethod
-    def _build_kv_cursor_filter_expression(
-        project: str,
-        function: str = None,
-        model: str = None,
-        top_level: bool = False,
-    ) -> str:
-        """
-        Convert the provided filters into a valid filter expression. The expected filter expression includes different
-        conditions, divided by ' AND '.
-
-        :param project: The name of the project.
-        :param model: The name of the model to filter by.
-        :param function: The name of the function to filter by.
-        :param top_level: If True will return only routers and endpoint that are NOT children of any router.
-
-        :return: A valid filter expression as a string.
-
-        :raise MLRunInvalidArgumentError: If project value is None.
-        """
-
-        if not project:
-            raise mlrun.errors.MLRunInvalidArgumentError("project can't be empty")
-
-        # Add project filter
-        filter_expression = [f"{mm_schemas.EventFieldType.PROJECT}=='{project}'"]
-
-        # Add function and model filters
-        if function:
-            function_uri = f"{project}/{function}" if function else None
-            filter_expression.append(
-                f"{mm_schemas.EventFieldType.FUNCTION_URI}=='{function_uri}'"
-            )
-        if model:
-            model = model if ":" in model else f"{model}:latest"
-            filter_expression.append(f"{mm_schemas.EventFieldType.MODEL}=='{model}'")
-
-        # Apply top_level filter (remove endpoints that considered a child of a router)
-        if top_level:
-            filter_expression.append(
-                f"(endpoint_type=='{str(mm_schemas.EndpointType.NODE_EP.value)}' "
-                f"OR endpoint_type=='{str(mm_schemas.EndpointType.ROUTER.value)}')"
-            )
-
-        return " AND ".join(filter_expression)
-
-    @staticmethod
-    def validate_old_schema_fields(endpoint: dict):
-        """
-        Replace default null values for `error_count` and `metrics` for users that logged a model endpoint before 1.3.0.
-        In addition, this function also validates that the key name of the endpoint unique id is `uid` and not
-        `endpoint_id` that has been used before 1.3.0.
-
-        Leaving here for backwards compatibility which related to the model endpoint schema.
-
-        :param endpoint: An endpoint flattened dictionary.
-        """
-
-        # Validate default value for `error_count`
-        # For backwards compatibility reasons, we validate that the model endpoint includes the `error_count` key
-        if (
-            mm_schemas.EventFieldType.ERROR_COUNT in endpoint
-            and endpoint[mm_schemas.EventFieldType.ERROR_COUNT] == "null"
-        ):
-            endpoint[mm_schemas.EventFieldType.ERROR_COUNT] = "0"
-
-        # Validate default value for `metrics`
-        # For backwards compatibility reasons, we validate that the model endpoint includes the `metrics` key
-        if (
-            mm_schemas.EventFieldType.METRICS in endpoint
-            and endpoint[mm_schemas.EventFieldType.METRICS] == "null"
-        ):
-            endpoint[mm_schemas.EventFieldType.METRICS] = json.dumps(
-                {
-                    mm_schemas.EventKeyMetrics.GENERIC: {
-                        mm_schemas.EventLiveStats.LATENCY_AVG_1H: 0,
-                        mm_schemas.EventLiveStats.PREDICTIONS_PER_SECOND: 0,
-                    }
-                }
-            )
-        # Validate key `uid` instead of `endpoint_id`
-        # For backwards compatibility reasons, we replace the `endpoint_id` with `uid` which is the updated key name
-        if mm_schemas.EventFieldType.ENDPOINT_ID in endpoint:
-            endpoint[mm_schemas.EventFieldType.UID] = endpoint[
-                mm_schemas.EventFieldType.ENDPOINT_ID
-            ]
-
-    @staticmethod
-    def _encode_field(field: typing.Union[str, bytes]) -> bytes:
-        """Encode a provided field. Mainly used when storing data in the KV table."""
-
-        if isinstance(field, str):
-            return field.encode("ascii")
-        return field
-
-    @staticmethod
-    def _decode_field(field: typing.Union[str, bytes]) -> str:
-        """Decode a provided field. Mainly used when retrieving data from the KV table."""
-
-        if isinstance(field, bytes):
-            return field.decode()
-        return field
-
-    @staticmethod
-    def get_v3io_monitoring_apps_container(project_name: str) -> str:
-        return f"users/pipelines/{project_name}/monitoring-apps"
-
-    @staticmethod
-    def _get_monitoring_schedules_container(project_name: str) -> str:
-        return f"users/pipelines/{project_name}/monitoring-schedules/functions"
-
-    def _extract_results_from_items(
-        self, app_items: list[dict[str, str]]
-    ) -> list[mm_schemas.ModelEndpointMonitoringMetric]:
-        """Assuming .#schema items are filtered out"""
-        metrics: list[mm_schemas.ModelEndpointMonitoringMetric] = []
-        for app_item in app_items:
-            app_name = app_item.pop("__name")
-            for result_name in app_item:
-                metrics.append(
-                    mm_schemas.ModelEndpointMonitoringMetric(
-                        project=self.project,
-                        app=app_name,
-                        type=mm_schemas.ModelEndpointMonitoringMetricType.RESULT,
-                        name=result_name,
-                        full_name=mm_schemas.model_endpoints._compose_full_name(
-                            project=self.project, app=app_name, name=result_name
-                        ),
-                    )
-                )
-        return metrics
-
-    def _extract_metrics_from_items(
-        self, result_items: list[dict[str, str]]
-    ) -> list[mm_schemas.ModelEndpointMonitoringMetric]:
-        metrics: list[mm_schemas.ModelEndpointMonitoringMetric] = []
-        logger.debug("Result items", result_items=result_items)
-        for result_item in result_items:
-            app = result_item[mm_schemas.WriterEvent.APPLICATION_NAME]
-            name = result_item[mm_schemas.MetricData.METRIC_NAME]
-            metrics.append(
-                mm_schemas.ModelEndpointMonitoringMetric(
-                    project=self.project,
-                    app=app,
-                    type=mm_schemas.ModelEndpointMonitoringMetricType.METRIC,
-                    name=name,
-                    full_name=mm_schemas.model_endpoints._compose_full_name(
-                        project=self.project,
-                        app=app,
-                        name=name,
-                        type=mm_schemas.ModelEndpointMonitoringMetricType.METRIC,
-                    ),
-                )
-            )
-        return metrics
-
-    def get_model_endpoint_metrics(
-        self, endpoint_id: str, type: mm_schemas.ModelEndpointMonitoringMetricType
-    ) -> list[mm_schemas.ModelEndpointMonitoringMetric]:
-        """Get model monitoring results and metrics on the endpoint"""
-        metrics: list[mm_schemas.ModelEndpointMonitoringMetric] = []
-        container = self.get_v3io_monitoring_apps_container(self.project)
-        if type == mm_schemas.ModelEndpointMonitoringMetricType.METRIC:
-            table_path = self._get_metrics_table_path(endpoint_id)
-            items_extractor = self._extract_metrics_from_items
-        elif type == mm_schemas.ModelEndpointMonitoringMetricType.RESULT:
-            table_path = self._get_results_table_path(endpoint_id)
-            items_extractor = self._extract_results_from_items
-        else:
-            raise ValueError(f"Invalid metric {type = }")
-
-        def scan(
-            marker: typing.Optional[str] = None,
-        ) -> v3io.dataplane.response.Response:
-            # TODO: Use AIO client: `v3io.aio.dataplane.client.Client`
-            return self.client.kv.scan(
-                container=container,
-                table_path=table_path,
-                marker=marker,
-                filter_expression=_EXCLUDE_SCHEMA_FILTER_EXPRESSION,
-            )
-
-        try:
-            response = scan()
-        except v3io.dataplane.response.HttpResponseError as err:
-            if err.status_code == HTTPStatus.NOT_FOUND:
-                logger.warning(
-                    f"Attempt getting {type}s - no data. Check the "
-                    "project name, endpoint, or wait for the applications to start.",
-                    container=container,
-                    table_path=table_path,
-                )
-                return []
-            raise
-
-        while True:
-            output = typing.cast(v3io.dataplane.output.GetItemsOutput, response.output)
-            metrics.extend(items_extractor(output.items))
-            if output.last:
-                break
-            response = scan(marker=output.next_marker)
-
-        return metrics
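
For context, the removed KVStoreBase above is a thin wrapper around the V3IO dataplane KV API, and the rest of the deleted mlrun/model_monitoring/db/stores package (items 242-252 in the file list) goes with it. Below is a minimal sketch of the underlying access pattern, using only calls that appear verbatim in the diff (client.kv.put, client.kv.get, RaiseForStatus.never). The endpoint URL, access key, container, table path, and record key are placeholder assumptions, and constructing the client directly with endpoint/access_key keyword arguments is also an assumption; the removed store obtained its client through mlrun.utils.v3io_clients.get_v3io_client(endpoint=mlrun.mlconf.v3io_api) instead.

import v3io.dataplane

# Placeholder connection details (assumed), not values taken from mlrun.
client = v3io.dataplane.Client(
    endpoint="https://v3io-webapi:8081",  # assumed V3IO API endpoint
    access_key="my-access-key",  # assumed access key
)

# Write (or overwrite) a record keyed by a model endpoint uid, as write_model_endpoint did.
client.kv.put(
    container="users",
    table_path="pipelines/my-project/model-endpoints/endpoints/",
    key="endpoint-uid",
    attributes={"error_count": "0"},
)

# Read the record back. With RaiseForStatus.never a missing key does not raise;
# the removed get_model_endpoint detected "not found" by checking for an empty item.
response = client.kv.get(
    container="users",
    table_path="pipelines/my-project/model-endpoints/endpoints/",
    key="endpoint-uid",
    raise_for_status=v3io.dataplane.RaiseForStatus.never,
)
record = response.output.item

Code that imported these store classes directly will not find them in 1.8.0; the diff only records the removal, so where that logic now lives has to be checked against the new and changed modules listed above.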