mlrun-1.7.0rc5-py3-none-any.whl → mlrun-1.7.2-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of mlrun might be problematic.

Files changed (234)
  1. mlrun/__init__.py +11 -1
  2. mlrun/__main__.py +39 -121
  3. mlrun/{datastore/helpers.py → alerts/__init__.py} +2 -5
  4. mlrun/alerts/alert.py +248 -0
  5. mlrun/api/schemas/__init__.py +4 -3
  6. mlrun/artifacts/__init__.py +8 -3
  7. mlrun/artifacts/base.py +39 -254
  8. mlrun/artifacts/dataset.py +9 -190
  9. mlrun/artifacts/manager.py +73 -46
  10. mlrun/artifacts/model.py +30 -158
  11. mlrun/artifacts/plots.py +23 -380
  12. mlrun/common/constants.py +73 -2
  13. mlrun/common/db/sql_session.py +3 -2
  14. mlrun/common/formatters/__init__.py +21 -0
  15. mlrun/common/formatters/artifact.py +46 -0
  16. mlrun/common/formatters/base.py +113 -0
  17. mlrun/common/formatters/feature_set.py +44 -0
  18. mlrun/common/formatters/function.py +46 -0
  19. mlrun/common/formatters/pipeline.py +53 -0
  20. mlrun/common/formatters/project.py +51 -0
  21. mlrun/common/formatters/run.py +29 -0
  22. mlrun/common/helpers.py +11 -1
  23. mlrun/{runtimes → common/runtimes}/constants.py +32 -4
  24. mlrun/common/schemas/__init__.py +21 -4
  25. mlrun/common/schemas/alert.py +202 -0
  26. mlrun/common/schemas/api_gateway.py +113 -2
  27. mlrun/common/schemas/artifact.py +28 -1
  28. mlrun/common/schemas/auth.py +11 -0
  29. mlrun/common/schemas/client_spec.py +2 -1
  30. mlrun/common/schemas/common.py +7 -4
  31. mlrun/common/schemas/constants.py +3 -0
  32. mlrun/common/schemas/feature_store.py +58 -28
  33. mlrun/common/schemas/frontend_spec.py +8 -0
  34. mlrun/common/schemas/function.py +11 -0
  35. mlrun/common/schemas/hub.py +7 -9
  36. mlrun/common/schemas/model_monitoring/__init__.py +21 -4
  37. mlrun/common/schemas/model_monitoring/constants.py +136 -42
  38. mlrun/common/schemas/model_monitoring/grafana.py +9 -5
  39. mlrun/common/schemas/model_monitoring/model_endpoints.py +89 -41
  40. mlrun/common/schemas/notification.py +69 -12
  41. mlrun/{runtimes/mpijob/v1alpha1.py → common/schemas/pagination.py} +10 -13
  42. mlrun/common/schemas/pipeline.py +7 -0
  43. mlrun/common/schemas/project.py +67 -16
  44. mlrun/common/schemas/runs.py +17 -0
  45. mlrun/common/schemas/schedule.py +1 -1
  46. mlrun/common/schemas/workflow.py +10 -2
  47. mlrun/common/types.py +14 -1
  48. mlrun/config.py +224 -58
  49. mlrun/data_types/data_types.py +11 -1
  50. mlrun/data_types/spark.py +5 -4
  51. mlrun/data_types/to_pandas.py +75 -34
  52. mlrun/datastore/__init__.py +8 -10
  53. mlrun/datastore/alibaba_oss.py +131 -0
  54. mlrun/datastore/azure_blob.py +131 -43
  55. mlrun/datastore/base.py +107 -47
  56. mlrun/datastore/datastore.py +17 -7
  57. mlrun/datastore/datastore_profile.py +91 -7
  58. mlrun/datastore/dbfs_store.py +3 -7
  59. mlrun/datastore/filestore.py +1 -3
  60. mlrun/datastore/google_cloud_storage.py +92 -32
  61. mlrun/datastore/hdfs.py +5 -0
  62. mlrun/datastore/inmem.py +6 -3
  63. mlrun/datastore/redis.py +3 -2
  64. mlrun/datastore/s3.py +30 -12
  65. mlrun/datastore/snowflake_utils.py +45 -0
  66. mlrun/datastore/sources.py +274 -59
  67. mlrun/datastore/spark_utils.py +30 -0
  68. mlrun/datastore/store_resources.py +9 -7
  69. mlrun/datastore/storeytargets.py +151 -0
  70. mlrun/datastore/targets.py +374 -102
  71. mlrun/datastore/utils.py +68 -5
  72. mlrun/datastore/v3io.py +28 -50
  73. mlrun/db/auth_utils.py +152 -0
  74. mlrun/db/base.py +231 -22
  75. mlrun/db/factory.py +1 -4
  76. mlrun/db/httpdb.py +864 -228
  77. mlrun/db/nopdb.py +268 -16
  78. mlrun/errors.py +35 -5
  79. mlrun/execution.py +111 -38
  80. mlrun/feature_store/__init__.py +0 -2
  81. mlrun/feature_store/api.py +46 -53
  82. mlrun/feature_store/common.py +6 -11
  83. mlrun/feature_store/feature_set.py +48 -23
  84. mlrun/feature_store/feature_vector.py +13 -2
  85. mlrun/feature_store/ingestion.py +7 -6
  86. mlrun/feature_store/retrieval/base.py +9 -4
  87. mlrun/feature_store/retrieval/dask_merger.py +2 -0
  88. mlrun/feature_store/retrieval/job.py +13 -4
  89. mlrun/feature_store/retrieval/local_merger.py +2 -0
  90. mlrun/feature_store/retrieval/spark_merger.py +24 -32
  91. mlrun/feature_store/steps.py +38 -19
  92. mlrun/features.py +6 -14
  93. mlrun/frameworks/_common/plan.py +3 -3
  94. mlrun/frameworks/_dl_common/loggers/tensorboard_logger.py +7 -12
  95. mlrun/frameworks/_ml_common/plan.py +1 -1
  96. mlrun/frameworks/auto_mlrun/auto_mlrun.py +2 -2
  97. mlrun/frameworks/lgbm/__init__.py +1 -1
  98. mlrun/frameworks/lgbm/callbacks/callback.py +2 -4
  99. mlrun/frameworks/lgbm/model_handler.py +1 -1
  100. mlrun/frameworks/parallel_coordinates.py +4 -4
  101. mlrun/frameworks/pytorch/__init__.py +2 -2
  102. mlrun/frameworks/sklearn/__init__.py +1 -1
  103. mlrun/frameworks/sklearn/mlrun_interface.py +13 -3
  104. mlrun/frameworks/tf_keras/__init__.py +5 -2
  105. mlrun/frameworks/tf_keras/callbacks/logging_callback.py +1 -1
  106. mlrun/frameworks/tf_keras/mlrun_interface.py +2 -2
  107. mlrun/frameworks/xgboost/__init__.py +1 -1
  108. mlrun/k8s_utils.py +57 -12
  109. mlrun/launcher/__init__.py +1 -1
  110. mlrun/launcher/base.py +6 -5
  111. mlrun/launcher/client.py +13 -11
  112. mlrun/launcher/factory.py +1 -1
  113. mlrun/launcher/local.py +15 -5
  114. mlrun/launcher/remote.py +10 -3
  115. mlrun/lists.py +6 -2
  116. mlrun/model.py +297 -48
  117. mlrun/model_monitoring/__init__.py +1 -1
  118. mlrun/model_monitoring/api.py +152 -357
  119. mlrun/model_monitoring/applications/__init__.py +10 -0
  120. mlrun/model_monitoring/applications/_application_steps.py +190 -0
  121. mlrun/model_monitoring/applications/base.py +108 -0
  122. mlrun/model_monitoring/applications/context.py +341 -0
  123. mlrun/model_monitoring/{evidently_application.py → applications/evidently_base.py} +27 -22
  124. mlrun/model_monitoring/applications/histogram_data_drift.py +227 -91
  125. mlrun/model_monitoring/applications/results.py +99 -0
  126. mlrun/model_monitoring/controller.py +130 -303
  127. mlrun/model_monitoring/{stores/models/sqlite.py → db/__init__.py} +5 -10
  128. mlrun/model_monitoring/db/stores/__init__.py +136 -0
  129. mlrun/model_monitoring/db/stores/base/__init__.py +15 -0
  130. mlrun/model_monitoring/db/stores/base/store.py +213 -0
  131. mlrun/model_monitoring/db/stores/sqldb/__init__.py +13 -0
  132. mlrun/model_monitoring/db/stores/sqldb/models/__init__.py +71 -0
  133. mlrun/model_monitoring/db/stores/sqldb/models/base.py +190 -0
  134. mlrun/model_monitoring/db/stores/sqldb/models/mysql.py +103 -0
  135. mlrun/model_monitoring/{stores/models/mysql.py → db/stores/sqldb/models/sqlite.py} +19 -13
  136. mlrun/model_monitoring/db/stores/sqldb/sql_store.py +659 -0
  137. mlrun/model_monitoring/db/stores/v3io_kv/__init__.py +13 -0
  138. mlrun/model_monitoring/db/stores/v3io_kv/kv_store.py +726 -0
  139. mlrun/model_monitoring/db/tsdb/__init__.py +105 -0
  140. mlrun/model_monitoring/db/tsdb/base.py +448 -0
  141. mlrun/model_monitoring/db/tsdb/helpers.py +30 -0
  142. mlrun/model_monitoring/db/tsdb/tdengine/__init__.py +15 -0
  143. mlrun/model_monitoring/db/tsdb/tdengine/schemas.py +298 -0
  144. mlrun/model_monitoring/db/tsdb/tdengine/stream_graph_steps.py +42 -0
  145. mlrun/model_monitoring/db/tsdb/tdengine/tdengine_connector.py +522 -0
  146. mlrun/model_monitoring/db/tsdb/v3io/__init__.py +15 -0
  147. mlrun/model_monitoring/db/tsdb/v3io/stream_graph_steps.py +158 -0
  148. mlrun/model_monitoring/db/tsdb/v3io/v3io_connector.py +849 -0
  149. mlrun/model_monitoring/features_drift_table.py +34 -22
  150. mlrun/model_monitoring/helpers.py +177 -39
  151. mlrun/model_monitoring/model_endpoint.py +3 -2
  152. mlrun/model_monitoring/stream_processing.py +165 -398
  153. mlrun/model_monitoring/tracking_policy.py +7 -1
  154. mlrun/model_monitoring/writer.py +161 -125
  155. mlrun/package/packagers/default_packager.py +2 -2
  156. mlrun/package/packagers_manager.py +1 -0
  157. mlrun/package/utils/_formatter.py +2 -2
  158. mlrun/platforms/__init__.py +11 -10
  159. mlrun/platforms/iguazio.py +67 -228
  160. mlrun/projects/__init__.py +6 -1
  161. mlrun/projects/operations.py +47 -20
  162. mlrun/projects/pipelines.py +396 -249
  163. mlrun/projects/project.py +1125 -414
  164. mlrun/render.py +28 -22
  165. mlrun/run.py +207 -180
  166. mlrun/runtimes/__init__.py +76 -11
  167. mlrun/runtimes/base.py +40 -14
  168. mlrun/runtimes/daskjob.py +9 -2
  169. mlrun/runtimes/databricks_job/databricks_runtime.py +1 -0
  170. mlrun/runtimes/databricks_job/databricks_wrapper.py +1 -1
  171. mlrun/runtimes/funcdoc.py +1 -29
  172. mlrun/runtimes/kubejob.py +34 -128
  173. mlrun/runtimes/local.py +39 -10
  174. mlrun/runtimes/mpijob/__init__.py +0 -20
  175. mlrun/runtimes/mpijob/abstract.py +8 -8
  176. mlrun/runtimes/mpijob/v1.py +1 -1
  177. mlrun/runtimes/nuclio/api_gateway.py +646 -177
  178. mlrun/runtimes/nuclio/application/__init__.py +15 -0
  179. mlrun/runtimes/nuclio/application/application.py +758 -0
  180. mlrun/runtimes/nuclio/application/reverse_proxy.go +95 -0
  181. mlrun/runtimes/nuclio/function.py +188 -68
  182. mlrun/runtimes/nuclio/serving.py +57 -60
  183. mlrun/runtimes/pod.py +191 -58
  184. mlrun/runtimes/remotesparkjob.py +11 -8
  185. mlrun/runtimes/sparkjob/spark3job.py +17 -18
  186. mlrun/runtimes/utils.py +40 -73
  187. mlrun/secrets.py +6 -2
  188. mlrun/serving/__init__.py +8 -1
  189. mlrun/serving/remote.py +2 -3
  190. mlrun/serving/routers.py +89 -64
  191. mlrun/serving/server.py +54 -26
  192. mlrun/serving/states.py +187 -56
  193. mlrun/serving/utils.py +19 -11
  194. mlrun/serving/v2_serving.py +136 -63
  195. mlrun/track/tracker.py +2 -1
  196. mlrun/track/trackers/mlflow_tracker.py +5 -0
  197. mlrun/utils/async_http.py +26 -6
  198. mlrun/utils/db.py +18 -0
  199. mlrun/utils/helpers.py +375 -105
  200. mlrun/utils/http.py +2 -2
  201. mlrun/utils/logger.py +75 -9
  202. mlrun/utils/notifications/notification/__init__.py +14 -10
  203. mlrun/utils/notifications/notification/base.py +48 -0
  204. mlrun/utils/notifications/notification/console.py +2 -0
  205. mlrun/utils/notifications/notification/git.py +24 -1
  206. mlrun/utils/notifications/notification/ipython.py +2 -0
  207. mlrun/utils/notifications/notification/slack.py +96 -21
  208. mlrun/utils/notifications/notification/webhook.py +63 -2
  209. mlrun/utils/notifications/notification_pusher.py +146 -16
  210. mlrun/utils/regex.py +9 -0
  211. mlrun/utils/retryer.py +3 -2
  212. mlrun/utils/v3io_clients.py +2 -3
  213. mlrun/utils/version/version.json +2 -2
  214. mlrun-1.7.2.dist-info/METADATA +390 -0
  215. mlrun-1.7.2.dist-info/RECORD +351 -0
  216. {mlrun-1.7.0rc5.dist-info → mlrun-1.7.2.dist-info}/WHEEL +1 -1
  217. mlrun/feature_store/retrieval/conversion.py +0 -271
  218. mlrun/kfpops.py +0 -868
  219. mlrun/model_monitoring/application.py +0 -310
  220. mlrun/model_monitoring/batch.py +0 -974
  221. mlrun/model_monitoring/controller_handler.py +0 -37
  222. mlrun/model_monitoring/prometheus.py +0 -216
  223. mlrun/model_monitoring/stores/__init__.py +0 -111
  224. mlrun/model_monitoring/stores/kv_model_endpoint_store.py +0 -574
  225. mlrun/model_monitoring/stores/model_endpoint_store.py +0 -145
  226. mlrun/model_monitoring/stores/models/__init__.py +0 -27
  227. mlrun/model_monitoring/stores/models/base.py +0 -84
  228. mlrun/model_monitoring/stores/sql_model_endpoint_store.py +0 -382
  229. mlrun/platforms/other.py +0 -305
  230. mlrun-1.7.0rc5.dist-info/METADATA +0 -269
  231. mlrun-1.7.0rc5.dist-info/RECORD +0 -323
  232. {mlrun-1.7.0rc5.dist-info → mlrun-1.7.2.dist-info}/LICENSE +0 -0
  233. {mlrun-1.7.0rc5.dist-info → mlrun-1.7.2.dist-info}/entry_points.txt +0 -0
  234. {mlrun-1.7.0rc5.dist-info → mlrun-1.7.2.dist-info}/top_level.txt +0 -0

mlrun/model_monitoring/db/tsdb/v3io/v3io_connector.py (new file)
@@ -0,0 +1,849 @@
+ # Copyright 2024 Iguazio
+ #
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ #
+ #     http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+
+ from datetime import datetime, timezone
+ from io import StringIO
+ from typing import Literal, Optional, Union
+
+ import pandas as pd
+ import v3io_frames
+ import v3io_frames.client
+
+ import mlrun.common.model_monitoring.helpers
+ import mlrun.common.schemas.model_monitoring as mm_schemas
+ import mlrun.feature_store.steps
+ import mlrun.utils.v3io_clients
+ from mlrun.common.schemas import EventFieldType
+ from mlrun.model_monitoring.db import TSDBConnector
+ from mlrun.model_monitoring.helpers import get_invocations_fqn
+ from mlrun.utils import logger
+
+ _TSDB_BE = "tsdb"
+ _TSDB_RATE = "1/s"
+ _CONTAINER = "users"
+
+
+ def _is_no_schema_error(exc: v3io_frames.Error) -> bool:
+     """
+     In case of a nonexistent TSDB table, a `v3io_frames.ReadError` is raised.
+     Check whether the error message contains the relevant string to verify the cause.
+     """
+     msg = str(exc)
+     # https://github.com/v3io/v3io-tsdb/blob/v0.14.1/pkg/tsdb/v3iotsdb.go#L205
+     # https://github.com/v3io/v3io-tsdb/blob/v0.14.1/pkg/partmgr/partmgr.go#L238
+     return "No TSDB schema file found" in msg or "Failed to read schema at path" in msg
+
+
+ class V3IOTSDBConnector(TSDBConnector):
+     """
+     Handles the TSDB operations when the TSDB connector is of type V3IO. To manage these operations we use the
+     V3IO Frames client, which provides an API for executing commands on the V3IO TSDB table.
+     """
+
+     type: str = mm_schemas.TSDBTarget.V3IO_TSDB
+
+     def __init__(
+         self,
+         project: str,
+         container: str = _CONTAINER,
+         v3io_framesd: Optional[str] = None,
+         create_table: bool = False,
+     ) -> None:
+         super().__init__(project=project)
+
+         self.container = container
+
+         self.v3io_framesd = v3io_framesd or mlrun.mlconf.v3io_framesd
+         self._frames_client: Optional[v3io_frames.client.ClientBase] = None
+         self._init_tables_path()
+         self._create_table = create_table
+
+     @property
+     def frames_client(self) -> v3io_frames.client.ClientBase:
+         if not self._frames_client:
+             self._frames_client = self._get_v3io_frames_client(self.container)
+             if self._create_table:
+                 self.create_tables()
+         return self._frames_client
+
+     def _init_tables_path(self):
+         self.tables = {}
+
+         events_table_full_path = mlrun.mlconf.get_model_monitoring_file_target_path(
+             project=self.project,
+             kind=mm_schemas.FileTargetKind.EVENTS,
+         )
+         (
+             _,
+             _,
+             events_path,
+         ) = mlrun.common.model_monitoring.helpers.parse_model_endpoint_store_prefix(
+             events_table_full_path
+         )
+         self.tables[mm_schemas.V3IOTSDBTables.EVENTS] = events_path
+
+         errors_table_full_path = mlrun.mlconf.get_model_monitoring_file_target_path(
+             project=self.project,
+             kind=mm_schemas.FileTargetKind.ERRORS,
+         )
+         (
+             _,
+             _,
+             errors_path,
+         ) = mlrun.common.model_monitoring.helpers.parse_model_endpoint_store_prefix(
+             errors_table_full_path
+         )
+         self.tables[mm_schemas.V3IOTSDBTables.ERRORS] = errors_path
+
+         monitoring_application_full_path = (
+             mlrun.mlconf.get_model_monitoring_file_target_path(
+                 project=self.project,
+                 kind=mm_schemas.FileTargetKind.MONITORING_APPLICATION,
+             )
+         )
+         (
+             _,
+             _,
+             monitoring_application_path,
+         ) = mlrun.common.model_monitoring.helpers.parse_model_endpoint_store_prefix(
+             monitoring_application_full_path
+         )
+         self.tables[mm_schemas.V3IOTSDBTables.APP_RESULTS] = (
+             monitoring_application_path + mm_schemas.V3IOTSDBTables.APP_RESULTS
+         )
+         self.tables[mm_schemas.V3IOTSDBTables.METRICS] = (
+             monitoring_application_path + mm_schemas.V3IOTSDBTables.METRICS
+         )
+
+         monitoring_predictions_full_path = (
+             mlrun.mlconf.get_model_monitoring_file_target_path(
+                 project=self.project,
+                 kind=mm_schemas.FileTargetKind.PREDICTIONS,
+             )
+         )
+         (
+             _,
+             _,
+             monitoring_predictions_path,
+         ) = mlrun.common.model_monitoring.helpers.parse_model_endpoint_store_prefix(
+             monitoring_predictions_full_path
+         )
+         self.tables[mm_schemas.FileTargetKind.PREDICTIONS] = monitoring_predictions_path
+
+     def create_tables(self) -> None:
+         """
+         Create the tables using the TSDB connector. The tables are created in the V3IO TSDB and include:
+         - app_results: a detailed result that includes status, kind, extra data, etc.
+         - metrics: a basic key-value record that represents a single numeric metric.
+         Note that the predictions table is automatically created by the model monitoring stream pod.
+         """
+         application_tables = [
+             mm_schemas.V3IOTSDBTables.APP_RESULTS,
+             mm_schemas.V3IOTSDBTables.METRICS,
+         ]
+         for table_name in application_tables:
+             logger.info("Creating table in V3IO TSDB", table_name=table_name)
+             table = self.tables[table_name]
+             self.frames_client.create(
+                 backend=_TSDB_BE,
+                 table=table,
+                 if_exists=v3io_frames.IGNORE,
+                 rate=_TSDB_RATE,
+             )
+
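# --- Illustrative sketch (not part of the released diff): how the lazy
# `frames_client` property interacts with the `create_table` flag. The project
# name is hypothetical; a configured v3io_framesd endpoint is assumed.
connector = V3IOTSDBConnector(project="my-project", create_table=True)
# The first access builds the frames client and, since create_table=True,
# creates the app_results and metrics TSDB tables (if_exists=IGNORE makes
# this idempotent, so repeated accesses are safe).
client = connector.frames_client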
+     def apply_monitoring_stream_steps(
+         self,
+         graph,
+         tsdb_batching_max_events: int = 1000,
+         tsdb_batching_timeout_secs: int = 30,
+         sample_window: int = 10,
+     ):
+         """
+         Apply TSDB steps on the provided monitoring graph. Throughout these steps, the graph stores live data of
+         different key metric dictionaries. This data is used by the monitoring dashboards in Grafana.
+         Results can be found under v3io:///users/pipelines/project-name/model-endpoints/events/.
+         In that case, we generate 3 different key metric dictionaries:
+         - base_metrics (average latency and predictions over time)
+         - endpoint_features (prediction and feature names and values)
+         - custom_metrics (user-defined metrics)
+         """
+
+         # Write latency per prediction, labeled by endpoint ID only
+         graph.add_step(
+             "storey.TSDBTarget",
+             name="tsdb_predictions",
+             after="MapFeatureNames",
+             path=f"{self.container}/{self.tables[mm_schemas.FileTargetKind.PREDICTIONS]}",
+             rate="1/s",
+             time_col=mm_schemas.EventFieldType.TIMESTAMP,
+             container=self.container,
+             v3io_frames=self.v3io_framesd,
+             columns=[
+                 mm_schemas.EventFieldType.LATENCY,
+                 mm_schemas.EventFieldType.LAST_REQUEST_TIMESTAMP,
+             ],
+             index_cols=[
+                 mm_schemas.EventFieldType.ENDPOINT_ID,
+             ],
+             aggr="count,avg",
+             aggr_granularity="1m",
+             max_events=tsdb_batching_max_events,
+             flush_after_seconds=tsdb_batching_timeout_secs,
+             key=mm_schemas.EventFieldType.ENDPOINT_ID,
+         )
+
+         # Emit events in windows of `sample_window` size (10 by default)
+         graph.add_step(
+             "storey.steps.SampleWindow",
+             name="sample",
+             after="Rename",
+             window_size=sample_window,
+             key=EventFieldType.ENDPOINT_ID,
+         )
+
+         # Before writing data to TSDB, create a dictionary of 2-3 dictionaries that contain
+         # stats and details about the events
+         graph.add_step(
+             "mlrun.model_monitoring.db.tsdb.v3io.stream_graph_steps.ProcessBeforeTSDB",
+             name="ProcessBeforeTSDB",
+             after="sample",
+         )
+
+         # Unpack the keys from each dictionary and write them to the TSDB target;
+         # the resulting branch topology is sketched after this method
+         def apply_filter_and_unpacked_keys(name, keys):
+             graph.add_step(
+                 "mlrun.model_monitoring.db.tsdb.v3io.stream_graph_steps.FilterAndUnpackKeys",
+                 name=name,
+                 after="ProcessBeforeTSDB",
+                 keys=[keys],
+             )
+
+         def apply_tsdb_target(name, after):
+             graph.add_step(
+                 "storey.TSDBTarget",
+                 name=name,
+                 after=after,
+                 path=f"{self.container}/{self.tables[mm_schemas.V3IOTSDBTables.EVENTS]}",
+                 rate="10/m",
+                 time_col=mm_schemas.EventFieldType.TIMESTAMP,
+                 container=self.container,
+                 v3io_frames=self.v3io_framesd,
+                 infer_columns_from_data=True,
+                 index_cols=[
+                     mm_schemas.EventFieldType.ENDPOINT_ID,
+                     mm_schemas.EventFieldType.RECORD_TYPE,
+                     mm_schemas.EventFieldType.ENDPOINT_TYPE,
+                 ],
+                 max_events=tsdb_batching_max_events,
+                 flush_after_seconds=tsdb_batching_timeout_secs,
+                 key=mm_schemas.EventFieldType.ENDPOINT_ID,
+             )
+
+         # Unpack the base_metrics dictionary
+         apply_filter_and_unpacked_keys(
+             name="FilterAndUnpackKeys1",
+             keys=mm_schemas.EventKeyMetrics.BASE_METRICS,
+         )
+         apply_tsdb_target(name="tsdb1", after="FilterAndUnpackKeys1")
+
+         # Unpack the endpoint_features dictionary
+         apply_filter_and_unpacked_keys(
+             name="FilterAndUnpackKeys2",
+             keys=mm_schemas.EventKeyMetrics.ENDPOINT_FEATURES,
+         )
+         apply_tsdb_target(name="tsdb2", after="FilterAndUnpackKeys2")
+
+         # Unpack the custom_metrics dictionary. In addition, use storey.Filter to remove None values
+         apply_filter_and_unpacked_keys(
+             name="FilterAndUnpackKeys3",
+             keys=mm_schemas.EventKeyMetrics.CUSTOM_METRICS,
+         )
+
+         def apply_storey_filter():
+             graph.add_step(
+                 "storey.Filter",
+                 "FilterNotNone",
+                 after="FilterAndUnpackKeys3",
+                 _fn="(event is not None)",
+             )
+
+         apply_storey_filter()
+         apply_tsdb_target(name="tsdb3", after="FilterNotNone")
+
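# --- Illustrative sketch (not part of the released diff): the branch topology
# that apply_monitoring_stream_steps() builds on the stream graph. Step names
# are the ones registered above; the upstream steps (MapFeatureNames, Rename)
# belong to the monitoring stream graph itself.
#
#   MapFeatureNames -> tsdb_predictions                      (per-prediction latency)
#   Rename -> sample -> ProcessBeforeTSDB
#     -> FilterAndUnpackKeys1 -> tsdb1                       (base_metrics)
#     -> FilterAndUnpackKeys2 -> tsdb2                       (endpoint_features)
#     -> FilterAndUnpackKeys3 -> FilterNotNone -> tsdb3      (custom_metrics)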
+     def handle_model_error(
+         self,
+         graph,
+         tsdb_batching_max_events: int = 1000,
+         tsdb_batching_timeout_secs: int = 30,
+         **kwargs,
+     ) -> None:
+         graph.add_step(
+             "mlrun.model_monitoring.db.tsdb.v3io.stream_graph_steps.ErrorExtractor",
+             name="error_extractor",
+             after="ForwardError",
+         )
+
+         graph.add_step(
+             "storey.TSDBTarget",
+             name="tsdb_error",
+             after="error_extractor",
+             path=f"{self.container}/{self.tables[mm_schemas.FileTargetKind.ERRORS]}",
+             rate="1/s",
+             time_col=mm_schemas.EventFieldType.TIMESTAMP,
+             container=self.container,
+             v3io_frames=self.v3io_framesd,
+             columns=[
+                 mm_schemas.EventFieldType.MODEL_ERROR,
+                 mm_schemas.EventFieldType.ERROR_COUNT,
+             ],
+             index_cols=[
+                 mm_schemas.EventFieldType.ENDPOINT_ID,
+             ],
+             max_events=tsdb_batching_max_events,
+             flush_after_seconds=tsdb_batching_timeout_secs,
+             key=mm_schemas.EventFieldType.ENDPOINT_ID,
+         )
+
+     def write_application_event(
+         self,
+         event: dict,
+         kind: mm_schemas.WriterEventKind = mm_schemas.WriterEventKind.RESULT,
+     ) -> None:
+         """Write a single result or metric to TSDB"""
+
+         event[mm_schemas.WriterEvent.END_INFER_TIME] = datetime.fromisoformat(
+             event[mm_schemas.WriterEvent.END_INFER_TIME]
+         )
+         index_cols_base = [
+             mm_schemas.WriterEvent.END_INFER_TIME,
+             mm_schemas.WriterEvent.ENDPOINT_ID,
+             mm_schemas.WriterEvent.APPLICATION_NAME,
+         ]
+
+         if kind == mm_schemas.WriterEventKind.METRIC:
+             table = self.tables[mm_schemas.V3IOTSDBTables.METRICS]
+             index_cols = index_cols_base + [mm_schemas.MetricData.METRIC_NAME]
+         elif kind == mm_schemas.WriterEventKind.RESULT:
+             table = self.tables[mm_schemas.V3IOTSDBTables.APP_RESULTS]
+             index_cols = index_cols_base + [mm_schemas.ResultData.RESULT_NAME]
+             event.pop(mm_schemas.ResultData.CURRENT_STATS, None)
+             # TODO: remove this when extra data is supported (ML-7460)
+             event.pop(mm_schemas.ResultData.RESULT_EXTRA_DATA, None)
+         else:
+             raise ValueError(f"Invalid {kind = }")
+
+         try:
+             self.frames_client.write(
+                 backend=_TSDB_BE,
+                 table=table,
+                 dfs=pd.DataFrame.from_records([event]),
+                 index_cols=index_cols,
+             )
+             logger.info("Updated V3IO TSDB successfully", table=table)
+         except v3io_frames.Error as err:
+             logger.exception(
+                 "Could not write drift measures to TSDB",
+                 err=err,
+                 table=table,
+                 event=event,
+             )
+             raise mlrun.errors.MLRunRuntimeError(
+                 f"Failed to write application result to TSDB: {err}"
+             )
+
+     def delete_tsdb_resources(self, table: Optional[str] = None):
+         if table:
+             # Delete a specific table
+             tables = [table]
+         else:
+             # Delete all tables
+             tables = mm_schemas.V3IOTSDBTables.list()
+         for table_to_delete in tables:
+             try:
+                 self.frames_client.delete(backend=_TSDB_BE, table=table_to_delete)
+             except v3io_frames.DeleteError as e:
+                 logger.warning(
+                     f"Failed to delete TSDB table '{table_to_delete}'",
+                     err=mlrun.errors.err_to_str(e),
+                 )
+
+         # Final cleanup of the tsdb path
+         tsdb_path = self._get_v3io_source_directory()
+         tsdb_path = tsdb_path.replace("://u", ":///u")
+         store, _, _ = mlrun.store_manager.get_or_create_store(tsdb_path)
+         store.rm(tsdb_path, recursive=True)
+
+     def get_model_endpoint_real_time_metrics(
+         self, endpoint_id: str, metrics: list[str], start: str, end: str
+     ) -> dict[str, list[tuple[str, float]]]:
+         """
+         Get real-time metrics from the TSDB. There are pre-defined metrics for model endpoints such as
+         `predictions_per_second` and `latency_avg_5m`, but also custom metrics defined by the user. Note that
+         these metrics are calculated by the model monitoring stream pod.
+
+         :param endpoint_id: The unique id of the model endpoint.
+         :param metrics:     A list of real-time metrics to return for the model endpoint.
+         :param start:       The start time of the metrics. Can be represented by a string containing an RFC 3339
+                             time, a Unix timestamp in milliseconds, a relative time (`'now'` or
+                             `'now-[0-9]+[mhds]'`, where `m` = minutes, `h` = hours, `d` = days, and
+                             `s` = seconds), or 0 for the earliest time.
+         :param end:         The end time of the metrics, in the same formats as `start`.
+         :return:            A dictionary of metrics in which the key is a metric name and the value is a list of
+                             tuples that includes timestamps and the values.
+         """
+
+         if not metrics:
+             raise mlrun.errors.MLRunInvalidArgumentError(
+                 "Metric names must be provided"
+             )
+
+         metrics_mapping = {}
+
+         try:
+             data = self._get_records(
+                 table=mm_schemas.V3IOTSDBTables.EVENTS,
+                 columns=["endpoint_id", *metrics],
+                 filter_query=f"endpoint_id=='{endpoint_id}'",
+                 start=start,
+                 end=end,
+             )
+
+             # Fill the metrics mapping dictionary with the metric name and values
+             data_dict = data.to_dict()
+             for metric in metrics:
+                 metric_data = data_dict.get(metric)
+                 if metric_data is None:
+                     continue
+
+                 values = [
+                     (str(timestamp), value) for timestamp, value in metric_data.items()
+                 ]
+                 metrics_mapping[metric] = values
+
+         except v3io_frames.Error as err:
+             logger.warn("Failed to read tsdb", err=err, endpoint=endpoint_id)
+
+         return metrics_mapping
+
+     def _get_records(
+         self,
+         table: str,
+         start: Union[datetime, str],
+         end: Union[datetime, str],
+         columns: Optional[list[str]] = None,
+         filter_query: str = "",
+         interval: Optional[str] = None,
+         agg_funcs: Optional[list[str]] = None,
+         sliding_window_step: Optional[str] = None,
+         **kwargs,
+     ) -> pd.DataFrame:
+         """
+         Get records from the V3IO TSDB data collection.
+
+         :param table:               Path to the collection to query.
+         :param start:               The start time of the metrics. Can be represented by a string containing an
+                                     RFC 3339 time, a Unix timestamp in milliseconds, a relative time (`'now'` or
+                                     `'now-[0-9]+[mhds]'`, where `m` = minutes, `h` = hours, `d` = days, and
+                                     `s` = seconds), or 0 for the earliest time.
+         :param end:                 The end time of the metrics, in the same formats as `start`.
+         :param columns:             Columns to include in the result.
+         :param filter_query:        V3IO filter expression. The expected filter expression includes different
+                                     conditions, divided by ' AND '.
+         :param interval:            The interval to aggregate the data by. Note that if `interval` is provided,
+                                     `agg_funcs` must be provided as well. Provided as a string in the format of
+                                     '1m', '1h', etc.
+         :param agg_funcs:           The aggregation functions to apply on the columns. Note that if `agg_funcs` is
+                                     provided, `interval` must be provided as well. Provided as a list of strings in
+                                     the format of ['sum', 'avg', 'count', ...].
+         :param sliding_window_step: The time step for which the time window moves forward. Note that if
+                                     `sliding_window_step` is provided, `interval` must be provided as well.
+                                     Provided as a string in the format of '1m', '1h', etc.
+         :param kwargs:              Additional keyword arguments passed to the read method of the frames client.
+         :return:                    DataFrame with the provided attributes from the data collection.
+         :raise:                     MLRunNotFoundError if the provided table wasn't found.
+         """
+         if table not in self.tables:
+             raise mlrun.errors.MLRunNotFoundError(
+                 f"Table '{table}' does not exist in the tables list of the TSDB connector. "
+                 f"Available tables: {list(self.tables.keys())}"
+             )
+
+         # Frames client expects the aggregators to be a comma-separated string
+         aggregators = ",".join(agg_funcs) if agg_funcs else None
+         table_path = self.tables[table]
+         try:
+             df = self.frames_client.read(
+                 backend=_TSDB_BE,
+                 table=table_path,
+                 start=start,
+                 end=end,
+                 columns=columns,
+                 filter=filter_query,
+                 aggregation_window=interval,
+                 aggregators=aggregators,
+                 step=sliding_window_step,
+                 **kwargs,
+             )
+         except v3io_frames.Error as err:
+             if _is_no_schema_error(err):
+                 return pd.DataFrame()
+             else:
+                 raise err
+
+         return df
+
+     def _get_v3io_source_directory(self) -> str:
+         """
+         Get the V3IO source directory for the current project. Usually the source directory will
+         be under 'v3io:///users/pipelines/<project>'.
+
+         :return: The V3IO source directory for the current project.
+         """
+         events_table_full_path = mlrun.mlconf.get_model_monitoring_file_target_path(
+             project=self.project,
+             kind=mm_schemas.FileTargetKind.EVENTS,
+         )
+
+         # Generate the main directory with the V3IO resources
+         source_directory = (
+             mlrun.common.model_monitoring.helpers.parse_model_endpoint_project_prefix(
+                 events_table_full_path, self.project
+             )
+         )
+
+         return source_directory
+
+     @staticmethod
+     def _get_v3io_frames_client(v3io_container: str) -> v3io_frames.client.ClientBase:
+         return mlrun.utils.v3io_clients.get_frames_client(
+             address=mlrun.mlconf.v3io_framesd,
+             container=v3io_container,
+         )
+
+     def read_metrics_data(
+         self,
+         *,
+         endpoint_id: str,
+         start: datetime,
+         end: datetime,
+         metrics: list[mm_schemas.ModelEndpointMonitoringMetric],
+         type: Literal["metrics", "results"] = "results",
+     ) -> Union[
+         list[
+             Union[
+                 mm_schemas.ModelEndpointMonitoringResultValues,
+                 mm_schemas.ModelEndpointMonitoringMetricNoData,
+             ],
+         ],
+         list[
+             Union[
+                 mm_schemas.ModelEndpointMonitoringMetricValues,
+                 mm_schemas.ModelEndpointMonitoringMetricNoData,
+             ],
+         ],
+     ]:
+         """
+         Read metrics OR results from the TSDB and return them as a list.
+         Note: the type must match the actual metrics in the `metrics` parameter.
+         If the type is "results", pass only results in the `metrics` parameter.
+         """
+
+         if type == "metrics":
+             table_path = self.tables[mm_schemas.V3IOTSDBTables.METRICS]
+             name = mm_schemas.MetricData.METRIC_NAME
+             columns = [mm_schemas.MetricData.METRIC_VALUE]
+             df_handler = self.df_to_metrics_values
+         elif type == "results":
+             table_path = self.tables[mm_schemas.V3IOTSDBTables.APP_RESULTS]
+             name = mm_schemas.ResultData.RESULT_NAME
+             columns = [
+                 mm_schemas.ResultData.RESULT_VALUE,
+                 mm_schemas.ResultData.RESULT_STATUS,
+                 mm_schemas.ResultData.RESULT_KIND,
+             ]
+             df_handler = self.df_to_results_values
+         else:
+             raise ValueError(f"Invalid {type = }")
+
+         query = self._get_sql_query(
+             endpoint_id=endpoint_id,
+             metric_and_app_names=[(metric.app, metric.name) for metric in metrics],
+             table_path=table_path,
+             name=name,
+             columns=columns,
+         )
+
+         logger.debug("Querying V3IO TSDB", query=query)
+
+         df: pd.DataFrame = self.frames_client.read(
+             backend=_TSDB_BE,
+             start=start,
+             end=end,
+             query=query,  # the filter argument does not work for this complex condition
+         )
+
+         logger.debug(
+             "Converting a DataFrame to a list of metrics or results values",
+             table=table_path,
+             project=self.project,
+             endpoint_id=endpoint_id,
+             is_empty=df.empty,
+         )
+
+         return df_handler(df=df, metrics=metrics, project=self.project)
+
+     @staticmethod
+     def _get_sql_query(
+         *,
+         endpoint_id: str,
+         table_path: str,
+         name: str = mm_schemas.ResultData.RESULT_NAME,
+         metric_and_app_names: Optional[list[tuple[str, str]]] = None,
+         columns: Optional[list[str]] = None,
+     ) -> str:
+         """Get the SQL query for the results/metrics table"""
+         if columns:
+             selection = ",".join(columns)
+         else:
+             selection = "*"
+
+         with StringIO() as query:
+             query.write(
+                 f"SELECT {selection} FROM '{table_path}' "
+                 f"WHERE {mm_schemas.WriterEvent.ENDPOINT_ID}='{endpoint_id}'"
+             )
+             if metric_and_app_names:
+                 query.write(" AND (")
+
+                 for i, (app_name, result_name) in enumerate(metric_and_app_names):
+                     sub_cond = (
+                         f"({mm_schemas.WriterEvent.APPLICATION_NAME}='{app_name}' "
+                         f"AND {name}='{result_name}')"
+                     )
+                     if i != 0:  # not the first sub-condition
+                         query.write(" OR ")
+                     query.write(sub_cond)
+
+                 query.write(")")
+
+             query.write(";")
+             return query.getvalue()
+
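# --- Illustrative sketch (not part of the released diff): the query that
# _get_sql_query() produces for a single app/result pair, assuming the schema
# constants resolve to the column names "endpoint_id", "application_name" and
# "result_name"; the table path and IDs below are hypothetical.
query = V3IOTSDBConnector._get_sql_query(
    endpoint_id="ep-1234",
    table_path="pipelines/my-project/monitoring-apps/app-results",
    metric_and_app_names=[("my-app", "drift-result")],
    columns=["result_value", "result_status", "result_kind"],
)
# query ==
#   "SELECT result_value,result_status,result_kind "
#   "FROM 'pipelines/my-project/monitoring-apps/app-results' "
#   "WHERE endpoint_id='ep-1234' AND "
#   "((application_name='my-app' AND result_name='drift-result'));"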
+     def read_predictions(
+         self,
+         *,
+         endpoint_id: str,
+         start: Union[datetime, str],
+         end: Union[datetime, str],
+         aggregation_window: Optional[str] = None,
+         agg_funcs: Optional[list[str]] = None,
+     ) -> Union[
+         mm_schemas.ModelEndpointMonitoringMetricNoData,
+         mm_schemas.ModelEndpointMonitoringMetricValues,
+     ]:
+         if (agg_funcs and not aggregation_window) or (
+             aggregation_window and not agg_funcs
+         ):
+             raise mlrun.errors.MLRunInvalidArgumentError(
+                 "both or neither of `aggregation_window` and `agg_funcs` must be provided"
+             )
+         df = self._get_records(
+             table=mm_schemas.FileTargetKind.PREDICTIONS,
+             start=start,
+             end=end,
+             columns=[mm_schemas.EventFieldType.LATENCY],
+             filter_query=f"endpoint_id=='{endpoint_id}'",
+             agg_funcs=agg_funcs,
+             sliding_window_step=aggregation_window,
+         )
+
+         full_name = get_invocations_fqn(self.project)
+
+         if df.empty:
+             return mm_schemas.ModelEndpointMonitoringMetricNoData(
+                 full_name=full_name,
+                 type=mm_schemas.ModelEndpointMonitoringMetricType.METRIC,
+             )
+
+         latency_column = (
+             f"{agg_funcs[0]}({mm_schemas.EventFieldType.LATENCY})"
+             if agg_funcs
+             else mm_schemas.EventFieldType.LATENCY
+         )
+
+         return mm_schemas.ModelEndpointMonitoringMetricValues(
+             full_name=full_name,
+             values=list(
+                 zip(
+                     df.index,
+                     df[latency_column],
+                 )
+             ),  # pyright: ignore[reportArgumentType]
+         )
+
+     def get_last_request(
+         self,
+         endpoint_ids: Union[str, list[str]],
+         start: Union[datetime, str] = "0",
+         end: Union[datetime, str] = "now",
+     ) -> pd.DataFrame:
+         endpoint_ids = (
+             endpoint_ids if isinstance(endpoint_ids, list) else [endpoint_ids]
+         )
+         df = self._get_records(
+             table=mm_schemas.FileTargetKind.PREDICTIONS,
+             start=start,
+             end=end,
+             filter_query=f"endpoint_id IN({str(endpoint_ids)[1:-1]})",
+             agg_funcs=["last"],
+         )
+         if not df.empty:
+             df.rename(
+                 columns={
+                     f"last({mm_schemas.EventFieldType.LAST_REQUEST_TIMESTAMP})": mm_schemas.EventFieldType.LAST_REQUEST,
+                     f"last({mm_schemas.EventFieldType.LATENCY})": f"last_{mm_schemas.EventFieldType.LATENCY}",
+                 },
+                 inplace=True,
+             )
+             df[mm_schemas.EventFieldType.LAST_REQUEST] = df[
+                 mm_schemas.EventFieldType.LAST_REQUEST
+             ].map(
+                 lambda last_request: datetime.fromtimestamp(
+                     last_request, tz=timezone.utc
+                 )
+             )
+
+         return df.reset_index(drop=True)
+
+     def get_drift_status(
+         self,
+         endpoint_ids: Union[str, list[str]],
+         start: Union[datetime, str] = "now-24h",
+         end: Union[datetime, str] = "now",
+     ) -> pd.DataFrame:
+         endpoint_ids = (
+             endpoint_ids if isinstance(endpoint_ids, list) else [endpoint_ids]
+         )
+         df = self._get_records(
+             table=mm_schemas.V3IOTSDBTables.APP_RESULTS,
+             start=start,
+             end=end,
+             columns=[mm_schemas.ResultData.RESULT_STATUS],
+             filter_query=f"endpoint_id IN({str(endpoint_ids)[1:-1]})",
+             agg_funcs=["max"],
+             group_by="endpoint_id",
+         )
+         if not df.empty:
+             df.columns = [
+                 col[len("max(") : -1] if "max(" in col else col for col in df.columns
+             ]
+         return df.reset_index(drop=True)
+
+     def get_metrics_metadata(
+         self,
+         endpoint_id: str,
+         start: Union[datetime, str] = "0",
+         end: Union[datetime, str] = "now",
+     ) -> pd.DataFrame:
+         df = self._get_records(
+             table=mm_schemas.V3IOTSDBTables.METRICS,
+             start=start,
+             end=end,
+             columns=[mm_schemas.MetricData.METRIC_VALUE],
+             filter_query=f"endpoint_id=='{endpoint_id}'",
+             agg_funcs=["last"],
+         )
+         if not df.empty:
+             df.drop(
+                 columns=[f"last({mm_schemas.MetricData.METRIC_VALUE})"], inplace=True
+             )
+         return df.reset_index(drop=True)
+
+     def get_results_metadata(
+         self,
+         endpoint_id: str,
+         start: Union[datetime, str] = "0",
+         end: Union[datetime, str] = "now",
+     ) -> pd.DataFrame:
+         df = self._get_records(
+             table=mm_schemas.V3IOTSDBTables.APP_RESULTS,
+             start=start,
+             end=end,
+             columns=[
+                 mm_schemas.ResultData.RESULT_KIND,
+             ],
+             filter_query=f"endpoint_id=='{endpoint_id}'",
+             agg_funcs=["last"],
+         )
+         if not df.empty:
+             df.rename(
+                 columns={
+                     f"last({mm_schemas.ResultData.RESULT_KIND})": mm_schemas.ResultData.RESULT_KIND
+                 },
+                 inplace=True,
+             )
+         return df.reset_index(drop=True)
+
803
+ def get_error_count(
804
+ self,
805
+ endpoint_ids: Union[str, list[str]],
806
+ start: Union[datetime, str] = "0",
807
+ end: Union[datetime, str] = "now",
808
+ ) -> pd.DataFrame:
809
+ endpoint_ids = (
810
+ endpoint_ids if isinstance(endpoint_ids, list) else [endpoint_ids]
811
+ )
812
+ df = self._get_records(
813
+ table=mm_schemas.FileTargetKind.ERRORS,
814
+ start=start,
815
+ end=end,
816
+ columns=[mm_schemas.EventFieldType.ERROR_COUNT],
817
+ filter_query=f"endpoint_id IN({str(endpoint_ids)[1:-1]})",
818
+ agg_funcs=["count"],
819
+ )
820
+ if not df.empty:
821
+ df.rename(
822
+ columns={
823
+ f"count({mm_schemas.EventFieldType.ERROR_COUNT})": mm_schemas.EventFieldType.ERROR_COUNT
824
+ },
825
+ inplace=True,
826
+ )
827
+ df.dropna(inplace=True)
828
+ return df.reset_index(drop=True)
829
+
830
+ def get_avg_latency(
831
+ self,
832
+ endpoint_ids: Union[str, list[str]],
833
+ start: Union[datetime, str] = "0",
834
+ end: Union[datetime, str] = "now",
835
+ ) -> pd.DataFrame:
836
+ endpoint_ids = (
837
+ endpoint_ids if isinstance(endpoint_ids, list) else [endpoint_ids]
838
+ )
839
+ df = self._get_records(
840
+ table=mm_schemas.FileTargetKind.PREDICTIONS,
841
+ start=start,
842
+ end=end,
843
+ columns=[mm_schemas.EventFieldType.LATENCY],
844
+ filter_query=f"endpoint_id IN({str(endpoint_ids)[1:-1]})",
845
+ agg_funcs=["avg"],
846
+ )
847
+ if not df.empty:
848
+ df.dropna(inplace=True)
849
+ return df.reset_index(drop=True)
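
A minimal usage sketch of the new connector (illustrative only; it assumes a configured V3IO environment, and the project name and endpoint ID are hypothetical):

    from datetime import datetime, timedelta, timezone

    from mlrun.model_monitoring.db.tsdb.v3io.v3io_connector import V3IOTSDBConnector

    connector = V3IOTSDBConnector(project="my-project", create_table=True)

    # Count invocations per 1-minute sliding window for one endpoint; note that
    # `aggregation_window` and `agg_funcs` must be passed together or not at all.
    end = datetime.now(tz=timezone.utc)
    invocations = connector.read_predictions(
        endpoint_id="ep-1234",
        start=end - timedelta(hours=24),
        end=end,
        aggregation_window="1m",
        agg_funcs=["count"],
    )

    # Tear down all of the project's monitoring TSDB tables.
    connector.delete_tsdb_resources()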