mlrun 1.7.1rc10__py3-none-any.whl → 1.8.0rc8__py3-none-any.whl

This diff compares the contents of two publicly released versions of the package as published to a supported registry. It is provided for informational purposes only and reflects the changes between the versions as they appear in their public registries.

Potentially problematic release: this version of mlrun might be problematic.

Files changed (257)
  1. mlrun/__init__.py +23 -21
  2. mlrun/__main__.py +3 -3
  3. mlrun/alerts/alert.py +148 -14
  4. mlrun/artifacts/__init__.py +1 -2
  5. mlrun/artifacts/base.py +46 -12
  6. mlrun/artifacts/dataset.py +16 -16
  7. mlrun/artifacts/document.py +334 -0
  8. mlrun/artifacts/manager.py +15 -13
  9. mlrun/artifacts/model.py +66 -53
  10. mlrun/common/constants.py +7 -0
  11. mlrun/common/formatters/__init__.py +1 -0
  12. mlrun/common/formatters/feature_set.py +1 -0
  13. mlrun/common/formatters/function.py +1 -0
  14. mlrun/{model_monitoring/db/stores/base/__init__.py → common/formatters/model_endpoint.py} +16 -1
  15. mlrun/common/formatters/pipeline.py +1 -2
  16. mlrun/common/formatters/project.py +9 -0
  17. mlrun/common/model_monitoring/__init__.py +0 -5
  18. mlrun/common/model_monitoring/helpers.py +1 -29
  19. mlrun/common/runtimes/constants.py +1 -2
  20. mlrun/common/schemas/__init__.py +6 -2
  21. mlrun/common/schemas/alert.py +111 -19
  22. mlrun/common/schemas/api_gateway.py +3 -3
  23. mlrun/common/schemas/artifact.py +11 -7
  24. mlrun/common/schemas/auth.py +6 -4
  25. mlrun/common/schemas/background_task.py +7 -7
  26. mlrun/common/schemas/client_spec.py +2 -3
  27. mlrun/common/schemas/clusterization_spec.py +2 -2
  28. mlrun/common/schemas/common.py +53 -3
  29. mlrun/common/schemas/constants.py +15 -0
  30. mlrun/common/schemas/datastore_profile.py +1 -1
  31. mlrun/common/schemas/feature_store.py +9 -9
  32. mlrun/common/schemas/frontend_spec.py +4 -4
  33. mlrun/common/schemas/function.py +10 -10
  34. mlrun/common/schemas/hub.py +1 -1
  35. mlrun/common/schemas/k8s.py +3 -3
  36. mlrun/common/schemas/memory_reports.py +3 -3
  37. mlrun/common/schemas/model_monitoring/__init__.py +2 -1
  38. mlrun/common/schemas/model_monitoring/constants.py +66 -14
  39. mlrun/common/schemas/model_monitoring/grafana.py +1 -1
  40. mlrun/common/schemas/model_monitoring/model_endpoints.py +91 -147
  41. mlrun/common/schemas/notification.py +24 -3
  42. mlrun/common/schemas/object.py +1 -1
  43. mlrun/common/schemas/pagination.py +4 -4
  44. mlrun/common/schemas/partition.py +137 -0
  45. mlrun/common/schemas/pipeline.py +2 -2
  46. mlrun/common/schemas/project.py +25 -17
  47. mlrun/common/schemas/runs.py +2 -2
  48. mlrun/common/schemas/runtime_resource.py +5 -5
  49. mlrun/common/schemas/schedule.py +1 -1
  50. mlrun/common/schemas/secret.py +1 -1
  51. mlrun/common/schemas/tag.py +3 -3
  52. mlrun/common/schemas/workflow.py +5 -5
  53. mlrun/config.py +67 -10
  54. mlrun/data_types/__init__.py +0 -2
  55. mlrun/data_types/infer.py +3 -1
  56. mlrun/data_types/spark.py +2 -1
  57. mlrun/datastore/__init__.py +0 -2
  58. mlrun/datastore/alibaba_oss.py +4 -1
  59. mlrun/datastore/azure_blob.py +4 -1
  60. mlrun/datastore/base.py +12 -4
  61. mlrun/datastore/datastore.py +9 -3
  62. mlrun/datastore/datastore_profile.py +79 -20
  63. mlrun/datastore/dbfs_store.py +4 -1
  64. mlrun/datastore/filestore.py +4 -1
  65. mlrun/datastore/google_cloud_storage.py +4 -1
  66. mlrun/datastore/hdfs.py +4 -1
  67. mlrun/datastore/inmem.py +4 -1
  68. mlrun/datastore/redis.py +4 -1
  69. mlrun/datastore/s3.py +4 -1
  70. mlrun/datastore/sources.py +52 -51
  71. mlrun/datastore/store_resources.py +0 -2
  72. mlrun/datastore/targets.py +21 -21
  73. mlrun/datastore/utils.py +2 -2
  74. mlrun/datastore/v3io.py +4 -1
  75. mlrun/datastore/vectorstore.py +194 -0
  76. mlrun/datastore/wasbfs/fs.py +13 -12
  77. mlrun/db/base.py +208 -82
  78. mlrun/db/factory.py +0 -3
  79. mlrun/db/httpdb.py +1237 -386
  80. mlrun/db/nopdb.py +201 -74
  81. mlrun/errors.py +2 -2
  82. mlrun/execution.py +136 -50
  83. mlrun/feature_store/__init__.py +0 -2
  84. mlrun/feature_store/api.py +41 -40
  85. mlrun/feature_store/common.py +9 -9
  86. mlrun/feature_store/feature_set.py +20 -18
  87. mlrun/feature_store/feature_vector.py +27 -24
  88. mlrun/feature_store/retrieval/base.py +14 -9
  89. mlrun/feature_store/retrieval/job.py +2 -1
  90. mlrun/feature_store/steps.py +2 -2
  91. mlrun/features.py +30 -13
  92. mlrun/frameworks/__init__.py +1 -2
  93. mlrun/frameworks/_common/__init__.py +1 -2
  94. mlrun/frameworks/_common/artifacts_library.py +2 -2
  95. mlrun/frameworks/_common/mlrun_interface.py +10 -6
  96. mlrun/frameworks/_common/model_handler.py +29 -27
  97. mlrun/frameworks/_common/producer.py +3 -1
  98. mlrun/frameworks/_dl_common/__init__.py +1 -2
  99. mlrun/frameworks/_dl_common/loggers/__init__.py +1 -2
  100. mlrun/frameworks/_dl_common/loggers/mlrun_logger.py +4 -4
  101. mlrun/frameworks/_dl_common/loggers/tensorboard_logger.py +3 -3
  102. mlrun/frameworks/_ml_common/__init__.py +1 -2
  103. mlrun/frameworks/_ml_common/loggers/__init__.py +1 -2
  104. mlrun/frameworks/_ml_common/model_handler.py +21 -21
  105. mlrun/frameworks/_ml_common/plans/__init__.py +1 -2
  106. mlrun/frameworks/_ml_common/plans/confusion_matrix_plan.py +3 -1
  107. mlrun/frameworks/_ml_common/plans/dataset_plan.py +3 -3
  108. mlrun/frameworks/_ml_common/plans/roc_curve_plan.py +4 -4
  109. mlrun/frameworks/auto_mlrun/__init__.py +1 -2
  110. mlrun/frameworks/auto_mlrun/auto_mlrun.py +22 -15
  111. mlrun/frameworks/huggingface/__init__.py +1 -2
  112. mlrun/frameworks/huggingface/model_server.py +9 -9
  113. mlrun/frameworks/lgbm/__init__.py +47 -44
  114. mlrun/frameworks/lgbm/callbacks/__init__.py +1 -2
  115. mlrun/frameworks/lgbm/callbacks/logging_callback.py +4 -2
  116. mlrun/frameworks/lgbm/callbacks/mlrun_logging_callback.py +4 -2
  117. mlrun/frameworks/lgbm/mlrun_interfaces/__init__.py +1 -2
  118. mlrun/frameworks/lgbm/mlrun_interfaces/mlrun_interface.py +5 -5
  119. mlrun/frameworks/lgbm/model_handler.py +15 -11
  120. mlrun/frameworks/lgbm/model_server.py +11 -7
  121. mlrun/frameworks/lgbm/utils.py +2 -2
  122. mlrun/frameworks/onnx/__init__.py +1 -2
  123. mlrun/frameworks/onnx/dataset.py +3 -3
  124. mlrun/frameworks/onnx/mlrun_interface.py +2 -2
  125. mlrun/frameworks/onnx/model_handler.py +7 -5
  126. mlrun/frameworks/onnx/model_server.py +8 -6
  127. mlrun/frameworks/parallel_coordinates.py +11 -11
  128. mlrun/frameworks/pytorch/__init__.py +22 -23
  129. mlrun/frameworks/pytorch/callbacks/__init__.py +1 -2
  130. mlrun/frameworks/pytorch/callbacks/callback.py +2 -1
  131. mlrun/frameworks/pytorch/callbacks/logging_callback.py +15 -8
  132. mlrun/frameworks/pytorch/callbacks/mlrun_logging_callback.py +19 -12
  133. mlrun/frameworks/pytorch/callbacks/tensorboard_logging_callback.py +22 -15
  134. mlrun/frameworks/pytorch/callbacks_handler.py +36 -30
  135. mlrun/frameworks/pytorch/mlrun_interface.py +17 -17
  136. mlrun/frameworks/pytorch/model_handler.py +21 -17
  137. mlrun/frameworks/pytorch/model_server.py +13 -9
  138. mlrun/frameworks/sklearn/__init__.py +19 -18
  139. mlrun/frameworks/sklearn/estimator.py +2 -2
  140. mlrun/frameworks/sklearn/metric.py +3 -3
  141. mlrun/frameworks/sklearn/metrics_library.py +8 -6
  142. mlrun/frameworks/sklearn/mlrun_interface.py +3 -2
  143. mlrun/frameworks/sklearn/model_handler.py +4 -3
  144. mlrun/frameworks/tf_keras/__init__.py +11 -12
  145. mlrun/frameworks/tf_keras/callbacks/__init__.py +1 -2
  146. mlrun/frameworks/tf_keras/callbacks/logging_callback.py +17 -14
  147. mlrun/frameworks/tf_keras/callbacks/mlrun_logging_callback.py +15 -12
  148. mlrun/frameworks/tf_keras/callbacks/tensorboard_logging_callback.py +21 -18
  149. mlrun/frameworks/tf_keras/model_handler.py +17 -13
  150. mlrun/frameworks/tf_keras/model_server.py +12 -8
  151. mlrun/frameworks/xgboost/__init__.py +19 -18
  152. mlrun/frameworks/xgboost/model_handler.py +13 -9
  153. mlrun/launcher/base.py +3 -4
  154. mlrun/launcher/local.py +1 -1
  155. mlrun/launcher/remote.py +1 -1
  156. mlrun/lists.py +4 -3
  157. mlrun/model.py +117 -46
  158. mlrun/model_monitoring/__init__.py +4 -4
  159. mlrun/model_monitoring/api.py +61 -59
  160. mlrun/model_monitoring/applications/_application_steps.py +17 -17
  161. mlrun/model_monitoring/applications/base.py +165 -6
  162. mlrun/model_monitoring/applications/context.py +88 -37
  163. mlrun/model_monitoring/applications/evidently_base.py +0 -1
  164. mlrun/model_monitoring/applications/histogram_data_drift.py +43 -21
  165. mlrun/model_monitoring/applications/results.py +55 -3
  166. mlrun/model_monitoring/controller.py +207 -239
  167. mlrun/model_monitoring/db/__init__.py +0 -2
  168. mlrun/model_monitoring/db/_schedules.py +156 -0
  169. mlrun/model_monitoring/db/_stats.py +189 -0
  170. mlrun/model_monitoring/db/tsdb/base.py +78 -25
  171. mlrun/model_monitoring/db/tsdb/tdengine/schemas.py +61 -6
  172. mlrun/model_monitoring/db/tsdb/tdengine/stream_graph_steps.py +33 -0
  173. mlrun/model_monitoring/db/tsdb/tdengine/tdengine_connector.py +255 -29
  174. mlrun/model_monitoring/db/tsdb/v3io/stream_graph_steps.py +1 -0
  175. mlrun/model_monitoring/db/tsdb/v3io/v3io_connector.py +78 -17
  176. mlrun/model_monitoring/helpers.py +152 -49
  177. mlrun/model_monitoring/stream_processing.py +99 -283
  178. mlrun/model_monitoring/tracking_policy.py +10 -3
  179. mlrun/model_monitoring/writer.py +48 -36
  180. mlrun/package/__init__.py +3 -6
  181. mlrun/package/context_handler.py +1 -1
  182. mlrun/package/packager.py +12 -9
  183. mlrun/package/packagers/__init__.py +0 -2
  184. mlrun/package/packagers/default_packager.py +14 -11
  185. mlrun/package/packagers/numpy_packagers.py +16 -7
  186. mlrun/package/packagers/pandas_packagers.py +18 -18
  187. mlrun/package/packagers/python_standard_library_packagers.py +25 -11
  188. mlrun/package/packagers_manager.py +31 -14
  189. mlrun/package/utils/__init__.py +0 -3
  190. mlrun/package/utils/_pickler.py +6 -6
  191. mlrun/platforms/__init__.py +47 -16
  192. mlrun/platforms/iguazio.py +4 -1
  193. mlrun/projects/operations.py +27 -27
  194. mlrun/projects/pipelines.py +71 -36
  195. mlrun/projects/project.py +865 -206
  196. mlrun/run.py +53 -10
  197. mlrun/runtimes/__init__.py +1 -3
  198. mlrun/runtimes/base.py +15 -11
  199. mlrun/runtimes/daskjob.py +9 -9
  200. mlrun/runtimes/generators.py +2 -1
  201. mlrun/runtimes/kubejob.py +4 -5
  202. mlrun/runtimes/mounts.py +572 -0
  203. mlrun/runtimes/mpijob/__init__.py +0 -2
  204. mlrun/runtimes/mpijob/abstract.py +7 -6
  205. mlrun/runtimes/nuclio/api_gateway.py +7 -7
  206. mlrun/runtimes/nuclio/application/application.py +11 -11
  207. mlrun/runtimes/nuclio/function.py +19 -17
  208. mlrun/runtimes/nuclio/serving.py +18 -11
  209. mlrun/runtimes/pod.py +154 -45
  210. mlrun/runtimes/remotesparkjob.py +3 -2
  211. mlrun/runtimes/sparkjob/__init__.py +0 -2
  212. mlrun/runtimes/sparkjob/spark3job.py +21 -11
  213. mlrun/runtimes/utils.py +6 -5
  214. mlrun/serving/merger.py +6 -4
  215. mlrun/serving/remote.py +18 -17
  216. mlrun/serving/routers.py +185 -172
  217. mlrun/serving/server.py +7 -1
  218. mlrun/serving/states.py +97 -78
  219. mlrun/serving/utils.py +13 -2
  220. mlrun/serving/v1_serving.py +3 -2
  221. mlrun/serving/v2_serving.py +74 -65
  222. mlrun/track/__init__.py +1 -1
  223. mlrun/track/tracker.py +2 -2
  224. mlrun/track/trackers/mlflow_tracker.py +6 -5
  225. mlrun/utils/async_http.py +1 -1
  226. mlrun/utils/clones.py +1 -1
  227. mlrun/utils/helpers.py +54 -16
  228. mlrun/utils/logger.py +106 -4
  229. mlrun/utils/notifications/notification/__init__.py +22 -19
  230. mlrun/utils/notifications/notification/base.py +33 -14
  231. mlrun/utils/notifications/notification/console.py +6 -6
  232. mlrun/utils/notifications/notification/git.py +11 -11
  233. mlrun/utils/notifications/notification/ipython.py +10 -9
  234. mlrun/utils/notifications/notification/mail.py +176 -0
  235. mlrun/utils/notifications/notification/slack.py +6 -6
  236. mlrun/utils/notifications/notification/webhook.py +6 -6
  237. mlrun/utils/notifications/notification_pusher.py +86 -44
  238. mlrun/utils/regex.py +3 -1
  239. mlrun/utils/version/version.json +2 -2
  240. {mlrun-1.7.1rc10.dist-info → mlrun-1.8.0rc8.dist-info}/METADATA +21 -16
  241. mlrun-1.8.0rc8.dist-info/RECORD +347 -0
  242. mlrun/model_monitoring/db/stores/__init__.py +0 -136
  243. mlrun/model_monitoring/db/stores/base/store.py +0 -213
  244. mlrun/model_monitoring/db/stores/sqldb/__init__.py +0 -13
  245. mlrun/model_monitoring/db/stores/sqldb/models/__init__.py +0 -71
  246. mlrun/model_monitoring/db/stores/sqldb/models/base.py +0 -190
  247. mlrun/model_monitoring/db/stores/sqldb/models/mysql.py +0 -103
  248. mlrun/model_monitoring/db/stores/sqldb/models/sqlite.py +0 -40
  249. mlrun/model_monitoring/db/stores/sqldb/sql_store.py +0 -659
  250. mlrun/model_monitoring/db/stores/v3io_kv/__init__.py +0 -13
  251. mlrun/model_monitoring/db/stores/v3io_kv/kv_store.py +0 -726
  252. mlrun/model_monitoring/model_endpoint.py +0 -118
  253. mlrun-1.7.1rc10.dist-info/RECORD +0 -351
  254. {mlrun-1.7.1rc10.dist-info → mlrun-1.8.0rc8.dist-info}/LICENSE +0 -0
  255. {mlrun-1.7.1rc10.dist-info → mlrun-1.8.0rc8.dist-info}/WHEEL +0 -0
  256. {mlrun-1.7.1rc10.dist-info → mlrun-1.8.0rc8.dist-info}/entry_points.txt +0 -0
  257. {mlrun-1.7.1rc10.dist-info → mlrun-1.8.0rc8.dist-info}/top_level.txt +0 -0
mlrun/model_monitoring/db/tsdb/tdengine/stream_graph_steps.py

@@ -13,12 +13,14 @@
 # limitations under the License.

 import json
+from datetime import datetime

 import mlrun.feature_store.steps
 from mlrun.common.schemas.model_monitoring import (
     EventFieldType,
     EventKeyMetrics,
 )
+from mlrun.utils import logger


 class ProcessBeforeTDEngine(mlrun.feature_store.steps.MapClass):
@@ -40,3 +42,34 @@ class ProcessBeforeTDEngine(mlrun.feature_store.steps.MapClass):
         event[EventFieldType.TABLE_COLUMN] = "_" + event.get(EventFieldType.ENDPOINT_ID)

         return event
+
+
+class ErrorExtractor(mlrun.feature_store.steps.MapClass):
+    def __init__(self, **kwargs):
+        """
+        Prepare the event for insertion into the TDEngine error table
+        """
+        super().__init__(**kwargs)
+
+    def do(self, event):
+        error = str(event.get("error"))
+        if len(error) > 1000:
+            error = error[-1000:]
+            logger.warning(
+                f"Error message exceeds 1000 chars: The error message writen to TSDB will be it last "
+                f"1000 chars, Error: {error}",
+                event=event,
+            )
+        timestamp = datetime.fromisoformat(event.get("when"))
+        endpoint_id = event[EventFieldType.ENDPOINT_ID]
+        event = {
+            EventFieldType.MODEL_ERROR: error,
+            EventFieldType.ERROR_TYPE: EventFieldType.INFER_ERROR,
+            EventFieldType.ENDPOINT_ID: endpoint_id,
+            EventFieldType.TIME: timestamp,
+            EventFieldType.PROJECT: event[EventFieldType.FUNCTION_URI].split("/")[0],
+            EventFieldType.TABLE_COLUMN: "_err_"
+            + event.get(EventFieldType.ENDPOINT_ID),
+        }
+        logger.info("Write error to errors TSDB table", event=event)
+        return event
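
For orientation, here is roughly how the new ErrorExtractor reshapes a failed-prediction event before it is written to the TDEngine errors supertable. This is a hand-written sketch: the event keys mirror the code above, but the concrete values and the snake_case strings behind the EventFieldType constants are assumptions.

# Hypothetical input event reaching ErrorExtractor.do() (values invented):
event_in = {
    "error": "RuntimeError: model inference failed",
    "when": "2024-11-05T12:30:00+00:00",
    "endpoint_id": "ep-123",
    "function_uri": "my-project/serving-fn",
}

# Expected output, following the do() logic above: the error text (truncated
# to its last 1000 chars when longer), tagged with the endpoint and error
# type, and routed to a per-endpoint "_err_<endpoint_id>" subtable:
# {
#     "model_error": "RuntimeError: model inference failed",
#     "error_type": "infer_error",           # assumed value of INFER_ERROR
#     "endpoint_id": "ep-123",
#     "time": datetime(2024, 11, 5, 12, 30, tzinfo=timezone.utc),
#     "project": "my-project",               # first segment of function_uri
#     "table_column": "_err_ep-123",
# }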
mlrun/model_monitoring/db/tsdb/tdengine/tdengine_connector.py

@@ -13,8 +13,7 @@
 # limitations under the License.

 import typing
-from datetime import datetime
-from typing import Union
+from datetime import datetime, timedelta, timezone

 import pandas as pd
 import taosws
@@ -90,6 +89,9 @@ class TDEngineConnector(TSDBConnector):
             mm_schemas.TDEngineSuperTables.PREDICTIONS: tdengine_schemas.Predictions(
                 project=self.project, database=self.database
             ),
+            mm_schemas.TDEngineSuperTables.ERRORS: tdengine_schemas.Errors(
+                project=self.project, database=self.database
+            ),
         }

     def create_tables(self):
@@ -122,7 +124,6 @@ class TDEngineConnector(TSDBConnector):
                 table_name = (
                     f"{table_name}_{event[mm_schemas.ResultData.RESULT_NAME]}"
                 ).replace("-", "_")
-                event.pop(mm_schemas.ResultData.CURRENT_STATS, None)

             else:
                 # Write a new metric
@@ -163,7 +164,7 @@ class TDEngineConnector(TSDBConnector):
     def _convert_to_datetime(val: typing.Union[str, datetime]) -> datetime:
         return datetime.fromisoformat(val) if isinstance(val, str) else val

-    def apply_monitoring_stream_steps(self, graph):
+    def apply_monitoring_stream_steps(self, graph, **kwarg):
        """
        Apply TSDB steps on the provided monitoring graph. Throughout these steps, the graph stores live data of
        different key metric dictionaries. This data is being used by the monitoring dashboards in
@@ -196,7 +197,6 @@ class TDEngineConnector(TSDBConnector):
                 mm_schemas.EventKeyMetrics.CUSTOM_METRICS,
             ],
             tag_cols=[
-                mm_schemas.EventFieldType.PROJECT,
                 mm_schemas.EventFieldType.ENDPOINT_ID,
             ],
             max_events=1000,
@@ -209,8 +209,37 @@ class TDEngineConnector(TSDBConnector):
             after="ProcessBeforeTDEngine",
         )

-    def handle_model_error(self, graph, **kwargs) -> None:
-        pass
+    def handle_model_error(
+        self,
+        graph,
+        tsdb_batching_max_events: int = 1000,
+        tsdb_batching_timeout_secs: int = 30,
+        **kwargs,
+    ) -> None:
+        graph.add_step(
+            "mlrun.model_monitoring.db.tsdb.tdengine.stream_graph_steps.ErrorExtractor",
+            name="error_extractor",
+            after="ForwardError",
+        )
+        graph.add_step(
+            "storey.TDEngineTarget",
+            name="tsdb_error",
+            after="error_extractor",
+            url=self._tdengine_connection_string,
+            supertable=self.tables[mm_schemas.TDEngineSuperTables.ERRORS].super_table,
+            table_col=mm_schemas.EventFieldType.TABLE_COLUMN,
+            time_col=mm_schemas.EventFieldType.TIME,
+            database=self.database,
+            columns=[
+                mm_schemas.EventFieldType.MODEL_ERROR,
+            ],
+            tag_cols=[
+                mm_schemas.EventFieldType.ENDPOINT_ID,
+                mm_schemas.EventFieldType.ERROR_TYPE,
+            ],
+            max_events=tsdb_batching_max_events,
+            flush_after_seconds=tsdb_batching_timeout_secs,
+        )

    def delete_tsdb_resources(self):
        """
@@ -265,6 +294,10 @@ class TDEngineConnector(TSDBConnector):
         limit: typing.Optional[int] = None,
         sliding_window_step: typing.Optional[str] = None,
         timestamp_column: str = mm_schemas.EventFieldType.TIME,
+        group_by: typing.Optional[typing.Union[list[str], str]] = None,
+        preform_agg_columns: typing.Optional[list] = None,
+        order_by: typing.Optional[str] = None,
+        desc: typing.Optional[bool] = None,
     ) -> pd.DataFrame:
         """
         Getting records from TSDB data collection.
@@ -284,6 +317,14 @@ class TDEngineConnector(TSDBConnector):
                                      `sliding_window_step` is provided, interval must be provided as well. Provided
                                      as a string in the format of '1m', '1h', etc.
         :param timestamp_column:     The column name that holds the timestamp index.
+        :param group_by:             The column name to group by. Note that if `group_by` is provided, aggregation
+                                     functions must bg provided
+        :param preform_agg_columns:  The columns to preform aggregation on.
+                                     notice that all aggregation functions provided will preform on those columns.
+                                     If not provided The default behavior is to preform on all columns in columns,
+                                     if an empty list was provided The aggregation won't be performed.
+        :param order_by:             The column or alias to preform ordering on the query.
+        :param desc:                 Whether or not to sort the results in descending order.

         :return: DataFrame with the provided attributes from the data collection.
         :raise:  MLRunInvalidArgumentError if query the provided table failed.
@@ -301,6 +342,10 @@ class TDEngineConnector(TSDBConnector):
             sliding_window_step=sliding_window_step,
             timestamp_column=timestamp_column,
             database=self.database,
+            group_by=group_by,
+            preform_agg_funcs_columns=preform_agg_columns,
+            order_by=order_by,
+            desc=desc,
         )
         logger.debug("Querying TDEngine", query=full_query)
         try:
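
To make the new query-builder arguments concrete: a grouped last-value query, such as the one get_last_request issues further below, plausibly translates to TDengine SQL along these lines. This is an illustrative sketch only; the table name, endpoint id, and generated SQL are assumptions, not the connector's literal output.

# Hypothetical call shape using the new arguments (argument names from the diff):
df = connector._get_records(
    table="predictions",                       # invented table name
    start=start,
    end=end,
    columns=["endpoint_id", "time", "latency"],
    filter_query="endpoint_id IN('ep-123')",
    agg_funcs=["last"],
    group_by="endpoint_id",
    preform_agg_columns=["time"],
)
# Roughly equivalent TDengine SQL (illustrative):
#   SELECT last(time), latency, endpoint_id FROM predictions
#   WHERE endpoint_id IN('ep-123') AND time >= {start} AND time <= {end}
#   GROUP BY endpoint_id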
@@ -323,6 +368,7 @@ class TDEngineConnector(TSDBConnector):
         end: datetime,
         metrics: list[mm_schemas.ModelEndpointMonitoringMetric],
         type: typing.Literal["metrics", "results"],
+        with_result_extra_data: bool = False,
     ) -> typing.Union[
         list[
             typing.Union[
@@ -340,6 +386,12 @@ class TDEngineConnector(TSDBConnector):
         timestamp_column = mm_schemas.WriterEvent.END_INFER_TIME
         columns = [timestamp_column, mm_schemas.WriterEvent.APPLICATION_NAME]
         if type == "metrics":
+            if with_result_extra_data:
+                logger.warning(
+                    "The 'with_result_extra_data' parameter is not supported for metrics, just for results",
+                    project=self.project,
+                    endpoint_id=endpoint_id,
+                )
             table = self.tables[mm_schemas.TDEngineSuperTables.METRICS].super_table
             name = mm_schemas.MetricData.METRIC_NAME
             columns += [name, mm_schemas.MetricData.METRIC_VALUE]
@@ -353,6 +405,8 @@ class TDEngineConnector(TSDBConnector):
                 mm_schemas.ResultData.RESULT_STATUS,
                 mm_schemas.ResultData.RESULT_KIND,
             ]
+            if with_result_extra_data:
+                columns.append(mm_schemas.ResultData.RESULT_EXTRA_DATA)
             df_handler = self.df_to_results_values
         else:
             raise mlrun.errors.MLRunInvalidArgumentError(
@@ -389,6 +443,10 @@ class TDEngineConnector(TSDBConnector):
             is_empty=df.empty,
         )

+        if not with_result_extra_data and type == "results":
+            # Set the extra data to an empty string if it's not requested
+            df[mm_schemas.ResultData.RESULT_EXTRA_DATA] = ""
+
         return df_handler(df=df, metrics=metrics, project=self.project)

     def read_predictions(
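
Taken together, the last four hunks thread a with_result_extra_data flag through the results reader: the RESULT_EXTRA_DATA column is only selected for type="results", and is blanked to an empty string when it was not requested. A hedged usage sketch (the diff shows only the method body, so the read_metrics_data name and the connector variable are assumptions):

results = connector.read_metrics_data(
    endpoint_id="ep-123",                 # invented endpoint id
    start=start,
    end=end,
    metrics=metrics,
    type="results",
    with_result_extra_data=True,          # also fetch ResultData.RESULT_EXTRA_DATA
)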
@@ -452,51 +510,219 @@ class TDEngineConnector(TSDBConnector):

     def get_last_request(
         self,
-        endpoint_ids: Union[str, list[str]],
-        start: Union[datetime, str] = "0",
-        end: Union[datetime, str] = "now",
+        endpoint_ids: typing.Union[str, list[str]],
+        start: typing.Optional[datetime] = None,
+        end: typing.Optional[datetime] = None,
     ) -> pd.DataFrame:
-        pass
+        endpoint_ids = (
+            endpoint_ids if isinstance(endpoint_ids, list) else [endpoint_ids]
+        )
+        start, end = self._get_start_end(start, end)
+        df = self._get_records(
+            table=self.tables[mm_schemas.TDEngineSuperTables.PREDICTIONS].super_table,
+            start=start,
+            end=end,
+            columns=[
+                mm_schemas.EventFieldType.ENDPOINT_ID,
+                mm_schemas.EventFieldType.TIME,
+                mm_schemas.EventFieldType.LATENCY,
+            ],
+            filter_query=f"endpoint_id IN({str(endpoint_ids)[1:-1]})",
+            timestamp_column=mm_schemas.EventFieldType.TIME,
+            agg_funcs=["last"],
+            group_by=mm_schemas.EventFieldType.ENDPOINT_ID,
+            preform_agg_columns=[mm_schemas.EventFieldType.TIME],
+        )
+        if not df.empty:
+            df.dropna(inplace=True)
+            df.rename(
+                columns={
+                    f"last({mm_schemas.EventFieldType.TIME})": mm_schemas.EventFieldType.LAST_REQUEST,
+                    f"{mm_schemas.EventFieldType.LATENCY}": "last_latency",
+                },
+                inplace=True,
+            )
+            df[mm_schemas.EventFieldType.LAST_REQUEST] = df[
+                mm_schemas.EventFieldType.LAST_REQUEST
+            ].map(
+                lambda last_request: datetime.strptime(
+                    last_request, "%Y-%m-%d %H:%M:%S.%f %z"
+                ).astimezone(tz=timezone.utc)
+            )
+        return df

     def get_drift_status(
         self,
-        endpoint_ids: Union[str, list[str]],
-        start: Union[datetime, str] = "now-24h",
-        end: Union[datetime, str] = "now",
+        endpoint_ids: typing.Union[str, list[str]],
+        start: typing.Optional[datetime] = None,
+        end: typing.Optional[datetime] = None,
     ) -> pd.DataFrame:
-        pass
+        endpoint_ids = (
+            endpoint_ids if isinstance(endpoint_ids, list) else [endpoint_ids]
+        )
+        start = start or (mlrun.utils.datetime_now() - timedelta(hours=24))
+        start, end = self._get_start_end(start, end)
+        df = self._get_records(
+            table=self.tables[mm_schemas.TDEngineSuperTables.APP_RESULTS].super_table,
+            start=start,
+            end=end,
+            columns=[
+                mm_schemas.ResultData.RESULT_STATUS,
+                mm_schemas.EventFieldType.ENDPOINT_ID,
+            ],
+            filter_query=f"endpoint_id IN({str(endpoint_ids)[1:-1]})",
+            timestamp_column=mm_schemas.WriterEvent.END_INFER_TIME,
+            agg_funcs=["max"],
+            group_by=mm_schemas.EventFieldType.ENDPOINT_ID,
+            preform_agg_columns=[mm_schemas.ResultData.RESULT_STATUS],
+        )
+        df.rename(
+            columns={
+                f"max({mm_schemas.ResultData.RESULT_STATUS})": mm_schemas.ResultData.RESULT_STATUS
+            },
+            inplace=True,
+        )
+        if not df.empty:
+            df.dropna(inplace=True)
+        return df

     def get_metrics_metadata(
         self,
         endpoint_id: str,
-        start: Union[datetime, str] = "0",
-        end: Union[datetime, str] = "now",
+        start: typing.Optional[datetime] = None,
+        end: typing.Optional[datetime] = None,
     ) -> pd.DataFrame:
-        pass
+        start, end = self._get_start_end(start, end)
+        df = self._get_records(
+            table=self.tables[mm_schemas.TDEngineSuperTables.METRICS].super_table,
+            start=start,
+            end=end,
+            columns=[
+                mm_schemas.ApplicationEvent.APPLICATION_NAME,
+                mm_schemas.MetricData.METRIC_NAME,
+                mm_schemas.EventFieldType.ENDPOINT_ID,
+            ],
+            filter_query=f"endpoint_id='{endpoint_id}'",
+            timestamp_column=mm_schemas.WriterEvent.END_INFER_TIME,
+            group_by=[
+                mm_schemas.WriterEvent.APPLICATION_NAME,
+                mm_schemas.MetricData.METRIC_NAME,
+            ],
+            agg_funcs=["last"],
+        )
+        df.rename(
+            columns={
+                f"last({mm_schemas.ApplicationEvent.APPLICATION_NAME})": mm_schemas.ApplicationEvent.APPLICATION_NAME,
+                f"last({mm_schemas.MetricData.METRIC_NAME})": mm_schemas.MetricData.METRIC_NAME,
+                f"last({mm_schemas.EventFieldType.ENDPOINT_ID})": mm_schemas.EventFieldType.ENDPOINT_ID,
+            },
+            inplace=True,
+        )
+        if not df.empty:
+            df.dropna(inplace=True)
+        return df

     def get_results_metadata(
         self,
         endpoint_id: str,
-        start: Union[datetime, str] = "0",
-        end: Union[datetime, str] = "now",
+        start: typing.Optional[datetime] = None,
+        end: typing.Optional[datetime] = None,
     ) -> pd.DataFrame:
-        pass
+        start, end = self._get_start_end(start, end)
+        df = self._get_records(
+            table=self.tables[mm_schemas.TDEngineSuperTables.APP_RESULTS].super_table,
+            start=start,
+            end=end,
+            columns=[
+                mm_schemas.ApplicationEvent.APPLICATION_NAME,
+                mm_schemas.ResultData.RESULT_NAME,
+                mm_schemas.ResultData.RESULT_KIND,
+                mm_schemas.EventFieldType.ENDPOINT_ID,
+            ],
+            filter_query=f"endpoint_id='{endpoint_id}'",
+            timestamp_column=mm_schemas.WriterEvent.END_INFER_TIME,
+            group_by=[
+                mm_schemas.WriterEvent.APPLICATION_NAME,
+                mm_schemas.ResultData.RESULT_NAME,
+            ],
+            agg_funcs=["last"],
+        )
+        df.rename(
+            columns={
+                f"last({mm_schemas.ApplicationEvent.APPLICATION_NAME})": mm_schemas.ApplicationEvent.APPLICATION_NAME,
+                f"last({mm_schemas.ResultData.RESULT_NAME})": mm_schemas.ResultData.RESULT_NAME,
+                f"last({mm_schemas.ResultData.RESULT_KIND})": mm_schemas.ResultData.RESULT_KIND,
+                f"last({mm_schemas.EventFieldType.ENDPOINT_ID})": mm_schemas.EventFieldType.ENDPOINT_ID,
+            },
+            inplace=True,
+        )
+        if not df.empty:
+            df.dropna(inplace=True)
+        return df

     def get_error_count(
         self,
-        endpoint_ids: Union[str, list[str]],
-        start: Union[datetime, str] = "0",
-        end: Union[datetime, str] = "now",
+        endpoint_ids: typing.Union[str, list[str]],
+        start: typing.Optional[datetime] = None,
+        end: typing.Optional[datetime] = None,
     ) -> pd.DataFrame:
-        pass
+        endpoint_ids = (
+            endpoint_ids if isinstance(endpoint_ids, list) else [endpoint_ids]
+        )
+        start, end = self._get_start_end(start, end)
+        df = self._get_records(
+            table=self.tables[mm_schemas.TDEngineSuperTables.ERRORS].super_table,
+            start=start,
+            end=end,
+            columns=[
+                mm_schemas.EventFieldType.MODEL_ERROR,
+                mm_schemas.EventFieldType.ENDPOINT_ID,
+            ],
+            agg_funcs=["count"],
+            filter_query=f"endpoint_id IN({str(endpoint_ids)[1:-1]}) "
+            f"AND {mm_schemas.EventFieldType.ERROR_TYPE} = '{mm_schemas.EventFieldType.INFER_ERROR}'",
+            group_by=mm_schemas.EventFieldType.ENDPOINT_ID,
+            preform_agg_columns=[mm_schemas.EventFieldType.MODEL_ERROR],
+        )
+        df.rename(
+            columns={f"count({mm_schemas.EventFieldType.MODEL_ERROR})": "error_count"},
+            inplace=True,
+        )
+        if not df.empty:
+            df.dropna(inplace=True)
+        return df

     def get_avg_latency(
         self,
-        endpoint_ids: Union[str, list[str]],
-        start: Union[datetime, str] = "0",
-        end: Union[datetime, str] = "now",
+        endpoint_ids: typing.Union[str, list[str]],
+        start: typing.Optional[datetime] = None,
+        end: typing.Optional[datetime] = None,
     ) -> pd.DataFrame:
-        pass
+        endpoint_ids = (
+            endpoint_ids if isinstance(endpoint_ids, list) else [endpoint_ids]
+        )
+        start = start or (mlrun.utils.datetime_now() - timedelta(hours=24))
+        start, end = self._get_start_end(start, end)
+        df = self._get_records(
+            table=self.tables[mm_schemas.TDEngineSuperTables.PREDICTIONS].super_table,
+            start=start,
+            end=end,
+            columns=[
+                mm_schemas.EventFieldType.LATENCY,
+                mm_schemas.EventFieldType.ENDPOINT_ID,
+            ],
+            agg_funcs=["avg"],
+            filter_query=f"endpoint_id IN({str(endpoint_ids)[1:-1]})",
+            group_by=mm_schemas.EventFieldType.ENDPOINT_ID,
+            preform_agg_columns=[mm_schemas.EventFieldType.LATENCY],
+        )
+        df.rename(
+            columns={f"avg({mm_schemas.EventFieldType.LATENCY})": "avg_latency"},
+            inplace=True,
+        )
+        if not df.empty:
+            df.dropna(inplace=True)
+        return df

     # Note: this function serves as a reference for checking the TSDB for the existence of a metric.
     #
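
Assuming the EventFieldType constants resolve to their usual snake_case strings (only the enum names appear in the diff), the newly implemented get_last_request would return something like this:

# One row per endpoint: timestamp of the most recent prediction (converted
# to UTC by the strptime/astimezone mapping above) plus its recorded latency.
df = connector.get_last_request(endpoint_ids=["ep-123", "ep-456"])
#   endpoint_id | last_request               | last_latency
#   ep-123      | 2024-11-05 12:30:00+00:00  | 12.4
#   ep-456      | 2024-11-05 12:31:07+00:00  | 9.8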
mlrun/model_monitoring/db/tsdb/v3io/stream_graph_steps.py

@@ -150,6 +150,7 @@ class ErrorExtractor(mlrun.feature_store.steps.MapClass):
         endpoint_id = event[EventFieldType.ENDPOINT_ID]
         event = {
             EventFieldType.MODEL_ERROR: str(error),
+            EventFieldType.ERROR_TYPE: EventFieldType.INFER_ERROR,
             EventFieldType.ENDPOINT_ID: endpoint_id,
             EventFieldType.TIMESTAMP: timestamp,
             EventFieldType.ERROR_COUNT: 1.0,
mlrun/model_monitoring/db/tsdb/v3io/v3io_connector.py

@@ -12,7 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.

-from datetime import datetime, timezone
+from datetime import datetime, timedelta, timezone
 from io import StringIO
 from typing import Literal, Optional, Union

@@ -168,6 +168,9 @@ class V3IOTSDBConnector(TSDBConnector):
         tsdb_batching_max_events: int = 1000,
         tsdb_batching_timeout_secs: int = 30,
         sample_window: int = 10,
+        aggregate_windows: Optional[list[str]] = None,
+        aggregate_period: str = "1m",
+        **kwarg,
     ):
         """
         Apply TSDB steps on the provided monitoring graph. Throughout these steps, the graph stores live data of
@@ -178,7 +181,40 @@ class V3IOTSDBConnector(TSDBConnector):
         - endpoint_features (Prediction and feature names and values)
         - custom_metrics (user-defined metrics)
         """
+        aggregate_windows = aggregate_windows or ["5m", "1h"]

+        # Calculate number of predictions and average latency
+        def apply_storey_aggregations():
+            # Calculate number of predictions for each window (5 min and 1 hour by default)
+            graph.add_step(
+                class_name="storey.AggregateByKey",
+                aggregates=[
+                    {
+                        "name": EventFieldType.LATENCY,
+                        "column": EventFieldType.LATENCY,
+                        "operations": ["count", "avg"],
+                        "windows": aggregate_windows,
+                        "period": aggregate_period,
+                    }
+                ],
+                name=EventFieldType.LATENCY,
+                after="MapFeatureNames",
+                step_name="Aggregates",
+                table=".",
+                key_field=EventFieldType.ENDPOINT_ID,
+            )
+            # Calculate average latency time for each window (5 min and 1 hour by default)
+            graph.add_step(
+                class_name="storey.Rename",
+                mapping={
+                    "latency_count_5m": mm_schemas.EventLiveStats.PREDICTIONS_COUNT_5M,
+                    "latency_count_1h": mm_schemas.EventLiveStats.PREDICTIONS_COUNT_1H,
+                },
+                name="Rename",
+                after=EventFieldType.LATENCY,
+            )
+
+        apply_storey_aggregations()
         # Write latency per prediction, labeled by endpoint ID only
         graph.add_step(
             "storey.TSDBTarget",
@@ -310,6 +346,7 @@ class V3IOTSDBConnector(TSDBConnector):
             ],
             index_cols=[
                 mm_schemas.EventFieldType.ENDPOINT_ID,
+                mm_schemas.EventFieldType.ERROR_TYPE,
             ],
             max_events=tsdb_batching_max_events,
             flush_after_seconds=tsdb_batching_timeout_secs,
@@ -338,9 +375,6 @@ class V3IOTSDBConnector(TSDBConnector):
         elif kind == mm_schemas.WriterEventKind.RESULT:
             table = self.tables[mm_schemas.V3IOTSDBTables.APP_RESULTS]
             index_cols = index_cols_base + [mm_schemas.ResultData.RESULT_NAME]
-            event.pop(mm_schemas.ResultData.CURRENT_STATS, None)
-            # TODO: remove this when extra data is supported (ML-7460)
-            event.pop(mm_schemas.ResultData.RESULT_EXTRA_DATA, None)
         else:
             raise ValueError(f"Invalid {kind = }")

@@ -544,6 +578,7 @@ class V3IOTSDBConnector(TSDBConnector):
         end: datetime,
         metrics: list[mm_schemas.ModelEndpointMonitoringMetric],
         type: Literal["metrics", "results"] = "results",
+        with_result_extra_data: bool = False,
     ) -> Union[
         list[
             Union[
@@ -565,6 +600,12 @@ class V3IOTSDBConnector(TSDBConnector):
         """

         if type == "metrics":
+            if with_result_extra_data:
+                logger.warning(
+                    "The 'with_result_extra_data' parameter is not supported for metrics, just for results",
+                    project=self.project,
+                    endpoint_id=endpoint_id,
+                )
             table_path = self.tables[mm_schemas.V3IOTSDBTables.METRICS]
             name = mm_schemas.MetricData.METRIC_NAME
             columns = [mm_schemas.MetricData.METRIC_VALUE]
@@ -577,6 +618,8 @@ class V3IOTSDBConnector(TSDBConnector):
                 mm_schemas.ResultData.RESULT_STATUS,
                 mm_schemas.ResultData.RESULT_KIND,
             ]
+            if with_result_extra_data:
+                columns.append(mm_schemas.ResultData.RESULT_EXTRA_DATA)
             df_handler = self.df_to_results_values
         else:
             raise ValueError(f"Invalid {type = }")
@@ -605,6 +648,9 @@ class V3IOTSDBConnector(TSDBConnector):
             endpoint_id=endpoint_id,
             is_empty=df.empty,
         )
+        if not with_result_extra_data and type == "results":
+            # Set the extra data to an empty string if it's not requested
+            df[mm_schemas.ResultData.RESULT_EXTRA_DATA] = ""

         return df_handler(df=df, metrics=metrics, project=self.project)

@@ -700,12 +746,13 @@ class V3IOTSDBConnector(TSDBConnector):
     def get_last_request(
         self,
         endpoint_ids: Union[str, list[str]],
-        start: Union[datetime, str] = "0",
-        end: Union[datetime, str] = "now",
+        start: Optional[datetime] = None,
+        end: Optional[datetime] = None,
     ) -> pd.DataFrame:
         endpoint_ids = (
             endpoint_ids if isinstance(endpoint_ids, list) else [endpoint_ids]
         )
+        start, end = self._get_start_end(start, end)
         df = self._get_records(
             table=mm_schemas.FileTargetKind.PREDICTIONS,
             start=start,
@@ -734,12 +781,14 @@ class V3IOTSDBConnector(TSDBConnector):
     def get_drift_status(
         self,
         endpoint_ids: Union[str, list[str]],
-        start: Union[datetime, str] = "now-24h",
-        end: Union[datetime, str] = "now",
+        start: Optional[datetime] = None,
+        end: Optional[datetime] = None,
     ) -> pd.DataFrame:
         endpoint_ids = (
             endpoint_ids if isinstance(endpoint_ids, list) else [endpoint_ids]
         )
+        start = start or (mlrun.utils.datetime_now() - timedelta(hours=24))
+        start, end = self._get_start_end(start, end)
         df = self._get_records(
             table=mm_schemas.V3IOTSDBTables.APP_RESULTS,
             start=start,
@@ -758,9 +807,10 @@ class V3IOTSDBConnector(TSDBConnector):
     def get_metrics_metadata(
         self,
         endpoint_id: str,
-        start: Union[datetime, str] = "0",
-        end: Union[datetime, str] = "now",
+        start: Optional[datetime] = None,
+        end: Optional[datetime] = None,
     ) -> pd.DataFrame:
+        start, end = self._get_start_end(start, end)
         df = self._get_records(
             table=mm_schemas.V3IOTSDBTables.METRICS,
             start=start,
@@ -778,9 +828,10 @@ class V3IOTSDBConnector(TSDBConnector):
     def get_results_metadata(
         self,
         endpoint_id: str,
-        start: Union[datetime, str] = "0",
-        end: Union[datetime, str] = "now",
+        start: Optional[datetime] = None,
+        end: Optional[datetime] = None,
     ) -> pd.DataFrame:
+        start, end = self._get_start_end(start, end)
         df = self._get_records(
             table=mm_schemas.V3IOTSDBTables.APP_RESULTS,
             start=start,
@@ -803,18 +854,20 @@ class V3IOTSDBConnector(TSDBConnector):
     def get_error_count(
         self,
         endpoint_ids: Union[str, list[str]],
-        start: Union[datetime, str] = "0",
-        end: Union[datetime, str] = "now",
+        start: Optional[datetime] = None,
+        end: Optional[datetime] = None,
     ) -> pd.DataFrame:
         endpoint_ids = (
             endpoint_ids if isinstance(endpoint_ids, list) else [endpoint_ids]
         )
+        start, end = self._get_start_end(start, end)
         df = self._get_records(
             table=mm_schemas.FileTargetKind.ERRORS,
             start=start,
             end=end,
             columns=[mm_schemas.EventFieldType.ERROR_COUNT],
-            filter_query=f"endpoint_id IN({str(endpoint_ids)[1:-1]})",
+            filter_query=f"endpoint_id IN({str(endpoint_ids)[1:-1]}) "
+            f"AND {mm_schemas.EventFieldType.ERROR_TYPE} == '{mm_schemas.EventFieldType.INFER_ERROR}'",
             agg_funcs=["count"],
         )
         if not df.empty:
@@ -830,12 +883,14 @@ class V3IOTSDBConnector(TSDBConnector):
     def get_avg_latency(
         self,
         endpoint_ids: Union[str, list[str]],
-        start: Union[datetime, str] = "0",
-        end: Union[datetime, str] = "now",
+        start: Optional[datetime] = None,
+        end: Optional[datetime] = None,
     ) -> pd.DataFrame:
         endpoint_ids = (
             endpoint_ids if isinstance(endpoint_ids, list) else [endpoint_ids]
         )
+        start = start or (mlrun.utils.datetime_now() - timedelta(hours=24))
+        start, end = self._get_start_end(start, end)
         df = self._get_records(
             table=mm_schemas.FileTargetKind.PREDICTIONS,
             start=start,
@@ -846,4 +901,10 @@ class V3IOTSDBConnector(TSDBConnector):
         )
         if not df.empty:
             df.dropna(inplace=True)
+            df.rename(
+                columns={
+                    f"avg({mm_schemas.EventFieldType.LATENCY})": f"avg_{mm_schemas.EventFieldType.LATENCY}"
+                },
+                inplace=True,
+            )
         return df.reset_index(drop=True)
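
A closing usage sketch of the V3IO aggregate getters implemented above (endpoint ids invented; the avg_latency column name assumes EventFieldType.LATENCY renders as "latency"):

from datetime import datetime, timezone

# Average latency over the default 24-hour lookback, one row per endpoint:
latency_df = connector.get_avg_latency(endpoint_ids=["ep-123", "ep-456"])
#   endpoint_id | avg_latency

# Inference-error count over an explicit window; only rows whose error_type
# tag equals the infer-error marker are counted, per the filter_query above:
errors_df = connector.get_error_count(
    endpoint_ids="ep-123",
    start=datetime(2024, 11, 1, tzinfo=timezone.utc),
    end=datetime(2024, 11, 5, tzinfo=timezone.utc),
)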