mlrun 1.7.2rc3__py3-none-any.whl → 1.8.0rc1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of mlrun might be problematic.

Files changed (222)
  1. mlrun/__init__.py +14 -12
  2. mlrun/__main__.py +3 -3
  3. mlrun/alerts/alert.py +19 -12
  4. mlrun/artifacts/__init__.py +0 -2
  5. mlrun/artifacts/base.py +34 -11
  6. mlrun/artifacts/dataset.py +16 -16
  7. mlrun/artifacts/manager.py +13 -13
  8. mlrun/artifacts/model.py +66 -53
  9. mlrun/common/constants.py +6 -0
  10. mlrun/common/formatters/__init__.py +1 -0
  11. mlrun/common/formatters/feature_set.py +1 -0
  12. mlrun/common/formatters/function.py +1 -0
  13. mlrun/common/formatters/model_endpoint.py +30 -0
  14. mlrun/common/formatters/pipeline.py +1 -2
  15. mlrun/common/model_monitoring/__init__.py +0 -3
  16. mlrun/common/model_monitoring/helpers.py +1 -1
  17. mlrun/common/runtimes/constants.py +1 -2
  18. mlrun/common/schemas/__init__.py +4 -2
  19. mlrun/common/schemas/artifact.py +0 -6
  20. mlrun/common/schemas/common.py +50 -0
  21. mlrun/common/schemas/model_monitoring/__init__.py +8 -1
  22. mlrun/common/schemas/model_monitoring/constants.py +62 -12
  23. mlrun/common/schemas/model_monitoring/model_endpoint_v2.py +149 -0
  24. mlrun/common/schemas/model_monitoring/model_endpoints.py +21 -5
  25. mlrun/common/schemas/partition.py +122 -0
  26. mlrun/config.py +43 -15
  27. mlrun/data_types/__init__.py +0 -2
  28. mlrun/data_types/data_types.py +0 -1
  29. mlrun/data_types/infer.py +3 -1
  30. mlrun/data_types/spark.py +4 -4
  31. mlrun/data_types/to_pandas.py +2 -11
  32. mlrun/datastore/__init__.py +0 -2
  33. mlrun/datastore/alibaba_oss.py +4 -1
  34. mlrun/datastore/azure_blob.py +4 -1
  35. mlrun/datastore/base.py +12 -4
  36. mlrun/datastore/datastore.py +9 -3
  37. mlrun/datastore/datastore_profile.py +1 -1
  38. mlrun/datastore/dbfs_store.py +4 -1
  39. mlrun/datastore/filestore.py +4 -1
  40. mlrun/datastore/google_cloud_storage.py +4 -1
  41. mlrun/datastore/hdfs.py +4 -1
  42. mlrun/datastore/inmem.py +4 -1
  43. mlrun/datastore/redis.py +4 -1
  44. mlrun/datastore/s3.py +4 -1
  45. mlrun/datastore/sources.py +51 -49
  46. mlrun/datastore/store_resources.py +0 -2
  47. mlrun/datastore/targets.py +22 -23
  48. mlrun/datastore/utils.py +2 -2
  49. mlrun/datastore/v3io.py +4 -1
  50. mlrun/datastore/wasbfs/fs.py +13 -12
  51. mlrun/db/base.py +126 -62
  52. mlrun/db/factory.py +3 -0
  53. mlrun/db/httpdb.py +767 -231
  54. mlrun/db/nopdb.py +126 -57
  55. mlrun/errors.py +2 -2
  56. mlrun/execution.py +55 -29
  57. mlrun/feature_store/__init__.py +0 -2
  58. mlrun/feature_store/api.py +40 -40
  59. mlrun/feature_store/common.py +9 -9
  60. mlrun/feature_store/feature_set.py +20 -18
  61. mlrun/feature_store/feature_vector.py +27 -24
  62. mlrun/feature_store/retrieval/base.py +14 -9
  63. mlrun/feature_store/retrieval/job.py +2 -1
  64. mlrun/feature_store/steps.py +2 -2
  65. mlrun/features.py +30 -13
  66. mlrun/frameworks/__init__.py +1 -2
  67. mlrun/frameworks/_common/__init__.py +1 -2
  68. mlrun/frameworks/_common/artifacts_library.py +2 -2
  69. mlrun/frameworks/_common/mlrun_interface.py +10 -6
  70. mlrun/frameworks/_common/model_handler.py +29 -27
  71. mlrun/frameworks/_common/producer.py +3 -1
  72. mlrun/frameworks/_dl_common/__init__.py +1 -2
  73. mlrun/frameworks/_dl_common/loggers/__init__.py +1 -2
  74. mlrun/frameworks/_dl_common/loggers/mlrun_logger.py +4 -4
  75. mlrun/frameworks/_dl_common/loggers/tensorboard_logger.py +3 -3
  76. mlrun/frameworks/_ml_common/__init__.py +1 -2
  77. mlrun/frameworks/_ml_common/loggers/__init__.py +1 -2
  78. mlrun/frameworks/_ml_common/model_handler.py +21 -21
  79. mlrun/frameworks/_ml_common/plans/__init__.py +1 -2
  80. mlrun/frameworks/_ml_common/plans/confusion_matrix_plan.py +3 -1
  81. mlrun/frameworks/_ml_common/plans/dataset_plan.py +3 -3
  82. mlrun/frameworks/_ml_common/plans/roc_curve_plan.py +4 -4
  83. mlrun/frameworks/auto_mlrun/__init__.py +1 -2
  84. mlrun/frameworks/auto_mlrun/auto_mlrun.py +22 -15
  85. mlrun/frameworks/huggingface/__init__.py +1 -2
  86. mlrun/frameworks/huggingface/model_server.py +9 -9
  87. mlrun/frameworks/lgbm/__init__.py +47 -44
  88. mlrun/frameworks/lgbm/callbacks/__init__.py +1 -2
  89. mlrun/frameworks/lgbm/callbacks/logging_callback.py +4 -2
  90. mlrun/frameworks/lgbm/callbacks/mlrun_logging_callback.py +4 -2
  91. mlrun/frameworks/lgbm/mlrun_interfaces/__init__.py +1 -2
  92. mlrun/frameworks/lgbm/mlrun_interfaces/mlrun_interface.py +5 -5
  93. mlrun/frameworks/lgbm/model_handler.py +15 -11
  94. mlrun/frameworks/lgbm/model_server.py +11 -7
  95. mlrun/frameworks/lgbm/utils.py +2 -2
  96. mlrun/frameworks/onnx/__init__.py +1 -2
  97. mlrun/frameworks/onnx/dataset.py +3 -3
  98. mlrun/frameworks/onnx/mlrun_interface.py +2 -2
  99. mlrun/frameworks/onnx/model_handler.py +7 -5
  100. mlrun/frameworks/onnx/model_server.py +8 -6
  101. mlrun/frameworks/parallel_coordinates.py +11 -11
  102. mlrun/frameworks/pytorch/__init__.py +22 -23
  103. mlrun/frameworks/pytorch/callbacks/__init__.py +1 -2
  104. mlrun/frameworks/pytorch/callbacks/callback.py +2 -1
  105. mlrun/frameworks/pytorch/callbacks/logging_callback.py +15 -8
  106. mlrun/frameworks/pytorch/callbacks/mlrun_logging_callback.py +19 -12
  107. mlrun/frameworks/pytorch/callbacks/tensorboard_logging_callback.py +22 -15
  108. mlrun/frameworks/pytorch/callbacks_handler.py +36 -30
  109. mlrun/frameworks/pytorch/mlrun_interface.py +17 -17
  110. mlrun/frameworks/pytorch/model_handler.py +21 -17
  111. mlrun/frameworks/pytorch/model_server.py +13 -9
  112. mlrun/frameworks/sklearn/__init__.py +19 -18
  113. mlrun/frameworks/sklearn/estimator.py +2 -2
  114. mlrun/frameworks/sklearn/metric.py +3 -3
  115. mlrun/frameworks/sklearn/metrics_library.py +8 -6
  116. mlrun/frameworks/sklearn/mlrun_interface.py +3 -2
  117. mlrun/frameworks/sklearn/model_handler.py +4 -3
  118. mlrun/frameworks/tf_keras/__init__.py +11 -12
  119. mlrun/frameworks/tf_keras/callbacks/__init__.py +1 -2
  120. mlrun/frameworks/tf_keras/callbacks/logging_callback.py +17 -14
  121. mlrun/frameworks/tf_keras/callbacks/mlrun_logging_callback.py +15 -12
  122. mlrun/frameworks/tf_keras/callbacks/tensorboard_logging_callback.py +21 -18
  123. mlrun/frameworks/tf_keras/model_handler.py +17 -13
  124. mlrun/frameworks/tf_keras/model_server.py +12 -8
  125. mlrun/frameworks/xgboost/__init__.py +19 -18
  126. mlrun/frameworks/xgboost/model_handler.py +13 -9
  127. mlrun/launcher/base.py +3 -4
  128. mlrun/launcher/local.py +1 -1
  129. mlrun/launcher/remote.py +1 -1
  130. mlrun/lists.py +4 -3
  131. mlrun/model.py +108 -44
  132. mlrun/model_monitoring/__init__.py +1 -2
  133. mlrun/model_monitoring/api.py +6 -6
  134. mlrun/model_monitoring/applications/_application_steps.py +13 -15
  135. mlrun/model_monitoring/applications/histogram_data_drift.py +41 -15
  136. mlrun/model_monitoring/applications/results.py +55 -3
  137. mlrun/model_monitoring/controller.py +185 -223
  138. mlrun/model_monitoring/db/_schedules.py +156 -0
  139. mlrun/model_monitoring/db/_stats.py +189 -0
  140. mlrun/model_monitoring/db/stores/__init__.py +1 -1
  141. mlrun/model_monitoring/db/stores/base/store.py +6 -65
  142. mlrun/model_monitoring/db/stores/sqldb/models/__init__.py +0 -25
  143. mlrun/model_monitoring/db/stores/sqldb/models/base.py +0 -97
  144. mlrun/model_monitoring/db/stores/sqldb/models/mysql.py +2 -58
  145. mlrun/model_monitoring/db/stores/sqldb/models/sqlite.py +0 -15
  146. mlrun/model_monitoring/db/stores/sqldb/sql_store.py +6 -257
  147. mlrun/model_monitoring/db/stores/v3io_kv/kv_store.py +9 -271
  148. mlrun/model_monitoring/db/tsdb/base.py +74 -22
  149. mlrun/model_monitoring/db/tsdb/tdengine/schemas.py +66 -35
  150. mlrun/model_monitoring/db/tsdb/tdengine/stream_graph_steps.py +33 -0
  151. mlrun/model_monitoring/db/tsdb/tdengine/tdengine_connector.py +284 -51
  152. mlrun/model_monitoring/db/tsdb/v3io/stream_graph_steps.py +1 -0
  153. mlrun/model_monitoring/db/tsdb/v3io/v3io_connector.py +35 -17
  154. mlrun/model_monitoring/helpers.py +97 -1
  155. mlrun/model_monitoring/model_endpoint.py +4 -2
  156. mlrun/model_monitoring/stream_processing.py +2 -2
  157. mlrun/model_monitoring/tracking_policy.py +10 -3
  158. mlrun/model_monitoring/writer.py +47 -26
  159. mlrun/package/__init__.py +3 -6
  160. mlrun/package/context_handler.py +1 -1
  161. mlrun/package/packager.py +12 -9
  162. mlrun/package/packagers/__init__.py +0 -2
  163. mlrun/package/packagers/default_packager.py +14 -11
  164. mlrun/package/packagers/numpy_packagers.py +16 -7
  165. mlrun/package/packagers/pandas_packagers.py +18 -18
  166. mlrun/package/packagers/python_standard_library_packagers.py +25 -11
  167. mlrun/package/packagers_manager.py +31 -14
  168. mlrun/package/utils/__init__.py +0 -3
  169. mlrun/package/utils/_pickler.py +6 -6
  170. mlrun/platforms/__init__.py +3 -3
  171. mlrun/platforms/iguazio.py +4 -1
  172. mlrun/projects/__init__.py +1 -6
  173. mlrun/projects/operations.py +27 -27
  174. mlrun/projects/pipelines.py +85 -215
  175. mlrun/projects/project.py +444 -158
  176. mlrun/run.py +9 -9
  177. mlrun/runtimes/__init__.py +1 -3
  178. mlrun/runtimes/base.py +13 -10
  179. mlrun/runtimes/daskjob.py +9 -9
  180. mlrun/runtimes/generators.py +2 -1
  181. mlrun/runtimes/kubejob.py +4 -5
  182. mlrun/runtimes/mpijob/__init__.py +0 -2
  183. mlrun/runtimes/mpijob/abstract.py +7 -6
  184. mlrun/runtimes/nuclio/api_gateway.py +7 -7
  185. mlrun/runtimes/nuclio/application/application.py +11 -11
  186. mlrun/runtimes/nuclio/function.py +14 -13
  187. mlrun/runtimes/nuclio/serving.py +9 -9
  188. mlrun/runtimes/pod.py +74 -29
  189. mlrun/runtimes/remotesparkjob.py +3 -2
  190. mlrun/runtimes/sparkjob/__init__.py +0 -2
  191. mlrun/runtimes/sparkjob/spark3job.py +21 -11
  192. mlrun/runtimes/utils.py +6 -5
  193. mlrun/serving/merger.py +6 -4
  194. mlrun/serving/remote.py +18 -17
  195. mlrun/serving/routers.py +27 -27
  196. mlrun/serving/server.py +1 -1
  197. mlrun/serving/states.py +76 -71
  198. mlrun/serving/utils.py +13 -2
  199. mlrun/serving/v1_serving.py +3 -2
  200. mlrun/serving/v2_serving.py +4 -4
  201. mlrun/track/__init__.py +1 -1
  202. mlrun/track/tracker.py +2 -2
  203. mlrun/track/trackers/mlflow_tracker.py +6 -5
  204. mlrun/utils/async_http.py +1 -1
  205. mlrun/utils/helpers.py +72 -28
  206. mlrun/utils/logger.py +104 -2
  207. mlrun/utils/notifications/notification/base.py +23 -4
  208. mlrun/utils/notifications/notification/console.py +1 -1
  209. mlrun/utils/notifications/notification/git.py +6 -6
  210. mlrun/utils/notifications/notification/ipython.py +5 -4
  211. mlrun/utils/notifications/notification/slack.py +1 -1
  212. mlrun/utils/notifications/notification/webhook.py +13 -17
  213. mlrun/utils/notifications/notification_pusher.py +23 -19
  214. mlrun/utils/regex.py +1 -1
  215. mlrun/utils/version/version.json +2 -2
  216. {mlrun-1.7.2rc3.dist-info → mlrun-1.8.0rc1.dist-info}/METADATA +186 -186
  217. mlrun-1.8.0rc1.dist-info/RECORD +356 -0
  218. {mlrun-1.7.2rc3.dist-info → mlrun-1.8.0rc1.dist-info}/WHEEL +1 -1
  219. mlrun-1.7.2rc3.dist-info/RECORD +0 -351
  220. {mlrun-1.7.2rc3.dist-info → mlrun-1.8.0rc1.dist-info}/LICENSE +0 -0
  221. {mlrun-1.7.2rc3.dist-info → mlrun-1.8.0rc1.dist-info}/entry_points.txt +0 -0
  222. {mlrun-1.7.2rc3.dist-info → mlrun-1.8.0rc1.dist-info}/top_level.txt +0 -0
mlrun/model_monitoring/db/tsdb/tdengine/tdengine_connector.py

@@ -13,8 +13,7 @@
 # limitations under the License.
 
 import typing
-from datetime import datetime
-from typing import Union
+from datetime import datetime, timedelta, timezone
 
 import pandas as pd
 import taosws
@@ -82,13 +81,16 @@ class TDEngineConnector(TSDBConnector):
         """Initialize the super tables for the TSDB."""
         self.tables = {
             mm_schemas.TDEngineSuperTables.APP_RESULTS: tdengine_schemas.AppResultTable(
-                project=self.project, database=self.database
+                self.database
             ),
             mm_schemas.TDEngineSuperTables.METRICS: tdengine_schemas.Metrics(
-                project=self.project, database=self.database
+                self.database
             ),
             mm_schemas.TDEngineSuperTables.PREDICTIONS: tdengine_schemas.Predictions(
-                project=self.project, database=self.database
+                self.database
+            ),
+            mm_schemas.TDEngineSuperTables.ERRORS: tdengine_schemas.Errors(
+                self.database
             ),
         }
 
@@ -112,9 +114,11 @@ class TDEngineConnector(TSDBConnector):
         """
 
         table_name = (
+            f"{self.project}_"
             f"{event[mm_schemas.WriterEvent.ENDPOINT_ID]}_"
-            f"{event[mm_schemas.WriterEvent.APPLICATION_NAME]}"
+            f"{event[mm_schemas.WriterEvent.APPLICATION_NAME]}_"
         )
+        event[mm_schemas.EventFieldType.PROJECT] = self.project
 
         if kind == mm_schemas.WriterEventKind.RESULT:
             # Write a new result
@@ -122,7 +126,6 @@ class TDEngineConnector(TSDBConnector):
             table_name = (
                 f"{table_name}_{event[mm_schemas.ResultData.RESULT_NAME]}"
             ).replace("-", "_")
-            event.pop(mm_schemas.ResultData.CURRENT_STATS, None)
 
         else:
             # Write a new metric
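
Since projects now share supertables, the subtable name is prefixed with the project and the event carries the project as an explicit field. A minimal sketch of the naming logic in the two hunks above, using hypothetical values (note the double underscore produced by the trailing `_` plus the result-name join, and that the `replace` runs over the whole string):

    # Hypothetical values, for illustration only
    project, endpoint_id, app_name = "fraud-demo", "ep123", "drift_app"
    table_name = f"{project}_{endpoint_id}_{app_name}_"
    # For a RESULT event, the result name is appended and '-' becomes '_':
    table_name = f"{table_name}_{'data-drift'}".replace("-", "_")
    print(table_name)  # fraud_demo_ep123_drift_app__data_drift
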
@@ -185,9 +188,7 @@ class TDEngineConnector(TSDBConnector):
             name=name,
             after=after,
             url=self._tdengine_connection_string,
-            supertable=self.tables[
-                mm_schemas.TDEngineSuperTables.PREDICTIONS
-            ].super_table,
+            supertable=mm_schemas.TDEngineSuperTables.PREDICTIONS,
             table_col=mm_schemas.EventFieldType.TABLE_COLUMN,
             time_col=mm_schemas.EventFieldType.TIME,
             database=self.database,
@@ -209,8 +210,38 @@ class TDEngineConnector(TSDBConnector):
             after="ProcessBeforeTDEngine",
         )
 
-    def handle_model_error(self, graph, **kwargs) -> None:
-        pass
+    def handle_model_error(
+        self,
+        graph,
+        tsdb_batching_max_events: int = 1000,
+        tsdb_batching_timeout_secs: int = 30,
+        **kwargs,
+    ) -> None:
+        graph.add_step(
+            "mlrun.model_monitoring.db.tsdb.tdengine.stream_graph_steps.ErrorExtractor",
+            name="error_extractor",
+            after="ForwardError",
+        )
+        graph.add_step(
+            "storey.TDEngineTarget",
+            name="tsdb_error",
+            after="error_extractor",
+            url=self._tdengine_connection_string,
+            supertable=mm_schemas.TDEngineSuperTables.ERRORS,
+            table_col=mm_schemas.EventFieldType.TABLE_COLUMN,
+            time_col=mm_schemas.EventFieldType.TIME,
+            database=self.database,
+            columns=[
+                mm_schemas.EventFieldType.MODEL_ERROR,
+            ],
+            tag_cols=[
+                mm_schemas.EventFieldType.PROJECT,
+                mm_schemas.EventFieldType.ENDPOINT_ID,
+                mm_schemas.EventFieldType.ERROR_TYPE,
+            ],
+            max_events=tsdb_batching_max_events,
+            flush_after_seconds=tsdb_batching_timeout_secs,
+        )
 
     def delete_tsdb_resources(self):
         """
@@ -220,23 +251,22 @@ class TDEngineConnector(TSDBConnector):
             "Deleting all project resources using the TDEngine connector",
             project=self.project,
         )
-        drop_statements = []
         for table in self.tables:
-            drop_statements.append(self.tables[table].drop_supertable_query())
-
-        try:
-            self.connection.run(
-                statements=drop_statements,
+            get_subtable_names_query = self.tables[table]._get_subtables_query(
+                values={mm_schemas.EventFieldType.PROJECT: self.project}
+            )
+            subtables = self.connection.run(
+                query=get_subtable_names_query,
                 timeout=self._timeout,
                 retries=self._retries,
-            )
-        except Exception as e:
-            logger.warning(
-                "Failed to drop TDEngine tables. You may need to drop them manually. "
-                "These can be found under the following supertables: app_results, "
-                "metrics, and predictions.",
-                project=self.project,
-                error=mlrun.errors.err_to_str(e),
+            ).data
+            drop_statements = []
+            for subtable in subtables:
+                drop_statements.append(
+                    self.tables[table]._drop_subtable_query(subtable=subtable[0])
+                )
+            self.connection.run(
+                statements=drop_statements, timeout=self._timeout, retries=self._retries
             )
         logger.debug(
             "Deleted all project resources using the TDEngine connector",
@@ -265,6 +295,10 @@ class TDEngineConnector(TSDBConnector):
         limit: typing.Optional[int] = None,
         sliding_window_step: typing.Optional[str] = None,
         timestamp_column: str = mm_schemas.EventFieldType.TIME,
+        group_by: typing.Optional[typing.Union[list[str], str]] = None,
+        preform_agg_columns: typing.Optional[list] = None,
+        order_by: typing.Optional[str] = None,
+        desc: typing.Optional[bool] = None,
     ) -> pd.DataFrame:
         """
         Getting records from TSDB data collection.
@@ -284,11 +318,26 @@ class TDEngineConnector(TSDBConnector):
                                     `sliding_window_step` is provided, interval must be provided as well. Provided
                                     as a string in the format of '1m', '1h', etc.
         :param timestamp_column:    The column name that holds the timestamp index.
+        :param group_by:            The column name to group by. Note that if `group_by` is provided, aggregation
+                                    functions must bg provided
+        :param preform_agg_columns: The columns to preform aggregation on.
+                                    notice that all aggregation functions provided will preform on those columns.
+                                    If not provided The default behavior is to preform on all columns in columns,
+                                    if an empty list was provided The aggregation won't be performed.
+        :param order_by:            The column or alias to preform ordering on the query.
+        :param desc:                Whether or not to sort the results in descending order.
 
         :return: DataFrame with the provided attributes from the data collection.
         :raise:  MLRunInvalidArgumentError if query the provided table failed.
         """
 
+        project_condition = f"project = '{self.project}'"
+        filter_query = (
+            f"({filter_query}) AND ({project_condition})"
+            if filter_query
+            else project_condition
+        )
+
         full_query = tdengine_schemas.TDEngineSchema._get_records_query(
             table=table,
             start=start,
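
Because every project now writes into the same supertables, each read is scoped by the `project` tag. A small sketch of the composed WHERE clause, mirroring the filter composition above (values hypothetical):

    project = "my_proj"
    filter_query = "endpoint_id='ep123'"  # caller-supplied; may be empty
    project_condition = f"project = '{project}'"
    filter_query = (
        f"({filter_query}) AND ({project_condition})"
        if filter_query
        else project_condition
    )
    print(filter_query)  # (endpoint_id='ep123') AND (project = 'my_proj')
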
@@ -301,6 +350,10 @@ class TDEngineConnector(TSDBConnector):
             sliding_window_step=sliding_window_step,
             timestamp_column=timestamp_column,
             database=self.database,
+            group_by=group_by,
+            preform_agg_funcs_columns=preform_agg_columns,
+            order_by=order_by,
+            desc=desc,
         )
         logger.debug("Querying TDEngine", query=full_query)
         try:
@@ -323,6 +376,7 @@ class TDEngineConnector(TSDBConnector):
         end: datetime,
         metrics: list[mm_schemas.ModelEndpointMonitoringMetric],
         type: typing.Literal["metrics", "results"],
+        with_result_extra_data: bool = False,
     ) -> typing.Union[
         list[
             typing.Union[
@@ -340,12 +394,18 @@ class TDEngineConnector(TSDBConnector):
         timestamp_column = mm_schemas.WriterEvent.END_INFER_TIME
         columns = [timestamp_column, mm_schemas.WriterEvent.APPLICATION_NAME]
         if type == "metrics":
-            table = self.tables[mm_schemas.TDEngineSuperTables.METRICS].super_table
+            if with_result_extra_data:
+                logger.warning(
+                    "The 'with_result_extra_data' parameter is not supported for metrics, just for results",
+                    project=self.project,
+                    endpoint_id=endpoint_id,
+                )
+            table = mm_schemas.TDEngineSuperTables.METRICS
             name = mm_schemas.MetricData.METRIC_NAME
             columns += [name, mm_schemas.MetricData.METRIC_VALUE]
             df_handler = self.df_to_metrics_values
         elif type == "results":
-            table = self.tables[mm_schemas.TDEngineSuperTables.APP_RESULTS].super_table
+            table = mm_schemas.TDEngineSuperTables.APP_RESULTS
             name = mm_schemas.ResultData.RESULT_NAME
             columns += [
                 name,
@@ -353,6 +413,8 @@ class TDEngineConnector(TSDBConnector):
                 mm_schemas.ResultData.RESULT_STATUS,
                 mm_schemas.ResultData.RESULT_KIND,
             ]
+            if with_result_extra_data:
+                columns.append(mm_schemas.ResultData.RESULT_EXTRA_DATA)
             df_handler = self.df_to_results_values
         else:
             raise mlrun.errors.MLRunInvalidArgumentError(
@@ -389,6 +451,10 @@ class TDEngineConnector(TSDBConnector):
             is_empty=df.empty,
         )
 
+        if not with_result_extra_data and type == "results":
+            # Set the extra data to an empty string if it's not requested
+            df[mm_schemas.ResultData.RESULT_EXTRA_DATA] = ""
+
         return df_handler(df=df, metrics=metrics, project=self.project)
 
     def read_predictions(
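
The read path gained a `with_result_extra_data` flag; when it is left off, the extra-data column is still present in the result but blanked. A hedged sketch of a call against the method above (shown here as `read_metrics_data`, an assumption, since the method name falls outside the hunk):

    # Sketch: assumes the method above is the connector's read_metrics_data
    values = connector.read_metrics_data(
        endpoint_id="ep123",
        start=start,
        end=end,
        metrics=metrics,
        type="results",
        with_result_extra_data=True,  # include ResultData.RESULT_EXTRA_DATA
    )
    # With the default False, RESULT_EXTRA_DATA is returned as "" for results
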
@@ -411,7 +477,7 @@ class TDEngineConnector(TSDBConnector):
                 "both or neither of `aggregation_window` and `agg_funcs` must be provided"
             )
         df = self._get_records(
-            table=self.tables[mm_schemas.TDEngineSuperTables.PREDICTIONS].super_table,
+            table=mm_schemas.TDEngineSuperTables.PREDICTIONS,
             start=start,
             end=end,
             columns=[mm_schemas.EventFieldType.LATENCY],
@@ -452,51 +518,218 @@
 
     def get_last_request(
         self,
-        endpoint_ids: Union[str, list[str]],
-        start: Union[datetime, str] = "0",
-        end: Union[datetime, str] = "now",
+        endpoint_ids: typing.Union[str, list[str]],
+        start: typing.Optional[datetime] = None,
+        end: typing.Optional[datetime] = None,
     ) -> pd.DataFrame:
-        pass
+        endpoint_ids = (
+            endpoint_ids if isinstance(endpoint_ids, list) else [endpoint_ids]
+        )
+        start, end = self._get_start_end(start, end)
+        df = self._get_records(
+            table=mm_schemas.TDEngineSuperTables.PREDICTIONS,
+            start=start,
+            end=end,
+            columns=[
+                mm_schemas.EventFieldType.ENDPOINT_ID,
+                mm_schemas.EventFieldType.TIME,
+                mm_schemas.EventFieldType.LATENCY,
+            ],
+            filter_query=f"endpoint_id IN({str(endpoint_ids)[1:-1]})",
+            timestamp_column=mm_schemas.EventFieldType.TIME,
+            agg_funcs=["last"],
+            group_by=mm_schemas.EventFieldType.ENDPOINT_ID,
+            preform_agg_columns=[mm_schemas.EventFieldType.TIME],
+        )
+        if not df.empty:
+            df.dropna(inplace=True)
+            df.rename(
+                columns={
+                    f"last({mm_schemas.EventFieldType.TIME})": mm_schemas.EventFieldType.LAST_REQUEST,
+                    f"{mm_schemas.EventFieldType.LATENCY}": "last_latency",
+                },
+                inplace=True,
+            )
+            df[mm_schemas.EventFieldType.LAST_REQUEST] = df[
+                mm_schemas.EventFieldType.LAST_REQUEST
+            ].map(
+                lambda last_request: datetime.strptime(
+                    last_request, "%Y-%m-%d %H:%M:%S.%f %z"
+                ).astimezone(tz=timezone.utc)
+            )
+        return df
 
     def get_drift_status(
         self,
-        endpoint_ids: Union[str, list[str]],
-        start: Union[datetime, str] = "now-24h",
-        end: Union[datetime, str] = "now",
+        endpoint_ids: typing.Union[str, list[str]],
+        start: typing.Optional[datetime] = None,
+        end: typing.Optional[datetime] = None,
     ) -> pd.DataFrame:
-        pass
+        endpoint_ids = (
+            endpoint_ids if isinstance(endpoint_ids, list) else [endpoint_ids]
+        )
+        start = start or (mlrun.utils.datetime_now() - timedelta(hours=24))
+        start, end = self._get_start_end(start, end)
+        df = self._get_records(
+            table=mm_schemas.TDEngineSuperTables.APP_RESULTS,
+            start=start,
+            end=end,
+            columns=[
+                mm_schemas.ResultData.RESULT_STATUS,
+                mm_schemas.EventFieldType.ENDPOINT_ID,
+            ],
+            filter_query=f"endpoint_id IN({str(endpoint_ids)[1:-1]})",
+            timestamp_column=mm_schemas.WriterEvent.END_INFER_TIME,
+            agg_funcs=["max"],
+            group_by=mm_schemas.EventFieldType.ENDPOINT_ID,
+            preform_agg_columns=[mm_schemas.ResultData.RESULT_STATUS],
+        )
+        df.rename(
+            columns={
+                f"max({mm_schemas.ResultData.RESULT_STATUS})": mm_schemas.ResultData.RESULT_STATUS
+            },
+            inplace=True,
+        )
+        if not df.empty:
+            df.dropna(inplace=True)
+        return df
 
     def get_metrics_metadata(
         self,
         endpoint_id: str,
-        start: Union[datetime, str] = "0",
-        end: Union[datetime, str] = "now",
+        start: typing.Optional[datetime] = None,
+        end: typing.Optional[datetime] = None,
     ) -> pd.DataFrame:
-        pass
+        start, end = self._get_start_end(start, end)
+        df = self._get_records(
+            table=mm_schemas.TDEngineSuperTables.METRICS,
+            start=start,
+            end=end,
+            columns=[
+                mm_schemas.ApplicationEvent.APPLICATION_NAME,
+                mm_schemas.MetricData.METRIC_NAME,
+                mm_schemas.EventFieldType.ENDPOINT_ID,
+            ],
+            filter_query=f"endpoint_id='{endpoint_id}'",
+            timestamp_column=mm_schemas.WriterEvent.END_INFER_TIME,
+            group_by=[
+                mm_schemas.WriterEvent.APPLICATION_NAME,
+                mm_schemas.MetricData.METRIC_NAME,
+            ],
+            agg_funcs=["last"],
+        )
+        df.rename(
+            columns={
+                f"last({mm_schemas.ApplicationEvent.APPLICATION_NAME})": mm_schemas.ApplicationEvent.APPLICATION_NAME,
+                f"last({mm_schemas.MetricData.METRIC_NAME})": mm_schemas.MetricData.METRIC_NAME,
+                f"last({mm_schemas.EventFieldType.ENDPOINT_ID})": mm_schemas.EventFieldType.ENDPOINT_ID,
+            },
+            inplace=True,
+        )
+        if not df.empty:
+            df.dropna(inplace=True)
+        return df
 
     def get_results_metadata(
         self,
         endpoint_id: str,
-        start: Union[datetime, str] = "0",
-        end: Union[datetime, str] = "now",
+        start: typing.Optional[datetime] = None,
+        end: typing.Optional[datetime] = None,
     ) -> pd.DataFrame:
-        pass
+        start, end = self._get_start_end(start, end)
+        df = self._get_records(
+            table=mm_schemas.TDEngineSuperTables.APP_RESULTS,
+            start=start,
+            end=end,
+            columns=[
+                mm_schemas.ApplicationEvent.APPLICATION_NAME,
+                mm_schemas.ResultData.RESULT_NAME,
+                mm_schemas.ResultData.RESULT_KIND,
+                mm_schemas.EventFieldType.ENDPOINT_ID,
+            ],
+            filter_query=f"endpoint_id='{endpoint_id}'",
+            timestamp_column=mm_schemas.WriterEvent.END_INFER_TIME,
+            group_by=[
+                mm_schemas.WriterEvent.APPLICATION_NAME,
+                mm_schemas.ResultData.RESULT_NAME,
+            ],
+            agg_funcs=["last"],
+        )
+        df.rename(
+            columns={
+                f"last({mm_schemas.ApplicationEvent.APPLICATION_NAME})": mm_schemas.ApplicationEvent.APPLICATION_NAME,
+                f"last({mm_schemas.ResultData.RESULT_NAME})": mm_schemas.ResultData.RESULT_NAME,
+                f"last({mm_schemas.ResultData.RESULT_KIND})": mm_schemas.ResultData.RESULT_KIND,
+                f"last({mm_schemas.EventFieldType.ENDPOINT_ID})": mm_schemas.EventFieldType.ENDPOINT_ID,
+            },
+            inplace=True,
+        )
+        if not df.empty:
+            df.dropna(inplace=True)
+        return df
 
     def get_error_count(
         self,
-        endpoint_ids: Union[str, list[str]],
-        start: Union[datetime, str] = "0",
-        end: Union[datetime, str] = "now",
+        endpoint_ids: typing.Union[str, list[str]],
+        start: typing.Optional[datetime] = None,
+        end: typing.Optional[datetime] = None,
     ) -> pd.DataFrame:
-        pass
+        endpoint_ids = (
+            endpoint_ids if isinstance(endpoint_ids, list) else [endpoint_ids]
+        )
+        start, end = self._get_start_end(start, end)
+        df = self._get_records(
+            table=mm_schemas.TDEngineSuperTables.ERRORS,
+            start=start,
+            end=end,
+            columns=[
+                mm_schemas.EventFieldType.MODEL_ERROR,
+                mm_schemas.EventFieldType.ENDPOINT_ID,
+            ],
+            agg_funcs=["count"],
+            filter_query=f"endpoint_id IN({str(endpoint_ids)[1:-1]}) "
+            f"AND {mm_schemas.EventFieldType.ERROR_TYPE} = '{mm_schemas.EventFieldType.INFER_ERROR}'",
+            group_by=mm_schemas.EventFieldType.ENDPOINT_ID,
+            preform_agg_columns=[mm_schemas.EventFieldType.MODEL_ERROR],
+        )
+        df.rename(
+            columns={f"count({mm_schemas.EventFieldType.MODEL_ERROR})": "error_count"},
+            inplace=True,
+        )
+        if not df.empty:
+            df.dropna(inplace=True)
+        return df
 
     def get_avg_latency(
         self,
-        endpoint_ids: Union[str, list[str]],
-        start: Union[datetime, str] = "0",
-        end: Union[datetime, str] = "now",
+        endpoint_ids: typing.Union[str, list[str]],
+        start: typing.Optional[datetime] = None,
+        end: typing.Optional[datetime] = None,
     ) -> pd.DataFrame:
-        pass
+        endpoint_ids = (
+            endpoint_ids if isinstance(endpoint_ids, list) else [endpoint_ids]
+        )
+        start, end = self._get_start_end(start, end)
+        df = self._get_records(
+            table=mm_schemas.TDEngineSuperTables.PREDICTIONS,
+            start=start,
+            end=end,
+            columns=[
+                mm_schemas.EventFieldType.LATENCY,
+                mm_schemas.EventFieldType.ENDPOINT_ID,
+            ],
+            agg_funcs=["avg"],
+            filter_query=f"endpoint_id IN({str(endpoint_ids)[1:-1]})",
+            group_by=mm_schemas.EventFieldType.ENDPOINT_ID,
+            preform_agg_columns=[mm_schemas.EventFieldType.LATENCY],
+        )
+        df.rename(
+            columns={f"avg({mm_schemas.EventFieldType.LATENCY})": "avg_latency"},
+            inplace=True,
+        )
+        if not df.empty:
+            df.dropna(inplace=True)
+        return df
 
     # Note: this function serves as a reference for checking the TSDB for the existence of a metric.
     #
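
All six endpoint-statistics getters were `pass` stubs in 1.7.2rc3 and are now implemented on top of `_get_records` with the new grouping and aggregation support. A minimal usage sketch, assuming an initialized connector:

    # Assumes `connector` is an initialized TDEngineConnector (sketch only)
    from datetime import datetime, timedelta, timezone

    end = datetime.now(tz=timezone.utc)
    start = end - timedelta(days=7)

    last = connector.get_last_request(["ep1", "ep2"], start=start, end=end)
    # -> DataFrame with endpoint_id, last_request (UTC), last_latency
    status = connector.get_drift_status("ep1")  # defaults to the last 24 hours
    errors = connector.get_error_count(["ep1"], start=start, end=end)   # error_count
    latency = connector.get_avg_latency(["ep1"], start=start, end=end)  # avg_latency
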
mlrun/model_monitoring/db/tsdb/v3io/stream_graph_steps.py

@@ -150,6 +150,7 @@ class ErrorExtractor(mlrun.feature_store.steps.MapClass):
         endpoint_id = event[EventFieldType.ENDPOINT_ID]
         event = {
             EventFieldType.MODEL_ERROR: str(error),
+            EventFieldType.ERROR_TYPE: EventFieldType.INFER_ERROR,
             EventFieldType.ENDPOINT_ID: endpoint_id,
             EventFieldType.TIMESTAMP: timestamp,
             EventFieldType.ERROR_COUNT: 1.0,
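
With the added field, the event that `ErrorExtractor` emits for a failed inference looks roughly like this (field names per the hunk; values hypothetical):

    # Shape of the extracted error event after this change (illustrative values)
    event = {
        EventFieldType.MODEL_ERROR: "ValueError: bad input",
        EventFieldType.ERROR_TYPE: EventFieldType.INFER_ERROR,  # the new field
        EventFieldType.ENDPOINT_ID: "ep123",
        EventFieldType.TIMESTAMP: timestamp,
        EventFieldType.ERROR_COUNT: 1.0,
    }
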
mlrun/model_monitoring/db/tsdb/v3io/v3io_connector.py

@@ -12,7 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-from datetime import datetime, timezone
+from datetime import datetime, timedelta, timezone
 from io import StringIO
 from typing import Literal, Optional, Union
 
@@ -310,6 +310,7 @@ class V3IOTSDBConnector(TSDBConnector):
             ],
             index_cols=[
                 mm_schemas.EventFieldType.ENDPOINT_ID,
+                mm_schemas.EventFieldType.ERROR_TYPE,
             ],
             max_events=tsdb_batching_max_events,
             flush_after_seconds=tsdb_batching_timeout_secs,
@@ -338,9 +339,6 @@ class V3IOTSDBConnector(TSDBConnector):
         elif kind == mm_schemas.WriterEventKind.RESULT:
             table = self.tables[mm_schemas.V3IOTSDBTables.APP_RESULTS]
             index_cols = index_cols_base + [mm_schemas.ResultData.RESULT_NAME]
-            event.pop(mm_schemas.ResultData.CURRENT_STATS, None)
-            # TODO: remove this when extra data is supported (ML-7460)
-            event.pop(mm_schemas.ResultData.RESULT_EXTRA_DATA, None)
         else:
             raise ValueError(f"Invalid {kind = }")
 
@@ -544,6 +542,7 @@ class V3IOTSDBConnector(TSDBConnector):
         end: datetime,
         metrics: list[mm_schemas.ModelEndpointMonitoringMetric],
         type: Literal["metrics", "results"] = "results",
+        with_result_extra_data: bool = False,
     ) -> Union[
         list[
             Union[
@@ -565,6 +564,12 @@ class V3IOTSDBConnector(TSDBConnector):
         """
 
         if type == "metrics":
+            if with_result_extra_data:
+                logger.warning(
+                    "The 'with_result_extra_data' parameter is not supported for metrics, just for results",
+                    project=self.project,
+                    endpoint_id=endpoint_id,
+                )
             table_path = self.tables[mm_schemas.V3IOTSDBTables.METRICS]
             name = mm_schemas.MetricData.METRIC_NAME
             columns = [mm_schemas.MetricData.METRIC_VALUE]
@@ -577,6 +582,8 @@ class V3IOTSDBConnector(TSDBConnector):
                 mm_schemas.ResultData.RESULT_STATUS,
                 mm_schemas.ResultData.RESULT_KIND,
             ]
+            if with_result_extra_data:
+                columns.append(mm_schemas.ResultData.RESULT_EXTRA_DATA)
             df_handler = self.df_to_results_values
         else:
             raise ValueError(f"Invalid {type = }")
@@ -605,6 +612,9 @@ class V3IOTSDBConnector(TSDBConnector):
             endpoint_id=endpoint_id,
             is_empty=df.empty,
         )
+        if not with_result_extra_data and type == "results":
+            # Set the extra data to an empty string if it's not requested
+            df[mm_schemas.ResultData.RESULT_EXTRA_DATA] = ""
 
         return df_handler(df=df, metrics=metrics, project=self.project)
 
@@ -700,12 +710,13 @@ class V3IOTSDBConnector(TSDBConnector):
     def get_last_request(
         self,
         endpoint_ids: Union[str, list[str]],
-        start: Union[datetime, str] = "0",
-        end: Union[datetime, str] = "now",
+        start: Optional[datetime] = None,
+        end: Optional[datetime] = None,
     ) -> pd.DataFrame:
         endpoint_ids = (
             endpoint_ids if isinstance(endpoint_ids, list) else [endpoint_ids]
         )
+        start, end = self._get_start_end(start, end)
         df = self._get_records(
             table=mm_schemas.FileTargetKind.PREDICTIONS,
             start=start,
@@ -734,12 +745,14 @@ class V3IOTSDBConnector(TSDBConnector):
     def get_drift_status(
         self,
         endpoint_ids: Union[str, list[str]],
-        start: Union[datetime, str] = "now-24h",
-        end: Union[datetime, str] = "now",
+        start: Optional[datetime] = None,
+        end: Optional[datetime] = None,
     ) -> pd.DataFrame:
         endpoint_ids = (
             endpoint_ids if isinstance(endpoint_ids, list) else [endpoint_ids]
         )
+        start = start or (mlrun.utils.datetime_now() - timedelta(hours=24))
+        start, end = self._get_start_end(start, end)
         df = self._get_records(
             table=mm_schemas.V3IOTSDBTables.APP_RESULTS,
             start=start,
@@ -758,9 +771,10 @@ class V3IOTSDBConnector(TSDBConnector):
     def get_metrics_metadata(
         self,
         endpoint_id: str,
-        start: Union[datetime, str] = "0",
-        end: Union[datetime, str] = "now",
+        start: Optional[datetime] = None,
+        end: Optional[datetime] = None,
     ) -> pd.DataFrame:
+        start, end = self._get_start_end(start, end)
         df = self._get_records(
             table=mm_schemas.V3IOTSDBTables.METRICS,
             start=start,
@@ -778,9 +792,10 @@ class V3IOTSDBConnector(TSDBConnector):
     def get_results_metadata(
         self,
         endpoint_id: str,
-        start: Union[datetime, str] = "0",
-        end: Union[datetime, str] = "now",
+        start: Optional[datetime] = None,
+        end: Optional[datetime] = None,
     ) -> pd.DataFrame:
+        start, end = self._get_start_end(start, end)
         df = self._get_records(
             table=mm_schemas.V3IOTSDBTables.APP_RESULTS,
             start=start,
@@ -803,18 +818,20 @@ class V3IOTSDBConnector(TSDBConnector):
     def get_error_count(
         self,
         endpoint_ids: Union[str, list[str]],
-        start: Union[datetime, str] = "0",
-        end: Union[datetime, str] = "now",
+        start: Optional[datetime] = None,
+        end: Optional[datetime] = None,
     ) -> pd.DataFrame:
         endpoint_ids = (
             endpoint_ids if isinstance(endpoint_ids, list) else [endpoint_ids]
         )
+        start, end = self._get_start_end(start, end)
        df = self._get_records(
             table=mm_schemas.FileTargetKind.ERRORS,
             start=start,
             end=end,
             columns=[mm_schemas.EventFieldType.ERROR_COUNT],
-            filter_query=f"endpoint_id IN({str(endpoint_ids)[1:-1]})",
+            filter_query=f"endpoint_id IN({str(endpoint_ids)[1:-1]}) "
+            f"AND {mm_schemas.EventFieldType.ERROR_TYPE} == '{mm_schemas.EventFieldType.INFER_ERROR}'",
             agg_funcs=["count"],
         )
         if not df.empty:
@@ -830,12 +847,13 @@ class V3IOTSDBConnector(TSDBConnector):
     def get_avg_latency(
         self,
         endpoint_ids: Union[str, list[str]],
-        start: Union[datetime, str] = "0",
-        end: Union[datetime, str] = "now",
+        start: Optional[datetime] = None,
+        end: Optional[datetime] = None,
     ) -> pd.DataFrame:
         endpoint_ids = (
             endpoint_ids if isinstance(endpoint_ids, list) else [endpoint_ids]
         )
+        start, end = self._get_start_end(start, end)
         df = self._get_records(
             table=mm_schemas.FileTargetKind.PREDICTIONS,
             start=start,