mlrun 1.7.2rc3__py3-none-any.whl → 1.8.0rc1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of mlrun might be problematic.

Files changed (222)
  1. mlrun/__init__.py +14 -12
  2. mlrun/__main__.py +3 -3
  3. mlrun/alerts/alert.py +19 -12
  4. mlrun/artifacts/__init__.py +0 -2
  5. mlrun/artifacts/base.py +34 -11
  6. mlrun/artifacts/dataset.py +16 -16
  7. mlrun/artifacts/manager.py +13 -13
  8. mlrun/artifacts/model.py +66 -53
  9. mlrun/common/constants.py +6 -0
  10. mlrun/common/formatters/__init__.py +1 -0
  11. mlrun/common/formatters/feature_set.py +1 -0
  12. mlrun/common/formatters/function.py +1 -0
  13. mlrun/common/formatters/model_endpoint.py +30 -0
  14. mlrun/common/formatters/pipeline.py +1 -2
  15. mlrun/common/model_monitoring/__init__.py +0 -3
  16. mlrun/common/model_monitoring/helpers.py +1 -1
  17. mlrun/common/runtimes/constants.py +1 -2
  18. mlrun/common/schemas/__init__.py +4 -2
  19. mlrun/common/schemas/artifact.py +0 -6
  20. mlrun/common/schemas/common.py +50 -0
  21. mlrun/common/schemas/model_monitoring/__init__.py +8 -1
  22. mlrun/common/schemas/model_monitoring/constants.py +62 -12
  23. mlrun/common/schemas/model_monitoring/model_endpoint_v2.py +149 -0
  24. mlrun/common/schemas/model_monitoring/model_endpoints.py +21 -5
  25. mlrun/common/schemas/partition.py +122 -0
  26. mlrun/config.py +43 -15
  27. mlrun/data_types/__init__.py +0 -2
  28. mlrun/data_types/data_types.py +0 -1
  29. mlrun/data_types/infer.py +3 -1
  30. mlrun/data_types/spark.py +4 -4
  31. mlrun/data_types/to_pandas.py +2 -11
  32. mlrun/datastore/__init__.py +0 -2
  33. mlrun/datastore/alibaba_oss.py +4 -1
  34. mlrun/datastore/azure_blob.py +4 -1
  35. mlrun/datastore/base.py +12 -4
  36. mlrun/datastore/datastore.py +9 -3
  37. mlrun/datastore/datastore_profile.py +1 -1
  38. mlrun/datastore/dbfs_store.py +4 -1
  39. mlrun/datastore/filestore.py +4 -1
  40. mlrun/datastore/google_cloud_storage.py +4 -1
  41. mlrun/datastore/hdfs.py +4 -1
  42. mlrun/datastore/inmem.py +4 -1
  43. mlrun/datastore/redis.py +4 -1
  44. mlrun/datastore/s3.py +4 -1
  45. mlrun/datastore/sources.py +51 -49
  46. mlrun/datastore/store_resources.py +0 -2
  47. mlrun/datastore/targets.py +22 -23
  48. mlrun/datastore/utils.py +2 -2
  49. mlrun/datastore/v3io.py +4 -1
  50. mlrun/datastore/wasbfs/fs.py +13 -12
  51. mlrun/db/base.py +126 -62
  52. mlrun/db/factory.py +3 -0
  53. mlrun/db/httpdb.py +767 -231
  54. mlrun/db/nopdb.py +126 -57
  55. mlrun/errors.py +2 -2
  56. mlrun/execution.py +55 -29
  57. mlrun/feature_store/__init__.py +0 -2
  58. mlrun/feature_store/api.py +40 -40
  59. mlrun/feature_store/common.py +9 -9
  60. mlrun/feature_store/feature_set.py +20 -18
  61. mlrun/feature_store/feature_vector.py +27 -24
  62. mlrun/feature_store/retrieval/base.py +14 -9
  63. mlrun/feature_store/retrieval/job.py +2 -1
  64. mlrun/feature_store/steps.py +2 -2
  65. mlrun/features.py +30 -13
  66. mlrun/frameworks/__init__.py +1 -2
  67. mlrun/frameworks/_common/__init__.py +1 -2
  68. mlrun/frameworks/_common/artifacts_library.py +2 -2
  69. mlrun/frameworks/_common/mlrun_interface.py +10 -6
  70. mlrun/frameworks/_common/model_handler.py +29 -27
  71. mlrun/frameworks/_common/producer.py +3 -1
  72. mlrun/frameworks/_dl_common/__init__.py +1 -2
  73. mlrun/frameworks/_dl_common/loggers/__init__.py +1 -2
  74. mlrun/frameworks/_dl_common/loggers/mlrun_logger.py +4 -4
  75. mlrun/frameworks/_dl_common/loggers/tensorboard_logger.py +3 -3
  76. mlrun/frameworks/_ml_common/__init__.py +1 -2
  77. mlrun/frameworks/_ml_common/loggers/__init__.py +1 -2
  78. mlrun/frameworks/_ml_common/model_handler.py +21 -21
  79. mlrun/frameworks/_ml_common/plans/__init__.py +1 -2
  80. mlrun/frameworks/_ml_common/plans/confusion_matrix_plan.py +3 -1
  81. mlrun/frameworks/_ml_common/plans/dataset_plan.py +3 -3
  82. mlrun/frameworks/_ml_common/plans/roc_curve_plan.py +4 -4
  83. mlrun/frameworks/auto_mlrun/__init__.py +1 -2
  84. mlrun/frameworks/auto_mlrun/auto_mlrun.py +22 -15
  85. mlrun/frameworks/huggingface/__init__.py +1 -2
  86. mlrun/frameworks/huggingface/model_server.py +9 -9
  87. mlrun/frameworks/lgbm/__init__.py +47 -44
  88. mlrun/frameworks/lgbm/callbacks/__init__.py +1 -2
  89. mlrun/frameworks/lgbm/callbacks/logging_callback.py +4 -2
  90. mlrun/frameworks/lgbm/callbacks/mlrun_logging_callback.py +4 -2
  91. mlrun/frameworks/lgbm/mlrun_interfaces/__init__.py +1 -2
  92. mlrun/frameworks/lgbm/mlrun_interfaces/mlrun_interface.py +5 -5
  93. mlrun/frameworks/lgbm/model_handler.py +15 -11
  94. mlrun/frameworks/lgbm/model_server.py +11 -7
  95. mlrun/frameworks/lgbm/utils.py +2 -2
  96. mlrun/frameworks/onnx/__init__.py +1 -2
  97. mlrun/frameworks/onnx/dataset.py +3 -3
  98. mlrun/frameworks/onnx/mlrun_interface.py +2 -2
  99. mlrun/frameworks/onnx/model_handler.py +7 -5
  100. mlrun/frameworks/onnx/model_server.py +8 -6
  101. mlrun/frameworks/parallel_coordinates.py +11 -11
  102. mlrun/frameworks/pytorch/__init__.py +22 -23
  103. mlrun/frameworks/pytorch/callbacks/__init__.py +1 -2
  104. mlrun/frameworks/pytorch/callbacks/callback.py +2 -1
  105. mlrun/frameworks/pytorch/callbacks/logging_callback.py +15 -8
  106. mlrun/frameworks/pytorch/callbacks/mlrun_logging_callback.py +19 -12
  107. mlrun/frameworks/pytorch/callbacks/tensorboard_logging_callback.py +22 -15
  108. mlrun/frameworks/pytorch/callbacks_handler.py +36 -30
  109. mlrun/frameworks/pytorch/mlrun_interface.py +17 -17
  110. mlrun/frameworks/pytorch/model_handler.py +21 -17
  111. mlrun/frameworks/pytorch/model_server.py +13 -9
  112. mlrun/frameworks/sklearn/__init__.py +19 -18
  113. mlrun/frameworks/sklearn/estimator.py +2 -2
  114. mlrun/frameworks/sklearn/metric.py +3 -3
  115. mlrun/frameworks/sklearn/metrics_library.py +8 -6
  116. mlrun/frameworks/sklearn/mlrun_interface.py +3 -2
  117. mlrun/frameworks/sklearn/model_handler.py +4 -3
  118. mlrun/frameworks/tf_keras/__init__.py +11 -12
  119. mlrun/frameworks/tf_keras/callbacks/__init__.py +1 -2
  120. mlrun/frameworks/tf_keras/callbacks/logging_callback.py +17 -14
  121. mlrun/frameworks/tf_keras/callbacks/mlrun_logging_callback.py +15 -12
  122. mlrun/frameworks/tf_keras/callbacks/tensorboard_logging_callback.py +21 -18
  123. mlrun/frameworks/tf_keras/model_handler.py +17 -13
  124. mlrun/frameworks/tf_keras/model_server.py +12 -8
  125. mlrun/frameworks/xgboost/__init__.py +19 -18
  126. mlrun/frameworks/xgboost/model_handler.py +13 -9
  127. mlrun/launcher/base.py +3 -4
  128. mlrun/launcher/local.py +1 -1
  129. mlrun/launcher/remote.py +1 -1
  130. mlrun/lists.py +4 -3
  131. mlrun/model.py +108 -44
  132. mlrun/model_monitoring/__init__.py +1 -2
  133. mlrun/model_monitoring/api.py +6 -6
  134. mlrun/model_monitoring/applications/_application_steps.py +13 -15
  135. mlrun/model_monitoring/applications/histogram_data_drift.py +41 -15
  136. mlrun/model_monitoring/applications/results.py +55 -3
  137. mlrun/model_monitoring/controller.py +185 -223
  138. mlrun/model_monitoring/db/_schedules.py +156 -0
  139. mlrun/model_monitoring/db/_stats.py +189 -0
  140. mlrun/model_monitoring/db/stores/__init__.py +1 -1
  141. mlrun/model_monitoring/db/stores/base/store.py +6 -65
  142. mlrun/model_monitoring/db/stores/sqldb/models/__init__.py +0 -25
  143. mlrun/model_monitoring/db/stores/sqldb/models/base.py +0 -97
  144. mlrun/model_monitoring/db/stores/sqldb/models/mysql.py +2 -58
  145. mlrun/model_monitoring/db/stores/sqldb/models/sqlite.py +0 -15
  146. mlrun/model_monitoring/db/stores/sqldb/sql_store.py +6 -257
  147. mlrun/model_monitoring/db/stores/v3io_kv/kv_store.py +9 -271
  148. mlrun/model_monitoring/db/tsdb/base.py +74 -22
  149. mlrun/model_monitoring/db/tsdb/tdengine/schemas.py +66 -35
  150. mlrun/model_monitoring/db/tsdb/tdengine/stream_graph_steps.py +33 -0
  151. mlrun/model_monitoring/db/tsdb/tdengine/tdengine_connector.py +284 -51
  152. mlrun/model_monitoring/db/tsdb/v3io/stream_graph_steps.py +1 -0
  153. mlrun/model_monitoring/db/tsdb/v3io/v3io_connector.py +35 -17
  154. mlrun/model_monitoring/helpers.py +97 -1
  155. mlrun/model_monitoring/model_endpoint.py +4 -2
  156. mlrun/model_monitoring/stream_processing.py +2 -2
  157. mlrun/model_monitoring/tracking_policy.py +10 -3
  158. mlrun/model_monitoring/writer.py +47 -26
  159. mlrun/package/__init__.py +3 -6
  160. mlrun/package/context_handler.py +1 -1
  161. mlrun/package/packager.py +12 -9
  162. mlrun/package/packagers/__init__.py +0 -2
  163. mlrun/package/packagers/default_packager.py +14 -11
  164. mlrun/package/packagers/numpy_packagers.py +16 -7
  165. mlrun/package/packagers/pandas_packagers.py +18 -18
  166. mlrun/package/packagers/python_standard_library_packagers.py +25 -11
  167. mlrun/package/packagers_manager.py +31 -14
  168. mlrun/package/utils/__init__.py +0 -3
  169. mlrun/package/utils/_pickler.py +6 -6
  170. mlrun/platforms/__init__.py +3 -3
  171. mlrun/platforms/iguazio.py +4 -1
  172. mlrun/projects/__init__.py +1 -6
  173. mlrun/projects/operations.py +27 -27
  174. mlrun/projects/pipelines.py +85 -215
  175. mlrun/projects/project.py +444 -158
  176. mlrun/run.py +9 -9
  177. mlrun/runtimes/__init__.py +1 -3
  178. mlrun/runtimes/base.py +13 -10
  179. mlrun/runtimes/daskjob.py +9 -9
  180. mlrun/runtimes/generators.py +2 -1
  181. mlrun/runtimes/kubejob.py +4 -5
  182. mlrun/runtimes/mpijob/__init__.py +0 -2
  183. mlrun/runtimes/mpijob/abstract.py +7 -6
  184. mlrun/runtimes/nuclio/api_gateway.py +7 -7
  185. mlrun/runtimes/nuclio/application/application.py +11 -11
  186. mlrun/runtimes/nuclio/function.py +14 -13
  187. mlrun/runtimes/nuclio/serving.py +9 -9
  188. mlrun/runtimes/pod.py +74 -29
  189. mlrun/runtimes/remotesparkjob.py +3 -2
  190. mlrun/runtimes/sparkjob/__init__.py +0 -2
  191. mlrun/runtimes/sparkjob/spark3job.py +21 -11
  192. mlrun/runtimes/utils.py +6 -5
  193. mlrun/serving/merger.py +6 -4
  194. mlrun/serving/remote.py +18 -17
  195. mlrun/serving/routers.py +27 -27
  196. mlrun/serving/server.py +1 -1
  197. mlrun/serving/states.py +76 -71
  198. mlrun/serving/utils.py +13 -2
  199. mlrun/serving/v1_serving.py +3 -2
  200. mlrun/serving/v2_serving.py +4 -4
  201. mlrun/track/__init__.py +1 -1
  202. mlrun/track/tracker.py +2 -2
  203. mlrun/track/trackers/mlflow_tracker.py +6 -5
  204. mlrun/utils/async_http.py +1 -1
  205. mlrun/utils/helpers.py +72 -28
  206. mlrun/utils/logger.py +104 -2
  207. mlrun/utils/notifications/notification/base.py +23 -4
  208. mlrun/utils/notifications/notification/console.py +1 -1
  209. mlrun/utils/notifications/notification/git.py +6 -6
  210. mlrun/utils/notifications/notification/ipython.py +5 -4
  211. mlrun/utils/notifications/notification/slack.py +1 -1
  212. mlrun/utils/notifications/notification/webhook.py +13 -17
  213. mlrun/utils/notifications/notification_pusher.py +23 -19
  214. mlrun/utils/regex.py +1 -1
  215. mlrun/utils/version/version.json +2 -2
  216. {mlrun-1.7.2rc3.dist-info → mlrun-1.8.0rc1.dist-info}/METADATA +186 -186
  217. mlrun-1.8.0rc1.dist-info/RECORD +356 -0
  218. {mlrun-1.7.2rc3.dist-info → mlrun-1.8.0rc1.dist-info}/WHEEL +1 -1
  219. mlrun-1.7.2rc3.dist-info/RECORD +0 -351
  220. {mlrun-1.7.2rc3.dist-info → mlrun-1.8.0rc1.dist-info}/LICENSE +0 -0
  221. {mlrun-1.7.2rc3.dist-info → mlrun-1.8.0rc1.dist-info}/entry_points.txt +0 -0
  222. {mlrun-1.7.2rc3.dist-info → mlrun-1.8.0rc1.dist-info}/top_level.txt +0 -0

mlrun/model_monitoring/db/tsdb/base.py

@@ -15,7 +15,6 @@
 import typing
 from abc import ABC, abstractmethod
 from datetime import datetime
-from typing import Union
 
 import pandas as pd
 import pydantic
@@ -132,6 +131,7 @@ class TSDBConnector(ABC):
         end: datetime,
         metrics: list[mm_schemas.ModelEndpointMonitoringMetric],
         type: typing.Literal["metrics", "results"],
+        with_result_extra_data: bool,
     ) -> typing.Union[
         list[
             typing.Union[
@@ -150,11 +150,13 @@ class TSDBConnector(ABC):
         Read metrics OR results from the TSDB and return as a list.
 
         :param endpoint_id: The model endpoint identifier.
-        :param start: The start time of the query.
-        :param end: The end time of the query.
-        :param metrics: The list of metrics to get the values for.
-        :param type: "metrics" or "results" - the type of each item in metrics.
-        :return: A list of result values or a list of metric values.
+        :param start: The start time of the query.
+        :param end: The end time of the query.
+        :param metrics: The list of metrics to get the values for.
+        :param type: "metrics" or "results" - the type of each item in metrics.
+        :param with_result_extra_data: Whether to include the extra data in the results, relevant only when
+                                       `type="results"`.
+        :return: A list of result values or a list of metric values.
         """
 
     @abstractmethod
@@ -193,9 +195,9 @@ class TSDBConnector(ABC):
     @abstractmethod
     def get_last_request(
         self,
-        endpoint_ids: Union[str, list[str]],
-        start: Union[datetime, str] = "0",
-        end: Union[datetime, str] = "now",
+        endpoint_ids: typing.Union[str, list[str]],
+        start: typing.Optional[datetime] = None,
+        end: typing.Optional[datetime] = None,
     ) -> pd.DataFrame:
         """
         Fetches data from the predictions TSDB table and returns the most recent request
@@ -212,9 +214,9 @@ class TSDBConnector(ABC):
     @abstractmethod
     def get_drift_status(
         self,
-        endpoint_ids: Union[str, list[str]],
-        start: Union[datetime, str] = "now-24h",
-        end: Union[datetime, str] = "now",
+        endpoint_ids: typing.Union[str, list[str]],
+        start: typing.Optional[datetime] = None,
+        end: typing.Optional[datetime] = None,
     ) -> pd.DataFrame:
         """
         Fetches data from the app-results TSDB table and returns the highest status among all
@@ -233,8 +235,8 @@
     def get_metrics_metadata(
         self,
         endpoint_id: str,
-        start: Union[datetime, str] = "0",
-        end: Union[datetime, str] = "now",
+        start: typing.Optional[datetime] = None,
+        end: typing.Optional[datetime] = None,
     ) -> pd.DataFrame:
         """
         Fetches distinct metrics metadata from the metrics TSDB table for a specified model endpoint.
@@ -251,8 +253,8 @@
     def get_results_metadata(
         self,
         endpoint_id: str,
-        start: Union[datetime, str] = "0",
-        end: Union[datetime, str] = "now",
+        start: typing.Optional[datetime] = None,
+        end: typing.Optional[datetime] = None,
    ) -> pd.DataFrame:
         """
         Fetches distinct results metadata from the app-results TSDB table for a specified model endpoint.
@@ -268,9 +270,9 @@
     @abstractmethod
     def get_error_count(
         self,
-        endpoint_ids: Union[str, list[str]],
-        start: Union[datetime, str] = "0",
-        end: Union[datetime, str] = "now",
+        endpoint_ids: typing.Union[str, list[str]],
+        start: typing.Optional[datetime] = None,
+        end: typing.Optional[datetime] = None,
     ) -> pd.DataFrame:
         """
         Fetches data from the error TSDB table and returns the error count for each specified endpoint.
@@ -286,9 +288,9 @@
     @abstractmethod
     def get_avg_latency(
         self,
-        endpoint_ids: Union[str, list[str]],
-        start: Union[datetime, str] = "0",
-        end: Union[datetime, str] = "now",
+        endpoint_ids: typing.Union[str, list[str]],
+        start: typing.Optional[datetime] = None,
+        end: typing.Optional[datetime] = None,
     ) -> pd.DataFrame:
         """
         Fetches data from the predictions TSDB table and returns the average latency for each specified endpoint
@@ -420,6 +422,7 @@
                         sub_df.index,
                         sub_df[mm_schemas.ResultData.RESULT_VALUE],
                         sub_df[mm_schemas.ResultData.RESULT_STATUS],
+                        sub_df[mm_schemas.ResultData.RESULT_EXTRA_DATA],
                     )
                 ),  # pyright: ignore[reportArgumentType]
             )
@@ -446,3 +449,52 @@
             )
 
         return metrics_values
+
+    @staticmethod
+    def df_to_metrics_list(
+        *,
+        df: pd.DataFrame,
+        project: str,
+        type: str,
+    ) -> list[mm_schemas.ModelEndpointMonitoringMetric]:
+        """
+        Parse a DataFrame of metrics from the TSDB into a list of mm metrics objects.
+
+        :param df: The DataFrame to parse.
+        :param project: The project name.
+        :param type: The type of the metrics (either "result" or "metric").
+
+        :return: A list of mm metrics objects.
+        """
+        return list(
+            map(
+                lambda record: mm_schemas.ModelEndpointMonitoringMetric(
+                    project=project,
+                    type=type,
+                    app=record.get(mm_schemas.WriterEvent.APPLICATION_NAME),
+                    name=record.get(mm_schemas.ResultData.RESULT_NAME)
+                    or record.get(mm_schemas.MetricData.METRIC_NAME),
+                    kind=record.get(mm_schemas.ResultData.RESULT_KIND),
+                ),
+                df.to_dict("records"),
+            )
+        )
+
+    @staticmethod
+    def _get_start_end(
+        start: typing.Union[datetime, None],
+        end: typing.Union[datetime, None],
+    ) -> tuple[datetime, datetime]:
+        """
+        static utils function for tsdb start end format
+        :param start: Either None or datetime, None is handled as datetime.min(tz=timezone.utc)
+        :param end: Either None or datetime, None is handled as datetime.now(tz=timezone.utc)
+        :return: start datetime, end datetime
+        """
+        start = start or mlrun.utils.datetime_min()
+        end = end or mlrun.utils.datetime_now()
+        if not (isinstance(start, datetime) and isinstance(end, datetime)):
+            raise mlrun.errors.MLRunInvalidArgumentError(
+                "Both start and end must be datetime objects"
+            )
+        return start, end
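
The base-connector changes above replace the string time defaults ("0", "now", "now-24h") with optional datetime arguments, and add the `_get_start_end` helper that resolves missing bounds. A minimal sketch of what this means for a caller of a concrete connector; the `connector` variable and the endpoint IDs are illustrative, not part of the diff:

from datetime import datetime, timedelta, timezone

# Time bounds are now plain datetime objects (or omitted), not relative strings.
end = datetime.now(tz=timezone.utc)
start = end - timedelta(hours=24)
# df = connector.get_drift_status(endpoint_ids=["ep-1", "ep-2"], start=start, end=end)
# df = connector.get_error_count(endpoint_ids="ep-1")  # omitted bounds -> full history

# Rough standalone equivalent of the new _get_start_end normalization:
def normalize_window(start=None, end=None):
    start = start or datetime.min.replace(tzinfo=timezone.utc)
    end = end or datetime.now(tz=timezone.utc)
    if not (isinstance(start, datetime) and isinstance(end, datetime)):
        raise ValueError("both start and end must be datetime objects")
    return start, end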

mlrun/model_monitoring/db/tsdb/tdengine/schemas.py

@@ -26,7 +26,7 @@ _MODEL_MONITORING_DATABASE = "mlrun_model_monitoring"
 
 
 class _TDEngineColumnType:
-    def __init__(self, data_type: str, length: int = None):
+    def __init__(self, data_type: str, length: Optional[int] = None):
         self.data_type = data_type
         self.length = length
 
@@ -46,7 +46,7 @@ class _TDEngineColumn(mlrun.common.types.StrEnum):
     INT = _TDEngineColumnType("INT")
     BINARY_40 = _TDEngineColumnType("BINARY", 40)
     BINARY_64 = _TDEngineColumnType("BINARY", 64)
-    BINARY_10000 = _TDEngineColumnType("BINARY", 10000)
+    BINARY_1000 = _TDEngineColumnType("BINARY", 1000)
 
 
 def values_to_column(values, column_type):
@@ -61,7 +61,7 @@ def values_to_column(values, column_type):
         return taosws.binary_to_column(values)
     if column_type == _TDEngineColumn.BINARY_64:
         return taosws.binary_to_column(values)
-    if column_type == _TDEngineColumn.BINARY_10000:
+    if column_type == _TDEngineColumn.BINARY_1000:
         return taosws.binary_to_column(values)
 
     raise mlrun.errors.MLRunInvalidArgumentError(
@@ -82,10 +82,9 @@ class TDEngineSchema:
         super_table: str,
         columns: dict[str, _TDEngineColumn],
         tags: dict[str, str],
-        project: str,
         database: Optional[str] = None,
     ):
-        self.super_table = f"{super_table}_{project.replace('-', '_')}"
+        self.super_table = super_table
         self.columns = columns
         self.tags = tags
         self.database = database or _MODEL_MONITORING_DATABASE
@@ -149,9 +148,6 @@
     ) -> str:
         return f"DROP TABLE if EXISTS {self.database}.{subtable};"
 
-    def drop_supertable_query(self) -> str:
-        return f"DROP STABLE if EXISTS {self.database}.{self.super_table};"
-
     def _get_subtables_query(
         self,
         values: dict[str, Union[str, int, float, datetime.datetime]],
@@ -170,7 +166,7 @@
         table: str,
         start: datetime.datetime,
         end: datetime.datetime,
-        columns_to_filter: list[str] = None,
+        columns_to_filter: Optional[list[str]] = None,
         filter_query: Optional[str] = None,
         interval: Optional[str] = None,
         limit: int = 0,
@@ -178,6 +174,10 @@
         sliding_window_step: Optional[str] = None,
         timestamp_column: str = "time",
         database: str = _MODEL_MONITORING_DATABASE,
+        group_by: Optional[Union[list[str], str]] = None,
+        preform_agg_funcs_columns: Optional[list[str]] = None,
+        order_by: Optional[str] = None,
+        desc: Optional[bool] = None,
     ) -> str:
         if agg_funcs and not columns_to_filter:
             raise mlrun.errors.MLRunInvalidArgumentError(
@@ -194,15 +194,37 @@
             raise mlrun.errors.MLRunInvalidArgumentError(
                 "`interval` must be provided when using sliding window"
             )
+        if group_by and not agg_funcs:
+            raise mlrun.errors.MLRunInvalidArgumentError(
+                "aggregate functions must be provided when using group by"
+            )
+        if desc and not order_by:
+            raise mlrun.errors.MLRunInvalidArgumentError(
+                "`order_by` must be provided when using descending"
+            )
 
         with StringIO() as query:
             query.write("SELECT ")
             if interval:
                 query.write("_wstart, _wend, ")
             if agg_funcs:
+                preform_agg_funcs_columns = (
+                    columns_to_filter
+                    if preform_agg_funcs_columns is None
+                    else preform_agg_funcs_columns
+                )
                 query.write(
                     ", ".join(
-                        [f"{a}({col})" for a in agg_funcs for col in columns_to_filter]
+                        [
+                            f"{a}({col})"
+                            if col.upper()
+                            in map(
+                                str.upper, preform_agg_funcs_columns
+                            )  # Case-insensitive check
+                            else f"{col}"
+                            for a in agg_funcs
+                            for col in columns_to_filter
+                        ]
                     )
                 )
             elif columns_to_filter:
@@ -219,6 +241,13 @@
                 query.write(f"{timestamp_column} >= '{start}' AND ")
             if end:
                 query.write(f"{timestamp_column} <= '{end}'")
+            if group_by:
+                if isinstance(group_by, list):
+                    group_by = ", ".join(group_by)
+                query.write(f" GROUP BY {group_by}")
+            if order_by:
+                desc = " DESC" if desc else ""
+                query.write(f" ORDER BY {order_by}{desc}")
             if interval:
                 query.write(f" INTERVAL({interval})")
             if sliding_window_step:
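
The query-builder hunks above add `group_by`, `preform_agg_funcs_columns`, `order_by`, and `desc` to the internal TDEngine query assembly. A standalone sketch of how the new GROUP BY / ORDER BY branches extend the generated SELECT text (the real builder is an internal TDEngineSchema method; this only mirrors the string assembly shown in the diff):

from io import StringIO
from typing import Optional, Union

def query_tail(
    group_by: Optional[Union[list[str], str]] = None,
    order_by: Optional[str] = None,
    desc: Optional[bool] = None,
    interval: Optional[str] = None,
) -> str:
    # Mirrors the branches added in the diff above.
    with StringIO() as query:
        if group_by:
            if isinstance(group_by, list):
                group_by = ", ".join(group_by)
            query.write(f" GROUP BY {group_by}")
        if order_by:
            suffix = " DESC" if desc else ""
            query.write(f" ORDER BY {order_by}{suffix}")
        if interval:
            query.write(f" INTERVAL({interval})")
        return query.getvalue()

# query_tail(group_by=["endpoint_id"], order_by="endpoint_id", desc=True)
# -> " GROUP BY endpoint_id ORDER BY endpoint_id DESC"

The diff for schemas.py continues below with the per-kind schema classes.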
@@ -231,32 +260,28 @@
 
 
 @dataclass
 class AppResultTable(TDEngineSchema):
-    def __init__(self, project: str, database: Optional[str] = None):
+    def __init__(self, database: Optional[str] = None):
         super_table = mm_schemas.TDEngineSuperTables.APP_RESULTS
         columns = {
             mm_schemas.WriterEvent.END_INFER_TIME: _TDEngineColumn.TIMESTAMP,
             mm_schemas.WriterEvent.START_INFER_TIME: _TDEngineColumn.TIMESTAMP,
             mm_schemas.ResultData.RESULT_VALUE: _TDEngineColumn.FLOAT,
             mm_schemas.ResultData.RESULT_STATUS: _TDEngineColumn.INT,
+            mm_schemas.ResultData.RESULT_EXTRA_DATA: _TDEngineColumn.BINARY_1000,
         }
         tags = {
+            mm_schemas.EventFieldType.PROJECT: _TDEngineColumn.BINARY_64,
             mm_schemas.WriterEvent.ENDPOINT_ID: _TDEngineColumn.BINARY_64,
             mm_schemas.WriterEvent.APPLICATION_NAME: _TDEngineColumn.BINARY_64,
             mm_schemas.ResultData.RESULT_NAME: _TDEngineColumn.BINARY_64,
             mm_schemas.ResultData.RESULT_KIND: _TDEngineColumn.INT,
         }
-        super().__init__(
-            super_table=super_table,
-            columns=columns,
-            tags=tags,
-            database=database,
-            project=project,
-        )
+        super().__init__(super_table, columns, tags, database)
 
 
 @dataclass
 class Metrics(TDEngineSchema):
-    def __init__(self, project: str, database: Optional[str] = None):
+    def __init__(self, database: Optional[str] = None):
         super_table = mm_schemas.TDEngineSuperTables.METRICS
         columns = {
             mm_schemas.WriterEvent.END_INFER_TIME: _TDEngineColumn.TIMESTAMP,
@@ -264,35 +289,41 @@ class Metrics(TDEngineSchema):
             mm_schemas.MetricData.METRIC_VALUE: _TDEngineColumn.FLOAT,
         }
         tags = {
+            mm_schemas.EventFieldType.PROJECT: _TDEngineColumn.BINARY_64,
             mm_schemas.WriterEvent.ENDPOINT_ID: _TDEngineColumn.BINARY_64,
             mm_schemas.WriterEvent.APPLICATION_NAME: _TDEngineColumn.BINARY_64,
             mm_schemas.MetricData.METRIC_NAME: _TDEngineColumn.BINARY_64,
         }
-        super().__init__(
-            super_table=super_table,
-            columns=columns,
-            tags=tags,
-            database=database,
-            project=project,
-        )
+        super().__init__(super_table, columns, tags, database)
 
 
 @dataclass
 class Predictions(TDEngineSchema):
-    def __init__(self, project: str, database: Optional[str] = None):
+    def __init__(self, database: Optional[str] = None):
         super_table = mm_schemas.TDEngineSuperTables.PREDICTIONS
         columns = {
             mm_schemas.EventFieldType.TIME: _TDEngineColumn.TIMESTAMP,
             mm_schemas.EventFieldType.LATENCY: _TDEngineColumn.FLOAT,
-            mm_schemas.EventKeyMetrics.CUSTOM_METRICS: _TDEngineColumn.BINARY_10000,
+            mm_schemas.EventKeyMetrics.CUSTOM_METRICS: _TDEngineColumn.BINARY_1000,
         }
         tags = {
+            mm_schemas.EventFieldType.PROJECT: _TDEngineColumn.BINARY_64,
             mm_schemas.WriterEvent.ENDPOINT_ID: _TDEngineColumn.BINARY_64,
         }
-        super().__init__(
-            super_table=super_table,
-            columns=columns,
-            tags=tags,
-            database=database,
-            project=project,
-        )
+        super().__init__(super_table, columns, tags, database)
+
+
+@dataclass
+class Errors(TDEngineSchema):
+    def __init__(self, database: Optional[str] = None):
+        super_table = mm_schemas.TDEngineSuperTables.ERRORS
+        columns = {
+            mm_schemas.EventFieldType.TIME: _TDEngineColumn.TIMESTAMP,
+            mm_schemas.EventFieldType.MODEL_ERROR: _TDEngineColumn.BINARY_1000,
+        }
+        tags = {
+            mm_schemas.EventFieldType.PROJECT: _TDEngineColumn.BINARY_64,
+            mm_schemas.WriterEvent.ENDPOINT_ID: _TDEngineColumn.BINARY_64,
+            mm_schemas.EventFieldType.ERROR_TYPE: _TDEngineColumn.BINARY_64,
+        }
+        super().__init__(super_table, columns, tags, database)
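
With this refactor the project is no longer baked into the supertable name (the old constructor built f"{super_table}_{project.replace('-', '_')}"); it is stored as a regular tag column instead, and a new Errors supertable is added. A hedged sketch of constructing the schema objects after the change, assuming mlrun 1.8.0rc1 is installed; the database name is illustrative:

from mlrun.model_monitoring.db.tsdb.tdengine.schemas import AppResultTable, Errors

# One shared supertable per kind; the project is just another tag, so no project argument.
app_results = AppResultTable()  # defaults to the mlrun_model_monitoring database
errors = Errors(database="mlrun_model_monitoring")  # new supertable for inference errors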

mlrun/model_monitoring/db/tsdb/tdengine/stream_graph_steps.py

@@ -13,12 +13,14 @@
 # limitations under the License.
 
 import json
+from datetime import datetime
 
 import mlrun.feature_store.steps
 from mlrun.common.schemas.model_monitoring import (
     EventFieldType,
     EventKeyMetrics,
 )
+from mlrun.utils import logger
 
 class ProcessBeforeTDEngine(mlrun.feature_store.steps.MapClass):
@@ -40,3 +42,34 @@ class ProcessBeforeTDEngine(mlrun.feature_store.steps.MapClass):
         event[EventFieldType.TABLE_COLUMN] = "_" + event.get(EventFieldType.ENDPOINT_ID)
 
         return event
+
+
+class ErrorExtractor(mlrun.feature_store.steps.MapClass):
+    def __init__(self, **kwargs):
+        """
+        Prepare the event for insertion into the TDEngine error table
+        """
+        super().__init__(**kwargs)
+
+    def do(self, event):
+        error = str(event.get("error"))
+        if len(error) > 1000:
+            error = error[-1000:]
+            logger.warning(
+                f"Error message exceeds 1000 chars: The error message writen to TSDB will be it last "
+                f"1000 chars, Error: {error}",
+                event=event,
+            )
+        timestamp = datetime.fromisoformat(event.get("when"))
+        endpoint_id = event[EventFieldType.ENDPOINT_ID]
+        event = {
+            EventFieldType.MODEL_ERROR: error,
+            EventFieldType.ERROR_TYPE: EventFieldType.INFER_ERROR,
+            EventFieldType.ENDPOINT_ID: endpoint_id,
+            EventFieldType.TIME: timestamp,
+            EventFieldType.PROJECT: event[EventFieldType.FUNCTION_URI].split("/")[0],
+            EventFieldType.TABLE_COLUMN: "_err_"
+            + event.get(EventFieldType.ENDPOINT_ID),
+        }
+        logger.info("Write error to errors TSDB table", event=event)
+        return event
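
The new ErrorExtractor step reshapes a failed-prediction event into a row for the Errors supertable. A sketch of the transformation on a hand-built event; the input values and the literal key strings are illustrative assumptions (the real keys are EventFieldType constants from mlrun.common.schemas.model_monitoring):

from datetime import datetime

# Illustrative input event, shaped like what the monitoring stream passes to the step.
event = {
    "error": "RuntimeError: model failed to load",
    "when": "2024-11-05T12:30:00+00:00",
    "endpoint_id": "ep-123",
    "function_uri": "my-project/serving-fn",
}

# Roughly what ErrorExtractor.do(event) emits (keys spelled as plain strings here,
# assumed to match the EventFieldType constant values):
row = {
    "model_error": event["error"][-1000:],            # truncated to the last 1000 chars
    "error_type": "infer_error",                      # EventFieldType.INFER_ERROR
    "endpoint_id": event["endpoint_id"],
    "time": datetime.fromisoformat(event["when"]),
    "project": event["function_uri"].split("/")[0],   # project part of the function URI
    "table_column": "_err_" + event["endpoint_id"],
}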