mlrun 1.7.1rc4__py3-none-any.whl → 1.8.0rc8__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of mlrun might be problematic.

Files changed (257)
  1. mlrun/__init__.py +23 -21
  2. mlrun/__main__.py +3 -3
  3. mlrun/alerts/alert.py +148 -14
  4. mlrun/artifacts/__init__.py +1 -2
  5. mlrun/artifacts/base.py +46 -12
  6. mlrun/artifacts/dataset.py +16 -16
  7. mlrun/artifacts/document.py +334 -0
  8. mlrun/artifacts/manager.py +15 -13
  9. mlrun/artifacts/model.py +66 -53
  10. mlrun/common/constants.py +7 -0
  11. mlrun/common/formatters/__init__.py +1 -0
  12. mlrun/common/formatters/feature_set.py +1 -0
  13. mlrun/common/formatters/function.py +1 -0
  14. mlrun/{model_monitoring/db/stores/base/__init__.py → common/formatters/model_endpoint.py} +16 -1
  15. mlrun/common/formatters/pipeline.py +1 -2
  16. mlrun/common/formatters/project.py +9 -0
  17. mlrun/common/model_monitoring/__init__.py +0 -5
  18. mlrun/common/model_monitoring/helpers.py +1 -29
  19. mlrun/common/runtimes/constants.py +1 -2
  20. mlrun/common/schemas/__init__.py +6 -2
  21. mlrun/common/schemas/alert.py +111 -19
  22. mlrun/common/schemas/api_gateway.py +3 -3
  23. mlrun/common/schemas/artifact.py +11 -7
  24. mlrun/common/schemas/auth.py +6 -4
  25. mlrun/common/schemas/background_task.py +7 -7
  26. mlrun/common/schemas/client_spec.py +2 -3
  27. mlrun/common/schemas/clusterization_spec.py +2 -2
  28. mlrun/common/schemas/common.py +53 -3
  29. mlrun/common/schemas/constants.py +15 -0
  30. mlrun/common/schemas/datastore_profile.py +1 -1
  31. mlrun/common/schemas/feature_store.py +9 -9
  32. mlrun/common/schemas/frontend_spec.py +4 -4
  33. mlrun/common/schemas/function.py +10 -10
  34. mlrun/common/schemas/hub.py +1 -1
  35. mlrun/common/schemas/k8s.py +3 -3
  36. mlrun/common/schemas/memory_reports.py +3 -3
  37. mlrun/common/schemas/model_monitoring/__init__.py +2 -1
  38. mlrun/common/schemas/model_monitoring/constants.py +66 -14
  39. mlrun/common/schemas/model_monitoring/grafana.py +1 -1
  40. mlrun/common/schemas/model_monitoring/model_endpoints.py +91 -147
  41. mlrun/common/schemas/notification.py +24 -3
  42. mlrun/common/schemas/object.py +1 -1
  43. mlrun/common/schemas/pagination.py +4 -4
  44. mlrun/common/schemas/partition.py +137 -0
  45. mlrun/common/schemas/pipeline.py +2 -2
  46. mlrun/common/schemas/project.py +25 -17
  47. mlrun/common/schemas/runs.py +2 -2
  48. mlrun/common/schemas/runtime_resource.py +5 -5
  49. mlrun/common/schemas/schedule.py +1 -1
  50. mlrun/common/schemas/secret.py +1 -1
  51. mlrun/common/schemas/tag.py +3 -3
  52. mlrun/common/schemas/workflow.py +5 -5
  53. mlrun/config.py +67 -10
  54. mlrun/data_types/__init__.py +0 -2
  55. mlrun/data_types/infer.py +3 -1
  56. mlrun/data_types/spark.py +2 -1
  57. mlrun/datastore/__init__.py +0 -2
  58. mlrun/datastore/alibaba_oss.py +4 -1
  59. mlrun/datastore/azure_blob.py +4 -1
  60. mlrun/datastore/base.py +12 -4
  61. mlrun/datastore/datastore.py +9 -3
  62. mlrun/datastore/datastore_profile.py +79 -20
  63. mlrun/datastore/dbfs_store.py +4 -1
  64. mlrun/datastore/filestore.py +4 -1
  65. mlrun/datastore/google_cloud_storage.py +4 -1
  66. mlrun/datastore/hdfs.py +4 -1
  67. mlrun/datastore/inmem.py +4 -1
  68. mlrun/datastore/redis.py +4 -1
  69. mlrun/datastore/s3.py +4 -1
  70. mlrun/datastore/sources.py +52 -51
  71. mlrun/datastore/store_resources.py +0 -2
  72. mlrun/datastore/targets.py +21 -21
  73. mlrun/datastore/utils.py +2 -2
  74. mlrun/datastore/v3io.py +4 -1
  75. mlrun/datastore/vectorstore.py +194 -0
  76. mlrun/datastore/wasbfs/fs.py +13 -12
  77. mlrun/db/base.py +208 -82
  78. mlrun/db/factory.py +0 -3
  79. mlrun/db/httpdb.py +1237 -386
  80. mlrun/db/nopdb.py +201 -74
  81. mlrun/errors.py +2 -2
  82. mlrun/execution.py +136 -50
  83. mlrun/feature_store/__init__.py +0 -2
  84. mlrun/feature_store/api.py +41 -40
  85. mlrun/feature_store/common.py +9 -9
  86. mlrun/feature_store/feature_set.py +20 -18
  87. mlrun/feature_store/feature_vector.py +27 -24
  88. mlrun/feature_store/retrieval/base.py +14 -9
  89. mlrun/feature_store/retrieval/job.py +2 -1
  90. mlrun/feature_store/steps.py +2 -2
  91. mlrun/features.py +30 -13
  92. mlrun/frameworks/__init__.py +1 -2
  93. mlrun/frameworks/_common/__init__.py +1 -2
  94. mlrun/frameworks/_common/artifacts_library.py +2 -2
  95. mlrun/frameworks/_common/mlrun_interface.py +10 -6
  96. mlrun/frameworks/_common/model_handler.py +29 -27
  97. mlrun/frameworks/_common/producer.py +3 -1
  98. mlrun/frameworks/_dl_common/__init__.py +1 -2
  99. mlrun/frameworks/_dl_common/loggers/__init__.py +1 -2
  100. mlrun/frameworks/_dl_common/loggers/mlrun_logger.py +4 -4
  101. mlrun/frameworks/_dl_common/loggers/tensorboard_logger.py +3 -3
  102. mlrun/frameworks/_ml_common/__init__.py +1 -2
  103. mlrun/frameworks/_ml_common/loggers/__init__.py +1 -2
  104. mlrun/frameworks/_ml_common/model_handler.py +21 -21
  105. mlrun/frameworks/_ml_common/plans/__init__.py +1 -2
  106. mlrun/frameworks/_ml_common/plans/confusion_matrix_plan.py +3 -1
  107. mlrun/frameworks/_ml_common/plans/dataset_plan.py +3 -3
  108. mlrun/frameworks/_ml_common/plans/roc_curve_plan.py +4 -4
  109. mlrun/frameworks/auto_mlrun/__init__.py +1 -2
  110. mlrun/frameworks/auto_mlrun/auto_mlrun.py +22 -15
  111. mlrun/frameworks/huggingface/__init__.py +1 -2
  112. mlrun/frameworks/huggingface/model_server.py +9 -9
  113. mlrun/frameworks/lgbm/__init__.py +47 -44
  114. mlrun/frameworks/lgbm/callbacks/__init__.py +1 -2
  115. mlrun/frameworks/lgbm/callbacks/logging_callback.py +4 -2
  116. mlrun/frameworks/lgbm/callbacks/mlrun_logging_callback.py +4 -2
  117. mlrun/frameworks/lgbm/mlrun_interfaces/__init__.py +1 -2
  118. mlrun/frameworks/lgbm/mlrun_interfaces/mlrun_interface.py +5 -5
  119. mlrun/frameworks/lgbm/model_handler.py +15 -11
  120. mlrun/frameworks/lgbm/model_server.py +11 -7
  121. mlrun/frameworks/lgbm/utils.py +2 -2
  122. mlrun/frameworks/onnx/__init__.py +1 -2
  123. mlrun/frameworks/onnx/dataset.py +3 -3
  124. mlrun/frameworks/onnx/mlrun_interface.py +2 -2
  125. mlrun/frameworks/onnx/model_handler.py +7 -5
  126. mlrun/frameworks/onnx/model_server.py +8 -6
  127. mlrun/frameworks/parallel_coordinates.py +11 -11
  128. mlrun/frameworks/pytorch/__init__.py +22 -23
  129. mlrun/frameworks/pytorch/callbacks/__init__.py +1 -2
  130. mlrun/frameworks/pytorch/callbacks/callback.py +2 -1
  131. mlrun/frameworks/pytorch/callbacks/logging_callback.py +15 -8
  132. mlrun/frameworks/pytorch/callbacks/mlrun_logging_callback.py +19 -12
  133. mlrun/frameworks/pytorch/callbacks/tensorboard_logging_callback.py +22 -15
  134. mlrun/frameworks/pytorch/callbacks_handler.py +36 -30
  135. mlrun/frameworks/pytorch/mlrun_interface.py +17 -17
  136. mlrun/frameworks/pytorch/model_handler.py +21 -17
  137. mlrun/frameworks/pytorch/model_server.py +13 -9
  138. mlrun/frameworks/sklearn/__init__.py +19 -18
  139. mlrun/frameworks/sklearn/estimator.py +2 -2
  140. mlrun/frameworks/sklearn/metric.py +3 -3
  141. mlrun/frameworks/sklearn/metrics_library.py +8 -6
  142. mlrun/frameworks/sklearn/mlrun_interface.py +3 -2
  143. mlrun/frameworks/sklearn/model_handler.py +4 -3
  144. mlrun/frameworks/tf_keras/__init__.py +11 -12
  145. mlrun/frameworks/tf_keras/callbacks/__init__.py +1 -2
  146. mlrun/frameworks/tf_keras/callbacks/logging_callback.py +17 -14
  147. mlrun/frameworks/tf_keras/callbacks/mlrun_logging_callback.py +15 -12
  148. mlrun/frameworks/tf_keras/callbacks/tensorboard_logging_callback.py +21 -18
  149. mlrun/frameworks/tf_keras/model_handler.py +17 -13
  150. mlrun/frameworks/tf_keras/model_server.py +12 -8
  151. mlrun/frameworks/xgboost/__init__.py +19 -18
  152. mlrun/frameworks/xgboost/model_handler.py +13 -9
  153. mlrun/launcher/base.py +3 -4
  154. mlrun/launcher/local.py +1 -1
  155. mlrun/launcher/remote.py +1 -1
  156. mlrun/lists.py +4 -3
  157. mlrun/model.py +117 -46
  158. mlrun/model_monitoring/__init__.py +4 -4
  159. mlrun/model_monitoring/api.py +61 -59
  160. mlrun/model_monitoring/applications/_application_steps.py +17 -17
  161. mlrun/model_monitoring/applications/base.py +165 -6
  162. mlrun/model_monitoring/applications/context.py +88 -37
  163. mlrun/model_monitoring/applications/evidently_base.py +1 -2
  164. mlrun/model_monitoring/applications/histogram_data_drift.py +43 -21
  165. mlrun/model_monitoring/applications/results.py +55 -3
  166. mlrun/model_monitoring/controller.py +207 -239
  167. mlrun/model_monitoring/db/__init__.py +0 -2
  168. mlrun/model_monitoring/db/_schedules.py +156 -0
  169. mlrun/model_monitoring/db/_stats.py +189 -0
  170. mlrun/model_monitoring/db/tsdb/base.py +78 -25
  171. mlrun/model_monitoring/db/tsdb/tdengine/schemas.py +90 -16
  172. mlrun/model_monitoring/db/tsdb/tdengine/stream_graph_steps.py +33 -0
  173. mlrun/model_monitoring/db/tsdb/tdengine/tdengine_connector.py +279 -59
  174. mlrun/model_monitoring/db/tsdb/v3io/stream_graph_steps.py +1 -0
  175. mlrun/model_monitoring/db/tsdb/v3io/v3io_connector.py +78 -17
  176. mlrun/model_monitoring/helpers.py +152 -49
  177. mlrun/model_monitoring/stream_processing.py +99 -283
  178. mlrun/model_monitoring/tracking_policy.py +10 -3
  179. mlrun/model_monitoring/writer.py +48 -36
  180. mlrun/package/__init__.py +3 -6
  181. mlrun/package/context_handler.py +1 -1
  182. mlrun/package/packager.py +12 -9
  183. mlrun/package/packagers/__init__.py +0 -2
  184. mlrun/package/packagers/default_packager.py +14 -11
  185. mlrun/package/packagers/numpy_packagers.py +16 -7
  186. mlrun/package/packagers/pandas_packagers.py +18 -18
  187. mlrun/package/packagers/python_standard_library_packagers.py +25 -11
  188. mlrun/package/packagers_manager.py +31 -14
  189. mlrun/package/utils/__init__.py +0 -3
  190. mlrun/package/utils/_pickler.py +6 -6
  191. mlrun/platforms/__init__.py +47 -16
  192. mlrun/platforms/iguazio.py +4 -1
  193. mlrun/projects/operations.py +27 -27
  194. mlrun/projects/pipelines.py +75 -38
  195. mlrun/projects/project.py +865 -206
  196. mlrun/run.py +53 -10
  197. mlrun/runtimes/__init__.py +1 -3
  198. mlrun/runtimes/base.py +15 -11
  199. mlrun/runtimes/daskjob.py +9 -9
  200. mlrun/runtimes/generators.py +2 -1
  201. mlrun/runtimes/kubejob.py +4 -5
  202. mlrun/runtimes/mounts.py +572 -0
  203. mlrun/runtimes/mpijob/__init__.py +0 -2
  204. mlrun/runtimes/mpijob/abstract.py +7 -6
  205. mlrun/runtimes/nuclio/api_gateway.py +7 -7
  206. mlrun/runtimes/nuclio/application/application.py +11 -11
  207. mlrun/runtimes/nuclio/function.py +19 -17
  208. mlrun/runtimes/nuclio/serving.py +18 -11
  209. mlrun/runtimes/pod.py +154 -45
  210. mlrun/runtimes/remotesparkjob.py +3 -2
  211. mlrun/runtimes/sparkjob/__init__.py +0 -2
  212. mlrun/runtimes/sparkjob/spark3job.py +21 -11
  213. mlrun/runtimes/utils.py +6 -5
  214. mlrun/serving/merger.py +6 -4
  215. mlrun/serving/remote.py +18 -17
  216. mlrun/serving/routers.py +185 -172
  217. mlrun/serving/server.py +7 -1
  218. mlrun/serving/states.py +97 -78
  219. mlrun/serving/utils.py +13 -2
  220. mlrun/serving/v1_serving.py +3 -2
  221. mlrun/serving/v2_serving.py +74 -65
  222. mlrun/track/__init__.py +1 -1
  223. mlrun/track/tracker.py +2 -2
  224. mlrun/track/trackers/mlflow_tracker.py +6 -5
  225. mlrun/utils/async_http.py +1 -1
  226. mlrun/utils/clones.py +1 -1
  227. mlrun/utils/helpers.py +66 -18
  228. mlrun/utils/logger.py +106 -4
  229. mlrun/utils/notifications/notification/__init__.py +22 -19
  230. mlrun/utils/notifications/notification/base.py +33 -14
  231. mlrun/utils/notifications/notification/console.py +6 -6
  232. mlrun/utils/notifications/notification/git.py +11 -11
  233. mlrun/utils/notifications/notification/ipython.py +10 -9
  234. mlrun/utils/notifications/notification/mail.py +176 -0
  235. mlrun/utils/notifications/notification/slack.py +6 -6
  236. mlrun/utils/notifications/notification/webhook.py +6 -6
  237. mlrun/utils/notifications/notification_pusher.py +86 -44
  238. mlrun/utils/regex.py +3 -1
  239. mlrun/utils/version/version.json +2 -2
  240. {mlrun-1.7.1rc4.dist-info → mlrun-1.8.0rc8.dist-info}/METADATA +191 -186
  241. mlrun-1.8.0rc8.dist-info/RECORD +347 -0
  242. {mlrun-1.7.1rc4.dist-info → mlrun-1.8.0rc8.dist-info}/WHEEL +1 -1
  243. mlrun/model_monitoring/db/stores/__init__.py +0 -136
  244. mlrun/model_monitoring/db/stores/base/store.py +0 -213
  245. mlrun/model_monitoring/db/stores/sqldb/__init__.py +0 -13
  246. mlrun/model_monitoring/db/stores/sqldb/models/__init__.py +0 -71
  247. mlrun/model_monitoring/db/stores/sqldb/models/base.py +0 -190
  248. mlrun/model_monitoring/db/stores/sqldb/models/mysql.py +0 -103
  249. mlrun/model_monitoring/db/stores/sqldb/models/sqlite.py +0 -40
  250. mlrun/model_monitoring/db/stores/sqldb/sql_store.py +0 -659
  251. mlrun/model_monitoring/db/stores/v3io_kv/__init__.py +0 -13
  252. mlrun/model_monitoring/db/stores/v3io_kv/kv_store.py +0 -726
  253. mlrun/model_monitoring/model_endpoint.py +0 -118
  254. mlrun-1.7.1rc4.dist-info/RECORD +0 -351
  255. {mlrun-1.7.1rc4.dist-info → mlrun-1.8.0rc8.dist-info}/LICENSE +0 -0
  256. {mlrun-1.7.1rc4.dist-info → mlrun-1.8.0rc8.dist-info}/entry_points.txt +0 -0
  257. {mlrun-1.7.1rc4.dist-info → mlrun-1.8.0rc8.dist-info}/top_level.txt +0 -0
@@ -13,8 +13,7 @@
 # limitations under the License.
 
 import typing
-from datetime import datetime
-from typing import Union
+from datetime import datetime, timedelta, timezone
 
 import pandas as pd
 import taosws
@@ -82,13 +81,16 @@ class TDEngineConnector(TSDBConnector):
         """Initialize the super tables for the TSDB."""
         self.tables = {
             mm_schemas.TDEngineSuperTables.APP_RESULTS: tdengine_schemas.AppResultTable(
-                self.database
+                project=self.project, database=self.database
             ),
             mm_schemas.TDEngineSuperTables.METRICS: tdengine_schemas.Metrics(
-                self.database
+                project=self.project, database=self.database
             ),
             mm_schemas.TDEngineSuperTables.PREDICTIONS: tdengine_schemas.Predictions(
-                self.database
+                project=self.project, database=self.database
+            ),
+            mm_schemas.TDEngineSuperTables.ERRORS: tdengine_schemas.Errors(
+                project=self.project, database=self.database
             ),
         }
 
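Note: the supertable schema objects are now constructed per project, and a new Errors supertable joins app_results, metrics, and predictions. Later hunks read the super_table attribute of these objects instead of the bare enum name. A minimal, standalone sketch of that pattern, assuming a project-scoped naming scheme (the real classes live in mlrun/model_monitoring/db/tsdb/tdengine/schemas.py; the name format here is illustrative only):

from dataclasses import dataclass


# Hypothetical stand-in for the tdengine_schemas table classes; the naming
# scheme below is an assumption for illustration, not the mlrun implementation.
@dataclass
class SuperTable:
    name: str  # "app_results" / "metrics" / "predictions" / "errors"
    project: str
    database: str

    @property
    def super_table(self) -> str:
        # A project-scoped supertable name replaces the per-row "project" tag
        # that the 1.7 code filtered on.
        return f"{self.name}_{self.project}"


tables = {
    kind: SuperTable(name=kind, project="my_project", database="mlrun_mm")
    for kind in ("app_results", "metrics", "predictions", "errors")
}
print(tables["predictions"].super_table)  # -> predictions_my_project
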
@@ -112,11 +114,9 @@ class TDEngineConnector(TSDBConnector):
         """
 
         table_name = (
-            f"{self.project}_"
             f"{event[mm_schemas.WriterEvent.ENDPOINT_ID]}_"
-            f"{event[mm_schemas.WriterEvent.APPLICATION_NAME]}_"
+            f"{event[mm_schemas.WriterEvent.APPLICATION_NAME]}"
         )
-        event[mm_schemas.EventFieldType.PROJECT] = self.project
 
         if kind == mm_schemas.WriterEventKind.RESULT:
             # Write a new result
@@ -124,7 +124,6 @@ class TDEngineConnector(TSDBConnector):
             table_name = (
                 f"{table_name}_{event[mm_schemas.ResultData.RESULT_NAME]}"
             ).replace("-", "_")
-            event.pop(mm_schemas.ResultData.CURRENT_STATS, None)
 
         else:
             # Write a new metric
@@ -165,7 +164,7 @@ class TDEngineConnector(TSDBConnector):
     def _convert_to_datetime(val: typing.Union[str, datetime]) -> datetime:
         return datetime.fromisoformat(val) if isinstance(val, str) else val
 
-    def apply_monitoring_stream_steps(self, graph):
+    def apply_monitoring_stream_steps(self, graph, **kwarg):
         """
         Apply TSDB steps on the provided monitoring graph. Throughout these steps, the graph stores live data of
         different key metric dictionaries. This data is being used by the monitoring dashboards in
@@ -187,7 +186,9 @@ class TDEngineConnector(TSDBConnector):
             name=name,
             after=after,
             url=self._tdengine_connection_string,
-            supertable=mm_schemas.TDEngineSuperTables.PREDICTIONS,
+            supertable=self.tables[
+                mm_schemas.TDEngineSuperTables.PREDICTIONS
+            ].super_table,
             table_col=mm_schemas.EventFieldType.TABLE_COLUMN,
             time_col=mm_schemas.EventFieldType.TIME,
             database=self.database,
@@ -196,7 +197,6 @@
                 mm_schemas.EventKeyMetrics.CUSTOM_METRICS,
             ],
             tag_cols=[
-                mm_schemas.EventFieldType.PROJECT,
                 mm_schemas.EventFieldType.ENDPOINT_ID,
             ],
             max_events=1000,
@@ -209,8 +209,37 @@
             after="ProcessBeforeTDEngine",
         )
 
-    def handle_model_error(self, graph, **kwargs) -> None:
-        pass
+    def handle_model_error(
+        self,
+        graph,
+        tsdb_batching_max_events: int = 1000,
+        tsdb_batching_timeout_secs: int = 30,
+        **kwargs,
+    ) -> None:
+        graph.add_step(
+            "mlrun.model_monitoring.db.tsdb.tdengine.stream_graph_steps.ErrorExtractor",
+            name="error_extractor",
+            after="ForwardError",
+        )
+        graph.add_step(
+            "storey.TDEngineTarget",
+            name="tsdb_error",
+            after="error_extractor",
+            url=self._tdengine_connection_string,
+            supertable=self.tables[mm_schemas.TDEngineSuperTables.ERRORS].super_table,
+            table_col=mm_schemas.EventFieldType.TABLE_COLUMN,
+            time_col=mm_schemas.EventFieldType.TIME,
+            database=self.database,
+            columns=[
+                mm_schemas.EventFieldType.MODEL_ERROR,
+            ],
+            tag_cols=[
+                mm_schemas.EventFieldType.ENDPOINT_ID,
+                mm_schemas.EventFieldType.ERROR_TYPE,
+            ],
+            max_events=tsdb_batching_max_events,
+            flush_after_seconds=tsdb_batching_timeout_secs,
+        )
 
     def delete_tsdb_resources(self):
         """
@@ -220,22 +249,23 @@
             "Deleting all project resources using the TDEngine connector",
             project=self.project,
         )
+        drop_statements = []
         for table in self.tables:
-            get_subtable_names_query = self.tables[table]._get_subtables_query(
-                values={mm_schemas.EventFieldType.PROJECT: self.project}
-            )
-            subtables = self.connection.run(
-                query=get_subtable_names_query,
+            drop_statements.append(self.tables[table].drop_supertable_query())
+
+        try:
+            self.connection.run(
+                statements=drop_statements,
                 timeout=self._timeout,
                 retries=self._retries,
-            ).data
-            drop_statements = []
-            for subtable in subtables:
-                drop_statements.append(
-                    self.tables[table]._drop_subtable_query(subtable=subtable[0])
-                )
-            self.connection.run(
-                statements=drop_statements, timeout=self._timeout, retries=self._retries
+            )
+        except Exception as e:
+            logger.warning(
+                "Failed to drop TDEngine tables. You may need to drop them manually. "
+                "These can be found under the following supertables: app_results, "
+                "metrics, and predictions.",
+                project=self.project,
+                error=mlrun.errors.err_to_str(e),
             )
         logger.debug(
             "Deleted all project resources using the TDEngine connector",
@@ -264,6 +294,10 @@
         limit: typing.Optional[int] = None,
         sliding_window_step: typing.Optional[str] = None,
         timestamp_column: str = mm_schemas.EventFieldType.TIME,
+        group_by: typing.Optional[typing.Union[list[str], str]] = None,
+        preform_agg_columns: typing.Optional[list] = None,
+        order_by: typing.Optional[str] = None,
+        desc: typing.Optional[bool] = None,
     ) -> pd.DataFrame:
         """
         Getting records from TSDB data collection.
@@ -283,18 +317,19 @@
                                          `sliding_window_step` is provided, interval must be provided as well. Provided
                                          as a string in the format of '1m', '1h', etc.
         :param timestamp_column:         The column name that holds the timestamp index.
+        :param group_by:                 The column name to group by. Note that if `group_by` is provided, aggregation
+                                         functions must be provided as well.
+        :param preform_agg_columns:      The columns to perform aggregation on. Note that all provided aggregation
+                                         functions are applied to these columns. If not provided, the default is to
+                                         aggregate over all the columns in `columns`; if an empty list is provided,
+                                         no aggregation is performed.
+        :param order_by:                 The column or alias to order the query results by.
+        :param desc:                     Whether to sort the results in descending order.
 
         :return: DataFrame with the provided attributes from the data collection.
         :raise:  MLRunInvalidArgumentError if querying the provided table failed.
         """
 
-        project_condition = f"project = '{self.project}'"
-        filter_query = (
-            f"({filter_query}) AND ({project_condition})"
-            if filter_query
-            else project_condition
-        )
-
         full_query = tdengine_schemas.TDEngineSchema._get_records_query(
             table=table,
             start=start,
@@ -307,6 +342,10 @@
             sliding_window_step=sliding_window_step,
             timestamp_column=timestamp_column,
             database=self.database,
+            group_by=group_by,
+            preform_agg_funcs_columns=preform_agg_columns,
+            order_by=order_by,
+            desc=desc,
         )
         logger.debug("Querying TDEngine", query=full_query)
         try:
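Note: to make the new _get_records parameters concrete, a call that groups by endpoint and aggregates a single column should translate to a SELECT of roughly the following shape (an assumption based on the documented semantics, not the literal output of _get_records_query):

# A call such as:
#   connector._get_records(
#       table="predictions_my_project",
#       start=start, end=end,
#       columns=["endpoint_id", "latency"],
#       agg_funcs=["avg"],
#       group_by="endpoint_id",
#       preform_agg_columns=["latency"],  # aggregate only this column
#       order_by="endpoint_id",
#       desc=True,
#   )
# should produce a query of roughly this shape (illustrative only):
start, end = "2024-06-01T00:00:00", "2024-06-02T00:00:00"
query = (
    "SELECT endpoint_id, avg(latency) FROM mlrun_mm.predictions_my_project "
    f"WHERE time >= '{start}' AND time <= '{end}' "
    "GROUP BY endpoint_id ORDER BY endpoint_id DESC"
)
print(query)
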
@@ -329,6 +368,7 @@
         end: datetime,
         metrics: list[mm_schemas.ModelEndpointMonitoringMetric],
         type: typing.Literal["metrics", "results"],
+        with_result_extra_data: bool = False,
     ) -> typing.Union[
         list[
             typing.Union[
@@ -346,12 +386,18 @@
         timestamp_column = mm_schemas.WriterEvent.END_INFER_TIME
         columns = [timestamp_column, mm_schemas.WriterEvent.APPLICATION_NAME]
         if type == "metrics":
-            table = mm_schemas.TDEngineSuperTables.METRICS
+            if with_result_extra_data:
+                logger.warning(
+                    "The 'with_result_extra_data' parameter is not supported for metrics, just for results",
+                    project=self.project,
+                    endpoint_id=endpoint_id,
+                )
+            table = self.tables[mm_schemas.TDEngineSuperTables.METRICS].super_table
             name = mm_schemas.MetricData.METRIC_NAME
             columns += [name, mm_schemas.MetricData.METRIC_VALUE]
             df_handler = self.df_to_metrics_values
         elif type == "results":
-            table = mm_schemas.TDEngineSuperTables.APP_RESULTS
+            table = self.tables[mm_schemas.TDEngineSuperTables.APP_RESULTS].super_table
             name = mm_schemas.ResultData.RESULT_NAME
             columns += [
                 name,
@@ -359,6 +405,8 @@
                 mm_schemas.ResultData.RESULT_STATUS,
                 mm_schemas.ResultData.RESULT_KIND,
             ]
+            if with_result_extra_data:
+                columns.append(mm_schemas.ResultData.RESULT_EXTRA_DATA)
             df_handler = self.df_to_results_values
         else:
             raise mlrun.errors.MLRunInvalidArgumentError(
@@ -395,6 +443,10 @@
             is_empty=df.empty,
         )
 
+        if not with_result_extra_data and type == "results":
+            # Set the extra data to an empty string if it's not requested
+            df[mm_schemas.ResultData.RESULT_EXTRA_DATA] = ""
+
         return df_handler(df=df, metrics=metrics, project=self.project)
 
     def read_predictions(
  def read_predictions(
@@ -417,7 +469,7 @@ class TDEngineConnector(TSDBConnector):
417
469
  "both or neither of `aggregation_window` and `agg_funcs` must be provided"
418
470
  )
419
471
  df = self._get_records(
420
- table=mm_schemas.TDEngineSuperTables.PREDICTIONS,
472
+ table=self.tables[mm_schemas.TDEngineSuperTables.PREDICTIONS].super_table,
421
473
  start=start,
422
474
  end=end,
423
475
  columns=[mm_schemas.EventFieldType.LATENCY],
@@ -458,51 +510,219 @@
 
     def get_last_request(
         self,
-        endpoint_ids: Union[str, list[str]],
-        start: Union[datetime, str] = "0",
-        end: Union[datetime, str] = "now",
+        endpoint_ids: typing.Union[str, list[str]],
+        start: typing.Optional[datetime] = None,
+        end: typing.Optional[datetime] = None,
     ) -> pd.DataFrame:
-        pass
+        endpoint_ids = (
+            endpoint_ids if isinstance(endpoint_ids, list) else [endpoint_ids]
+        )
+        start, end = self._get_start_end(start, end)
+        df = self._get_records(
+            table=self.tables[mm_schemas.TDEngineSuperTables.PREDICTIONS].super_table,
+            start=start,
+            end=end,
+            columns=[
+                mm_schemas.EventFieldType.ENDPOINT_ID,
+                mm_schemas.EventFieldType.TIME,
+                mm_schemas.EventFieldType.LATENCY,
+            ],
+            filter_query=f"endpoint_id IN({str(endpoint_ids)[1:-1]})",
+            timestamp_column=mm_schemas.EventFieldType.TIME,
+            agg_funcs=["last"],
+            group_by=mm_schemas.EventFieldType.ENDPOINT_ID,
+            preform_agg_columns=[mm_schemas.EventFieldType.TIME],
+        )
+        if not df.empty:
+            df.dropna(inplace=True)
+            df.rename(
+                columns={
+                    f"last({mm_schemas.EventFieldType.TIME})": mm_schemas.EventFieldType.LAST_REQUEST,
+                    f"{mm_schemas.EventFieldType.LATENCY}": "last_latency",
+                },
+                inplace=True,
+            )
+            df[mm_schemas.EventFieldType.LAST_REQUEST] = df[
+                mm_schemas.EventFieldType.LAST_REQUEST
+            ].map(
+                lambda last_request: datetime.strptime(
+                    last_request, "%Y-%m-%d %H:%M:%S.%f %z"
+                ).astimezone(tz=timezone.utc)
+            )
+        return df
 
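Note: get_last_request asks TDengine for last(time) per endpoint, then renames the aggregate columns and normalizes the timestamp to an aware UTC datetime. The post-processing can be reproduced on a toy frame (column names follow the diff; the data is made up):

from datetime import datetime, timezone

import pandas as pd

# Toy stand-in for the frame returned by _get_records(agg_funcs=["last"], ...)
df = pd.DataFrame(
    {
        "endpoint_id": ["ep-1", "ep-2"],
        "last(time)": [
            "2024-06-01 12:00:00.000000 +00:00",
            "2024-06-01 13:30:00.500000 +00:00",
        ],
        "latency": [12.5, 8.0],
    }
)
if not df.empty:
    df.dropna(inplace=True)
    df.rename(
        columns={"last(time)": "last_request", "latency": "last_latency"},
        inplace=True,
    )
    # TDengine returns string timestamps; parse and normalize to aware UTC
    df["last_request"] = df["last_request"].map(
        lambda ts: datetime.strptime(ts, "%Y-%m-%d %H:%M:%S.%f %z").astimezone(
            tz=timezone.utc
        )
    )
print(df)
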
     def get_drift_status(
         self,
-        endpoint_ids: Union[str, list[str]],
-        start: Union[datetime, str] = "now-24h",
-        end: Union[datetime, str] = "now",
+        endpoint_ids: typing.Union[str, list[str]],
+        start: typing.Optional[datetime] = None,
+        end: typing.Optional[datetime] = None,
     ) -> pd.DataFrame:
-        pass
+        endpoint_ids = (
+            endpoint_ids if isinstance(endpoint_ids, list) else [endpoint_ids]
+        )
+        start = start or (mlrun.utils.datetime_now() - timedelta(hours=24))
+        start, end = self._get_start_end(start, end)
+        df = self._get_records(
+            table=self.tables[mm_schemas.TDEngineSuperTables.APP_RESULTS].super_table,
+            start=start,
+            end=end,
+            columns=[
+                mm_schemas.ResultData.RESULT_STATUS,
+                mm_schemas.EventFieldType.ENDPOINT_ID,
+            ],
+            filter_query=f"endpoint_id IN({str(endpoint_ids)[1:-1]})",
+            timestamp_column=mm_schemas.WriterEvent.END_INFER_TIME,
+            agg_funcs=["max"],
+            group_by=mm_schemas.EventFieldType.ENDPOINT_ID,
+            preform_agg_columns=[mm_schemas.ResultData.RESULT_STATUS],
+        )
+        df.rename(
+            columns={
+                f"max({mm_schemas.ResultData.RESULT_STATUS})": mm_schemas.ResultData.RESULT_STATUS
+            },
+            inplace=True,
+        )
+        if not df.empty:
+            df.dropna(inplace=True)
+        return df
 
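Note: get_drift_status reduces the app results to the worst (max) result_status per endpoint over the window, defaulting to the last 24 hours. The reduction is equivalent to a groupby-max; the 0/1/2 status convention below is stated as an assumption:

import pandas as pd

# Toy illustration: the worst (max) result_status per endpoint wins.
# Status codes assumed: 0 = no drift, 1 = possible drift, 2 = drift detected.
results = pd.DataFrame(
    {
        "endpoint_id": ["ep-1", "ep-1", "ep-2"],
        "result_status": [0, 2, 1],
    }
)
drift_status = results.groupby("endpoint_id", as_index=False)["result_status"].max()
print(drift_status)  # ep-1 -> 2, ep-2 -> 1
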
     def get_metrics_metadata(
         self,
         endpoint_id: str,
-        start: Union[datetime, str] = "0",
-        end: Union[datetime, str] = "now",
+        start: typing.Optional[datetime] = None,
+        end: typing.Optional[datetime] = None,
     ) -> pd.DataFrame:
-        pass
+        start, end = self._get_start_end(start, end)
+        df = self._get_records(
+            table=self.tables[mm_schemas.TDEngineSuperTables.METRICS].super_table,
+            start=start,
+            end=end,
+            columns=[
+                mm_schemas.ApplicationEvent.APPLICATION_NAME,
+                mm_schemas.MetricData.METRIC_NAME,
+                mm_schemas.EventFieldType.ENDPOINT_ID,
+            ],
+            filter_query=f"endpoint_id='{endpoint_id}'",
+            timestamp_column=mm_schemas.WriterEvent.END_INFER_TIME,
+            group_by=[
+                mm_schemas.WriterEvent.APPLICATION_NAME,
+                mm_schemas.MetricData.METRIC_NAME,
+            ],
+            agg_funcs=["last"],
+        )
+        df.rename(
+            columns={
+                f"last({mm_schemas.ApplicationEvent.APPLICATION_NAME})": mm_schemas.ApplicationEvent.APPLICATION_NAME,
+                f"last({mm_schemas.MetricData.METRIC_NAME})": mm_schemas.MetricData.METRIC_NAME,
+                f"last({mm_schemas.EventFieldType.ENDPOINT_ID})": mm_schemas.EventFieldType.ENDPOINT_ID,
+            },
+            inplace=True,
+        )
+        if not df.empty:
+            df.dropna(inplace=True)
+        return df
 
     def get_results_metadata(
         self,
         endpoint_id: str,
-        start: Union[datetime, str] = "0",
-        end: Union[datetime, str] = "now",
+        start: typing.Optional[datetime] = None,
+        end: typing.Optional[datetime] = None,
     ) -> pd.DataFrame:
-        pass
+        start, end = self._get_start_end(start, end)
+        df = self._get_records(
+            table=self.tables[mm_schemas.TDEngineSuperTables.APP_RESULTS].super_table,
+            start=start,
+            end=end,
+            columns=[
+                mm_schemas.ApplicationEvent.APPLICATION_NAME,
+                mm_schemas.ResultData.RESULT_NAME,
+                mm_schemas.ResultData.RESULT_KIND,
+                mm_schemas.EventFieldType.ENDPOINT_ID,
+            ],
+            filter_query=f"endpoint_id='{endpoint_id}'",
+            timestamp_column=mm_schemas.WriterEvent.END_INFER_TIME,
+            group_by=[
+                mm_schemas.WriterEvent.APPLICATION_NAME,
+                mm_schemas.ResultData.RESULT_NAME,
+            ],
+            agg_funcs=["last"],
+        )
+        df.rename(
+            columns={
+                f"last({mm_schemas.ApplicationEvent.APPLICATION_NAME})": mm_schemas.ApplicationEvent.APPLICATION_NAME,
+                f"last({mm_schemas.ResultData.RESULT_NAME})": mm_schemas.ResultData.RESULT_NAME,
+                f"last({mm_schemas.ResultData.RESULT_KIND})": mm_schemas.ResultData.RESULT_KIND,
+                f"last({mm_schemas.EventFieldType.ENDPOINT_ID})": mm_schemas.EventFieldType.ENDPOINT_ID,
+            },
+            inplace=True,
+        )
+        if not df.empty:
+            df.dropna(inplace=True)
+        return df
 
     def get_error_count(
         self,
-        endpoint_ids: Union[str, list[str]],
-        start: Union[datetime, str] = "0",
-        end: Union[datetime, str] = "now",
+        endpoint_ids: typing.Union[str, list[str]],
+        start: typing.Optional[datetime] = None,
+        end: typing.Optional[datetime] = None,
     ) -> pd.DataFrame:
-        pass
+        endpoint_ids = (
+            endpoint_ids if isinstance(endpoint_ids, list) else [endpoint_ids]
+        )
+        start, end = self._get_start_end(start, end)
+        df = self._get_records(
+            table=self.tables[mm_schemas.TDEngineSuperTables.ERRORS].super_table,
+            start=start,
+            end=end,
+            columns=[
+                mm_schemas.EventFieldType.MODEL_ERROR,
+                mm_schemas.EventFieldType.ENDPOINT_ID,
+            ],
+            agg_funcs=["count"],
+            filter_query=f"endpoint_id IN({str(endpoint_ids)[1:-1]}) "
+            f"AND {mm_schemas.EventFieldType.ERROR_TYPE} = '{mm_schemas.EventFieldType.INFER_ERROR}'",
+            group_by=mm_schemas.EventFieldType.ENDPOINT_ID,
+            preform_agg_columns=[mm_schemas.EventFieldType.MODEL_ERROR],
+        )
+        df.rename(
+            columns={f"count({mm_schemas.EventFieldType.MODEL_ERROR})": "error_count"},
+            inplace=True,
+        )
+        if not df.empty:
+            df.dropna(inplace=True)
+        return df
 
  def get_avg_latency(
500
696
  self,
501
- endpoint_ids: Union[str, list[str]],
502
- start: Union[datetime, str] = "0",
503
- end: Union[datetime, str] = "now",
697
+ endpoint_ids: typing.Union[str, list[str]],
698
+ start: typing.Optional[datetime] = None,
699
+ end: typing.Optional[datetime] = None,
504
700
  ) -> pd.DataFrame:
505
- pass
701
+ endpoint_ids = (
702
+ endpoint_ids if isinstance(endpoint_ids, list) else [endpoint_ids]
703
+ )
704
+ start = start or (mlrun.utils.datetime_now() - timedelta(hours=24))
705
+ start, end = self._get_start_end(start, end)
706
+ df = self._get_records(
707
+ table=self.tables[mm_schemas.TDEngineSuperTables.PREDICTIONS].super_table,
708
+ start=start,
709
+ end=end,
710
+ columns=[
711
+ mm_schemas.EventFieldType.LATENCY,
712
+ mm_schemas.EventFieldType.ENDPOINT_ID,
713
+ ],
714
+ agg_funcs=["avg"],
715
+ filter_query=f"endpoint_id IN({str(endpoint_ids)[1:-1]})",
716
+ group_by=mm_schemas.EventFieldType.ENDPOINT_ID,
717
+ preform_agg_columns=[mm_schemas.EventFieldType.LATENCY],
718
+ )
719
+ df.rename(
720
+ columns={f"avg({mm_schemas.EventFieldType.LATENCY})": "avg_latency"},
721
+ inplace=True,
722
+ )
723
+ if not df.empty:
724
+ df.dropna(inplace=True)
725
+ return df
506
726
 
507
727
  # Note: this function serves as a reference for checking the TSDB for the existence of a metric.
508
728
  #
@@ -150,6 +150,7 @@ class ErrorExtractor(mlrun.feature_store.steps.MapClass):
         endpoint_id = event[EventFieldType.ENDPOINT_ID]
         event = {
             EventFieldType.MODEL_ERROR: str(error),
+            EventFieldType.ERROR_TYPE: EventFieldType.INFER_ERROR,
             EventFieldType.ENDPOINT_ID: endpoint_id,
             EventFieldType.TIMESTAMP: timestamp,
             EventFieldType.ERROR_COUNT: 1.0,
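
Note: the added ERROR_TYPE field tags every extracted error event, which is what lets get_error_count above filter on infer errors specifically. The emitted event now looks roughly like this (keys mirror the EventFieldType constants; values are illustrative):

# Illustrative shape of the event ErrorExtractor now emits; the keys mirror
# the EventFieldType constants and the values are made up.
error_event = {
    "model_error": "ValueError: bad input",
    "error_type": "infer_error",  # new: lets get_error_count() filter by type
    "endpoint_id": "ep-1",
    "timestamp": "2024-06-01 12:00:00.000000",
    "error_count": 1.0,
}
print(error_event)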