mlrun 1.7.0rc17__py3-none-any.whl → 1.7.0rc19__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of mlrun might be problematic. Click here for more details.

Files changed (90) hide show
  1. mlrun/__main__.py +5 -2
  2. mlrun/alerts/alert.py +1 -1
  3. mlrun/artifacts/manager.py +5 -1
  4. mlrun/common/constants.py +64 -3
  5. mlrun/common/formatters/__init__.py +16 -0
  6. mlrun/common/formatters/base.py +59 -0
  7. mlrun/common/formatters/function.py +41 -0
  8. mlrun/common/runtimes/constants.py +32 -4
  9. mlrun/common/schemas/__init__.py +1 -2
  10. mlrun/common/schemas/alert.py +31 -9
  11. mlrun/common/schemas/api_gateway.py +52 -0
  12. mlrun/common/schemas/client_spec.py +1 -0
  13. mlrun/common/schemas/frontend_spec.py +1 -0
  14. mlrun/common/schemas/function.py +4 -0
  15. mlrun/common/schemas/model_monitoring/__init__.py +9 -4
  16. mlrun/common/schemas/model_monitoring/constants.py +22 -8
  17. mlrun/common/schemas/model_monitoring/grafana.py +9 -5
  18. mlrun/common/schemas/model_monitoring/model_endpoints.py +17 -6
  19. mlrun/config.py +9 -2
  20. mlrun/data_types/to_pandas.py +5 -5
  21. mlrun/datastore/datastore.py +6 -2
  22. mlrun/datastore/redis.py +2 -2
  23. mlrun/datastore/s3.py +5 -0
  24. mlrun/datastore/sources.py +106 -7
  25. mlrun/datastore/store_resources.py +5 -1
  26. mlrun/datastore/targets.py +5 -4
  27. mlrun/datastore/utils.py +42 -0
  28. mlrun/db/base.py +5 -1
  29. mlrun/db/httpdb.py +22 -3
  30. mlrun/db/nopdb.py +5 -1
  31. mlrun/errors.py +6 -0
  32. mlrun/execution.py +16 -6
  33. mlrun/feature_store/ingestion.py +7 -6
  34. mlrun/feature_store/retrieval/conversion.py +5 -5
  35. mlrun/feature_store/retrieval/job.py +7 -3
  36. mlrun/feature_store/retrieval/spark_merger.py +2 -1
  37. mlrun/frameworks/_dl_common/loggers/tensorboard_logger.py +2 -2
  38. mlrun/frameworks/parallel_coordinates.py +2 -1
  39. mlrun/frameworks/tf_keras/__init__.py +4 -1
  40. mlrun/launcher/client.py +4 -2
  41. mlrun/launcher/local.py +8 -2
  42. mlrun/launcher/remote.py +8 -2
  43. mlrun/model.py +5 -1
  44. mlrun/model_monitoring/db/stores/__init__.py +0 -2
  45. mlrun/model_monitoring/db/stores/base/store.py +16 -4
  46. mlrun/model_monitoring/db/stores/sqldb/models/__init__.py +43 -21
  47. mlrun/model_monitoring/db/stores/sqldb/models/base.py +32 -2
  48. mlrun/model_monitoring/db/stores/sqldb/models/mysql.py +25 -5
  49. mlrun/model_monitoring/db/stores/sqldb/models/sqlite.py +5 -0
  50. mlrun/model_monitoring/db/stores/sqldb/sql_store.py +235 -166
  51. mlrun/model_monitoring/db/stores/v3io_kv/kv_store.py +190 -91
  52. mlrun/model_monitoring/db/tsdb/__init__.py +35 -6
  53. mlrun/model_monitoring/db/tsdb/base.py +232 -38
  54. mlrun/model_monitoring/db/tsdb/helpers.py +30 -0
  55. mlrun/model_monitoring/db/tsdb/tdengine/__init__.py +15 -0
  56. mlrun/model_monitoring/db/tsdb/tdengine/schemas.py +240 -0
  57. mlrun/model_monitoring/db/tsdb/tdengine/stream_graph_steps.py +45 -0
  58. mlrun/model_monitoring/db/tsdb/tdengine/tdengine_connector.py +397 -0
  59. mlrun/model_monitoring/db/tsdb/v3io/v3io_connector.py +292 -104
  60. mlrun/model_monitoring/helpers.py +45 -0
  61. mlrun/model_monitoring/stream_processing.py +7 -4
  62. mlrun/model_monitoring/writer.py +50 -20
  63. mlrun/package/utils/_formatter.py +2 -2
  64. mlrun/projects/operations.py +8 -5
  65. mlrun/projects/pipelines.py +42 -15
  66. mlrun/projects/project.py +55 -14
  67. mlrun/render.py +8 -5
  68. mlrun/runtimes/base.py +2 -1
  69. mlrun/runtimes/databricks_job/databricks_wrapper.py +1 -1
  70. mlrun/runtimes/local.py +4 -1
  71. mlrun/runtimes/nuclio/api_gateway.py +32 -8
  72. mlrun/runtimes/nuclio/application/application.py +3 -3
  73. mlrun/runtimes/nuclio/function.py +1 -4
  74. mlrun/runtimes/utils.py +5 -6
  75. mlrun/serving/server.py +2 -1
  76. mlrun/utils/async_http.py +25 -5
  77. mlrun/utils/helpers.py +28 -7
  78. mlrun/utils/logger.py +28 -1
  79. mlrun/utils/notifications/notification/__init__.py +14 -9
  80. mlrun/utils/notifications/notification/slack.py +27 -7
  81. mlrun/utils/notifications/notification_pusher.py +47 -42
  82. mlrun/utils/v3io_clients.py +0 -1
  83. mlrun/utils/version/version.json +2 -2
  84. {mlrun-1.7.0rc17.dist-info → mlrun-1.7.0rc19.dist-info}/METADATA +9 -4
  85. {mlrun-1.7.0rc17.dist-info → mlrun-1.7.0rc19.dist-info}/RECORD +89 -82
  86. mlrun/model_monitoring/db/v3io_tsdb_reader.py +0 -134
  87. {mlrun-1.7.0rc17.dist-info → mlrun-1.7.0rc19.dist-info}/LICENSE +0 -0
  88. {mlrun-1.7.0rc17.dist-info → mlrun-1.7.0rc19.dist-info}/WHEEL +0 -0
  89. {mlrun-1.7.0rc17.dist-info → mlrun-1.7.0rc19.dist-info}/entry_points.txt +0 -0
  90. {mlrun-1.7.0rc17.dist-info → mlrun-1.7.0rc19.dist-info}/top_level.txt +0 -0
@@ -12,23 +12,27 @@
12
12
  # See the License for the specific language governing permissions and
13
13
  # limitations under the License.
14
14
 
15
- import datetime
15
+ import typing
16
+ from datetime import datetime
17
+ from io import StringIO
18
+ from typing import Literal, Optional, Union
16
19
 
17
20
  import pandas as pd
18
21
  import v3io_frames.client
19
22
  import v3io_frames.errors
20
- from v3io.dataplane import Client as V3IOClient
21
23
  from v3io_frames.frames_pb2 import IGNORE
22
24
 
23
25
  import mlrun.common.model_monitoring
24
- import mlrun.common.schemas.model_monitoring as mm_constants
26
+ import mlrun.common.schemas.model_monitoring as mm_schemas
25
27
  import mlrun.feature_store.steps
26
28
  import mlrun.utils.v3io_clients
27
29
  from mlrun.model_monitoring.db import TSDBConnector
30
+ from mlrun.model_monitoring.helpers import get_invocations_fqn
28
31
  from mlrun.utils import logger
29
32
 
30
33
  _TSDB_BE = "tsdb"
31
34
  _TSDB_RATE = "1/s"
35
+ _CONTAINER = "users"
32
36
 
33
37
 
34
38
  class V3IOTSDBConnector(TSDBConnector):
@@ -37,16 +41,16 @@ class V3IOTSDBConnector(TSDBConnector):
37
41
  Client that provides API for executing commands on the V3IO TSDB table.
38
42
  """
39
43
 
44
+ type: str = mm_schemas.TSDBTarget.V3IO_TSDB
45
+
40
46
  def __init__(
41
47
  self,
42
48
  project: str,
43
- access_key: str = None,
44
- container: str = "users",
45
- v3io_framesd: str = None,
49
+ container: str = _CONTAINER,
50
+ v3io_framesd: typing.Optional[str] = None,
46
51
  create_table: bool = False,
47
- ):
52
+ ) -> None:
48
53
  super().__init__(project=project)
49
- self.access_key = access_key or mlrun.mlconf.get_v3io_access_key()
50
54
 
51
55
  self.container = container
52
56
 
@@ -54,21 +58,18 @@ class V3IOTSDBConnector(TSDBConnector):
54
58
  self._frames_client: v3io_frames.client.ClientBase = (
55
59
  self._get_v3io_frames_client(self.container)
56
60
  )
57
- self._v3io_client: V3IOClient = mlrun.utils.v3io_clients.get_v3io_client(
58
- endpoint=mlrun.mlconf.v3io_api,
59
- )
60
61
 
61
62
  self._init_tables_path()
62
63
 
63
64
  if create_table:
64
- self.create_tsdb_application_tables()
65
+ self.create_tables()
65
66
 
66
67
  def _init_tables_path(self):
67
68
  self.tables = {}
68
69
 
69
70
  events_table_full_path = mlrun.mlconf.get_model_monitoring_file_target_path(
70
71
  project=self.project,
71
- kind=mm_constants.FileTargetKind.EVENTS,
72
+ kind=mm_schemas.FileTargetKind.EVENTS,
72
73
  )
73
74
  (
74
75
  _,
@@ -77,12 +78,12 @@ class V3IOTSDBConnector(TSDBConnector):
77
78
  ) = mlrun.common.model_monitoring.helpers.parse_model_endpoint_store_prefix(
78
79
  events_table_full_path
79
80
  )
80
- self.tables[mm_constants.MonitoringTSDBTables.EVENTS] = events_path
81
+ self.tables[mm_schemas.V3IOTSDBTables.EVENTS] = events_path
81
82
 
82
83
  monitoring_application_full_path = (
83
84
  mlrun.mlconf.get_model_monitoring_file_target_path(
84
85
  project=self.project,
85
- kind=mm_constants.FileTargetKind.MONITORING_APPLICATION,
86
+ kind=mm_schemas.FileTargetKind.MONITORING_APPLICATION,
86
87
  )
87
88
  )
88
89
  (
@@ -92,17 +93,17 @@ class V3IOTSDBConnector(TSDBConnector):
92
93
  ) = mlrun.common.model_monitoring.helpers.parse_model_endpoint_store_prefix(
93
94
  monitoring_application_full_path
94
95
  )
95
- self.tables[mm_constants.MonitoringTSDBTables.APP_RESULTS] = (
96
- monitoring_application_path + mm_constants.MonitoringTSDBTables.APP_RESULTS
96
+ self.tables[mm_schemas.V3IOTSDBTables.APP_RESULTS] = (
97
+ monitoring_application_path + mm_schemas.V3IOTSDBTables.APP_RESULTS
97
98
  )
98
- self.tables[mm_constants.MonitoringTSDBTables.METRICS] = (
99
- monitoring_application_path + mm_constants.MonitoringTSDBTables.METRICS
99
+ self.tables[mm_schemas.V3IOTSDBTables.METRICS] = (
100
+ monitoring_application_path + mm_schemas.V3IOTSDBTables.METRICS
100
101
  )
101
102
 
102
103
  monitoring_predictions_full_path = (
103
104
  mlrun.mlconf.get_model_monitoring_file_target_path(
104
105
  project=self.project,
105
- kind=mm_constants.FileTargetKind.PREDICTIONS,
106
+ kind=mm_schemas.FileTargetKind.PREDICTIONS,
106
107
  )
107
108
  )
108
109
  (
@@ -112,25 +113,25 @@ class V3IOTSDBConnector(TSDBConnector):
112
113
  ) = mlrun.common.model_monitoring.helpers.parse_model_endpoint_store_prefix(
113
114
  monitoring_predictions_full_path
114
115
  )
115
- self.tables[mm_constants.FileTargetKind.PREDICTIONS] = (
116
- monitoring_predictions_path
117
- )
116
+ self.tables[mm_schemas.FileTargetKind.PREDICTIONS] = monitoring_predictions_path
118
117
 
119
- def create_tsdb_application_tables(self):
118
+ def create_tables(self) -> None:
120
119
  """
121
- Create the application tables using the TSDB connector. At the moment we support 2 types of application tables:
120
+ Create the tables using the TSDB connector. The tables are being created in the V3IO TSDB and include:
122
121
  - app_results: a detailed result that includes status, kind, extra data, etc.
123
122
  - metrics: a basic key value that represents a single numeric metric.
123
+ Note that the predictions table is automatically created by the model monitoring stream pod.
124
124
  """
125
125
  application_tables = [
126
- mm_constants.MonitoringTSDBTables.APP_RESULTS,
127
- mm_constants.MonitoringTSDBTables.METRICS,
126
+ mm_schemas.V3IOTSDBTables.APP_RESULTS,
127
+ mm_schemas.V3IOTSDBTables.METRICS,
128
128
  ]
129
- for table in application_tables:
130
- logger.info("Creating table in V3IO TSDB", table=table)
129
+ for table_name in application_tables:
130
+ logger.info("Creating table in V3IO TSDB", table_name=table_name)
131
+ table = self.tables[table_name]
131
132
  self._frames_client.create(
132
133
  backend=_TSDB_BE,
133
- table=self.tables[table],
134
+ table=table,
134
135
  if_exists=IGNORE,
135
136
  rate=_TSDB_RATE,
136
137
  )
@@ -156,20 +157,20 @@ class V3IOTSDBConnector(TSDBConnector):
156
157
  "storey.TSDBTarget",
157
158
  name="tsdb_predictions",
158
159
  after="MapFeatureNames",
159
- path=f"{self.container}/{self.tables[mm_constants.FileTargetKind.PREDICTIONS]}",
160
+ path=f"{self.container}/{self.tables[mm_schemas.FileTargetKind.PREDICTIONS]}",
160
161
  rate="1/s",
161
- time_col=mm_constants.EventFieldType.TIMESTAMP,
162
+ time_col=mm_schemas.EventFieldType.TIMESTAMP,
162
163
  container=self.container,
163
164
  v3io_frames=self.v3io_framesd,
164
165
  columns=["latency"],
165
166
  index_cols=[
166
- mm_constants.EventFieldType.ENDPOINT_ID,
167
+ mm_schemas.EventFieldType.ENDPOINT_ID,
167
168
  ],
168
169
  aggr="count,avg",
169
170
  aggr_granularity="1m",
170
171
  max_events=tsdb_batching_max_events,
171
172
  flush_after_seconds=tsdb_batching_timeout_secs,
172
- key=mm_constants.EventFieldType.ENDPOINT_ID,
173
+ key=mm_schemas.EventFieldType.ENDPOINT_ID,
173
174
  )
174
175
 
175
176
  # Before writing data to TSDB, create dictionary of 2-3 dictionaries that contains
@@ -198,40 +199,40 @@ class V3IOTSDBConnector(TSDBConnector):
198
199
  "storey.TSDBTarget",
199
200
  name=name,
200
201
  after=after,
201
- path=f"{self.container}/{self.tables[mm_constants.MonitoringTSDBTables.EVENTS]}",
202
+ path=f"{self.container}/{self.tables[mm_schemas.V3IOTSDBTables.EVENTS]}",
202
203
  rate="10/m",
203
- time_col=mm_constants.EventFieldType.TIMESTAMP,
204
+ time_col=mm_schemas.EventFieldType.TIMESTAMP,
204
205
  container=self.container,
205
206
  v3io_frames=self.v3io_framesd,
206
207
  infer_columns_from_data=True,
207
208
  index_cols=[
208
- mm_constants.EventFieldType.ENDPOINT_ID,
209
- mm_constants.EventFieldType.RECORD_TYPE,
210
- mm_constants.EventFieldType.ENDPOINT_TYPE,
209
+ mm_schemas.EventFieldType.ENDPOINT_ID,
210
+ mm_schemas.EventFieldType.RECORD_TYPE,
211
+ mm_schemas.EventFieldType.ENDPOINT_TYPE,
211
212
  ],
212
213
  max_events=tsdb_batching_max_events,
213
214
  flush_after_seconds=tsdb_batching_timeout_secs,
214
- key=mm_constants.EventFieldType.ENDPOINT_ID,
215
+ key=mm_schemas.EventFieldType.ENDPOINT_ID,
215
216
  )
216
217
 
217
218
  # unpacked base_metrics dictionary
218
219
  apply_filter_and_unpacked_keys(
219
220
  name="FilterAndUnpackKeys1",
220
- keys=mm_constants.EventKeyMetrics.BASE_METRICS,
221
+ keys=mm_schemas.EventKeyMetrics.BASE_METRICS,
221
222
  )
222
223
  apply_tsdb_target(name="tsdb1", after="FilterAndUnpackKeys1")
223
224
 
224
225
  # unpacked endpoint_features dictionary
225
226
  apply_filter_and_unpacked_keys(
226
227
  name="FilterAndUnpackKeys2",
227
- keys=mm_constants.EventKeyMetrics.ENDPOINT_FEATURES,
228
+ keys=mm_schemas.EventKeyMetrics.ENDPOINT_FEATURES,
228
229
  )
229
230
  apply_tsdb_target(name="tsdb2", after="FilterAndUnpackKeys2")
230
231
 
231
232
  # unpacked custom_metrics dictionary. In addition, use storey.Filter remove none values
232
233
  apply_filter_and_unpacked_keys(
233
234
  name="FilterAndUnpackKeys3",
234
- keys=mm_constants.EventKeyMetrics.CUSTOM_METRICS,
235
+ keys=mm_schemas.EventKeyMetrics.CUSTOM_METRICS,
235
236
  )
236
237
 
237
238
  def apply_storey_filter():
@@ -248,62 +249,58 @@ class V3IOTSDBConnector(TSDBConnector):
248
249
  def write_application_event(
249
250
  self,
250
251
  event: dict,
251
- kind: mm_constants.WriterEventKind = mm_constants.WriterEventKind.RESULT,
252
- ):
252
+ kind: mm_schemas.WriterEventKind = mm_schemas.WriterEventKind.RESULT,
253
+ ) -> None:
253
254
  """Write a single result or metric to TSDB"""
254
255
 
255
- event[mm_constants.WriterEvent.END_INFER_TIME] = (
256
- datetime.datetime.fromisoformat(
257
- event[mm_constants.WriterEvent.END_INFER_TIME]
258
- )
256
+ event[mm_schemas.WriterEvent.END_INFER_TIME] = datetime.fromisoformat(
257
+ event[mm_schemas.WriterEvent.END_INFER_TIME]
259
258
  )
259
+ index_cols_base = [
260
+ mm_schemas.WriterEvent.END_INFER_TIME,
261
+ mm_schemas.WriterEvent.ENDPOINT_ID,
262
+ mm_schemas.WriterEvent.APPLICATION_NAME,
263
+ ]
260
264
 
261
- if kind == mm_constants.WriterEventKind.METRIC:
262
- # TODO : Implement the logic for writing metrics to V3IO TSDB
263
- return
265
+ if kind == mm_schemas.WriterEventKind.METRIC:
266
+ table = self.tables[mm_schemas.V3IOTSDBTables.METRICS]
267
+ index_cols = index_cols_base + [mm_schemas.MetricData.METRIC_NAME]
268
+ elif kind == mm_schemas.WriterEventKind.RESULT:
269
+ table = self.tables[mm_schemas.V3IOTSDBTables.APP_RESULTS]
270
+ index_cols = index_cols_base + [mm_schemas.ResultData.RESULT_NAME]
271
+ del event[mm_schemas.ResultData.RESULT_EXTRA_DATA]
272
+ else:
273
+ raise ValueError(f"Invalid {kind = }")
264
274
 
265
- del event[mm_constants.ResultData.RESULT_EXTRA_DATA]
266
275
  try:
267
276
  self._frames_client.write(
268
277
  backend=_TSDB_BE,
269
- table=self.tables[mm_constants.MonitoringTSDBTables.APP_RESULTS],
278
+ table=table,
270
279
  dfs=pd.DataFrame.from_records([event]),
271
- index_cols=[
272
- mm_constants.WriterEvent.END_INFER_TIME,
273
- mm_constants.WriterEvent.ENDPOINT_ID,
274
- mm_constants.WriterEvent.APPLICATION_NAME,
275
- mm_constants.ResultData.RESULT_NAME,
276
- ],
277
- )
278
- logger.info(
279
- "Updated V3IO TSDB successfully",
280
- table=self.tables[mm_constants.MonitoringTSDBTables.APP_RESULTS],
280
+ index_cols=index_cols,
281
281
  )
282
+ logger.info("Updated V3IO TSDB successfully", table=table)
282
283
  except v3io_frames.errors.Error as err:
283
- logger.warn(
284
+ logger.exception(
284
285
  "Could not write drift measures to TSDB",
285
286
  err=err,
286
- table=self.tables[mm_constants.MonitoringTSDBTables.APP_RESULTS],
287
+ table=table,
287
288
  event=event,
288
289
  )
289
-
290
290
  raise mlrun.errors.MLRunRuntimeError(
291
291
  f"Failed to write application result to TSDB: {err}"
292
292
  )
293
293
 
294
- def delete_tsdb_resources(self, table: str = None):
294
+ def delete_tsdb_resources(self, table: typing.Optional[str] = None):
295
295
  if table:
296
296
  # Delete a specific table
297
297
  tables = [table]
298
298
  else:
299
299
  # Delete all tables
300
- tables = mm_constants.MonitoringTSDBTables.list()
301
- for table in tables:
300
+ tables = mm_schemas.V3IOTSDBTables.list()
301
+ for table_to_delete in tables:
302
302
  try:
303
- self._frames_client.delete(
304
- backend=mlrun.common.schemas.model_monitoring.TimeSeriesConnector.TSDB,
305
- table=table,
306
- )
303
+ self._frames_client.delete(backend=_TSDB_BE, table=table_to_delete)
307
304
  except v3io_frames.errors.DeleteError as e:
308
305
  logger.warning(
309
306
  f"Failed to delete TSDB table '{table}'",
@@ -317,11 +314,7 @@ class V3IOTSDBConnector(TSDBConnector):
317
314
  store.rm(tsdb_path, recursive=True)
318
315
 
319
316
  def get_model_endpoint_real_time_metrics(
320
- self,
321
- endpoint_id: str,
322
- metrics: list[str],
323
- start: str = "now-1h",
324
- end: str = "now",
317
+ self, endpoint_id: str, metrics: list[str], start: str, end: str
325
318
  ) -> dict[str, list[tuple[str, float]]]:
326
319
  """
327
320
  Getting real time metrics from the TSDB. There are pre-defined metrics for model endpoints such as
@@ -349,8 +342,8 @@ class V3IOTSDBConnector(TSDBConnector):
349
342
  metrics_mapping = {}
350
343
 
351
344
  try:
352
- data = self.get_records(
353
- table=mm_constants.MonitoringTSDBTables.EVENTS,
345
+ data = self._get_records(
346
+ table=mm_schemas.V3IOTSDBTables.EVENTS,
354
347
  columns=["endpoint_id", *metrics],
355
348
  filter_query=f"endpoint_id=='{endpoint_id}'",
356
349
  start=start,
@@ -374,45 +367,74 @@ class V3IOTSDBConnector(TSDBConnector):
374
367
 
375
368
  return metrics_mapping
376
369
 
377
- def get_records(
370
+ def _get_records(
378
371
  self,
379
372
  table: str,
380
- columns: list[str] = None,
373
+ start: Union[datetime, str],
374
+ end: Union[datetime, str],
375
+ columns: typing.Optional[list[str]] = None,
381
376
  filter_query: str = "",
382
- start: str = "now-1h",
383
- end: str = "now",
377
+ interval: typing.Optional[str] = None,
378
+ agg_funcs: typing.Optional[list] = None,
379
+ limit: typing.Optional[int] = None,
380
+ sliding_window_step: typing.Optional[str] = None,
381
+ **kwargs,
384
382
  ) -> pd.DataFrame:
385
383
  """
386
384
  Getting records from V3IO TSDB data collection.
387
- :param table: Path to the collection to query.
388
- :param columns: Columns to include in the result.
389
- :param filter_query: V3IO filter expression. The expected filter expression includes different conditions,
390
- divided by ' AND '.
391
- :param start: The start time of the metrics. Can be represented by a string containing an RFC 3339
392
- time, a Unix timestamp in milliseconds, a relative time (`'now'` or
393
- `'now-[0-9]+[mhd]'`, where `m` = minutes, `h` = hours, `'d'` = days, and
394
- `'s'` = seconds), or 0 for the earliest time.
395
- :param end: The end time of the metrics. Can be represented by a string containing an RFC 3339
396
- time, a Unix timestamp in milliseconds, a relative time (`'now'` or
397
- `'now-[0-9]+[mhd]'`, where `m` = minutes, `h` = hours, `'d'` = days, and
398
- `'s'` = seconds), or 0 for the earliest time.
385
+ :param table: Path to the collection to query.
386
+ :param start: The start time of the metrics. Can be represented by a string containing an RFC
387
+ 3339 time, a Unix timestamp in milliseconds, a relative time (`'now'` or
388
+ `'now-[0-9]+[mhd]'`, where `m` = minutes, `h` = hours, `'d'` = days, and
389
+ `'s'` = seconds), or 0 for the earliest time.
390
+ :param end: The end time of the metrics. Can be represented by a string containing an RFC
391
+ 3339 time, a Unix timestamp in milliseconds, a relative time (`'now'` or
392
+ `'now-[0-9]+[mhd]'`, where `m` = minutes, `h` = hours, `'d'` = days, and
393
+ `'s'` = seconds), or 0 for the earliest time.
394
+ :param columns: Columns to include in the result.
395
+ :param filter_query: V3IO filter expression. The expected filter expression includes different
396
+ conditions, divided by ' AND '.
397
+ :param interval: The interval to aggregate the data by. Note that if interval is provided,
398
+ agg_funcs must be provided as well. Provided as a string in the format of '1m',
399
+ '1h', etc.
400
+ :param agg_funcs: The aggregation functions to apply on the columns. Note that if `agg_funcs` is
401
+ provided, `interval` must be provided as well. Provided as a list of strings in
402
+ the format of ['sum', 'avg', 'count', ...].
403
+ :param limit: The maximum number of records to return.
404
+ :param sliding_window_step: The time step for which the time window moves forward. Note that if
405
+ `sliding_window_step` is provided, interval must be provided as well. Provided
406
+ as a string in the format of '1m', '1h', etc.
407
+ :param kwargs: Additional keyword arguments passed to the read method of frames client.
399
408
  :return: DataFrame with the provided attributes from the data collection.
400
409
  :raise: MLRunNotFoundError if the provided table wasn't found.
401
410
  """
402
411
  if table not in self.tables:
403
412
  raise mlrun.errors.MLRunNotFoundError(
404
- f"Table '{table}' does not exist in the tables list of the TSDB connector."
413
+ f"Table '{table}' does not exist in the tables list of the TSDB connector. "
405
414
  f"Available tables: {list(self.tables.keys())}"
406
415
  )
407
- return self._frames_client.read(
408
- backend=mlrun.common.schemas.model_monitoring.TimeSeriesConnector.TSDB,
409
- table=self.tables[table],
410
- columns=columns,
411
- filter=filter_query,
416
+
417
+ if agg_funcs:
418
+ # Frames client expects the aggregators to be a comma-separated string
419
+ agg_funcs = ",".join(agg_funcs)
420
+ table_path = self.tables[table]
421
+ df = self._frames_client.read(
422
+ backend=_TSDB_BE,
423
+ table=table_path,
412
424
  start=start,
413
425
  end=end,
426
+ columns=columns,
427
+ filter=filter_query,
428
+ aggregation_window=interval,
429
+ aggregators=agg_funcs,
430
+ step=sliding_window_step,
431
+ **kwargs,
414
432
  )
415
433
 
434
+ if limit:
435
+ df = df.head(limit)
436
+ return df
437
+
416
438
  def _get_v3io_source_directory(self) -> str:
417
439
  """
418
440
  Get the V3IO source directory for the current project. Usually the source directory will
@@ -422,7 +444,7 @@ class V3IOTSDBConnector(TSDBConnector):
422
444
  """
423
445
  events_table_full_path = mlrun.mlconf.get_model_monitoring_file_target_path(
424
446
  project=self.project,
425
- kind=mm_constants.FileTargetKind.EVENTS,
447
+ kind=mm_schemas.FileTargetKind.EVENTS,
426
448
  )
427
449
 
428
450
  # Generate the main directory with the V3IO resources
@@ -440,3 +462,169 @@ class V3IOTSDBConnector(TSDBConnector):
440
462
  address=mlrun.mlconf.v3io_framesd,
441
463
  container=v3io_container,
442
464
  )
465
+
466
+ def read_metrics_data(
467
+ self,
468
+ *,
469
+ endpoint_id: str,
470
+ start: datetime,
471
+ end: datetime,
472
+ metrics: list[mm_schemas.ModelEndpointMonitoringMetric],
473
+ type: Literal["metrics", "results"] = "results",
474
+ ) -> Union[
475
+ list[
476
+ Union[
477
+ mm_schemas.ModelEndpointMonitoringResultValues,
478
+ mm_schemas.ModelEndpointMonitoringMetricNoData,
479
+ ],
480
+ ],
481
+ list[
482
+ Union[
483
+ mm_schemas.ModelEndpointMonitoringMetricValues,
484
+ mm_schemas.ModelEndpointMonitoringMetricNoData,
485
+ ],
486
+ ],
487
+ ]:
488
+ """
489
+ Read metrics OR results from the TSDB and return as a list.
490
+ Note: the type must match the actual metrics in the `metrics` parameter.
491
+ If the type is "results", pass only results in the `metrics` parameter.
492
+ """
493
+
494
+ if type == "metrics":
495
+ table_path = self.tables[mm_schemas.V3IOTSDBTables.METRICS]
496
+ name = mm_schemas.MetricData.METRIC_NAME
497
+ df_handler = self.df_to_metrics_values
498
+ elif type == "results":
499
+ table_path = self.tables[mm_schemas.V3IOTSDBTables.APP_RESULTS]
500
+ name = mm_schemas.ResultData.RESULT_NAME
501
+ df_handler = self.df_to_results_values
502
+ else:
503
+ raise ValueError(f"Invalid {type = }")
504
+
505
+ query = self._get_sql_query(
506
+ endpoint_id,
507
+ [(metric.app, metric.name) for metric in metrics],
508
+ table_path=table_path,
509
+ name=name,
510
+ )
511
+
512
+ logger.debug("Querying V3IO TSDB", query=query)
513
+
514
+ df: pd.DataFrame = self._frames_client.read(
515
+ backend=_TSDB_BE,
516
+ start=start,
517
+ end=end,
518
+ query=query, # the filter argument does not work for this complex condition
519
+ )
520
+
521
+ logger.debug(
522
+ "Converting a DataFrame to a list of metrics or results values",
523
+ table=table_path,
524
+ project=self.project,
525
+ endpoint_id=endpoint_id,
526
+ is_empty=df.empty,
527
+ )
528
+
529
+ return df_handler(df=df, metrics=metrics, project=self.project)
530
+
531
+ @staticmethod
532
+ def _get_sql_query(
533
+ endpoint_id: str,
534
+ names: list[tuple[str, str]],
535
+ table_path: str,
536
+ name: str = mm_schemas.ResultData.RESULT_NAME,
537
+ ) -> str:
538
+ """Get the SQL query for the results/metrics table"""
539
+ with StringIO() as query:
540
+ query.write(
541
+ f"SELECT * FROM '{table_path}' "
542
+ f"WHERE {mm_schemas.WriterEvent.ENDPOINT_ID}='{endpoint_id}'"
543
+ )
544
+ if names:
545
+ query.write(" AND (")
546
+
547
+ for i, (app_name, result_name) in enumerate(names):
548
+ sub_cond = (
549
+ f"({mm_schemas.WriterEvent.APPLICATION_NAME}='{app_name}' "
550
+ f"AND {name}='{result_name}')"
551
+ )
552
+ if i != 0: # not first sub condition
553
+ query.write(" OR ")
554
+ query.write(sub_cond)
555
+
556
+ query.write(")")
557
+
558
+ query.write(";")
559
+ return query.getvalue()
560
+
561
+ def read_predictions(
562
+ self,
563
+ *,
564
+ endpoint_id: str,
565
+ start: Union[datetime, str],
566
+ end: Union[datetime, str],
567
+ aggregation_window: Optional[str] = None,
568
+ agg_funcs: Optional[list[str]] = None,
569
+ limit: Optional[int] = None,
570
+ ) -> Union[
571
+ mm_schemas.ModelEndpointMonitoringMetricNoData,
572
+ mm_schemas.ModelEndpointMonitoringMetricValues,
573
+ ]:
574
+ if (agg_funcs and not aggregation_window) or (
575
+ aggregation_window and not agg_funcs
576
+ ):
577
+ raise mlrun.errors.MLRunInvalidArgumentError(
578
+ "both or neither of `aggregation_window` and `agg_funcs` must be provided"
579
+ )
580
+ df = self._get_records(
581
+ table=mm_schemas.FileTargetKind.PREDICTIONS,
582
+ start=start,
583
+ end=end,
584
+ columns=[mm_schemas.EventFieldType.LATENCY],
585
+ filter_query=f"endpoint_id=='{endpoint_id}'",
586
+ interval=aggregation_window,
587
+ agg_funcs=agg_funcs,
588
+ limit=limit,
589
+ sliding_window_step=aggregation_window,
590
+ )
591
+
592
+ full_name = get_invocations_fqn(self.project)
593
+
594
+ if df.empty:
595
+ return mm_schemas.ModelEndpointMonitoringMetricNoData(
596
+ full_name=full_name,
597
+ type=mm_schemas.ModelEndpointMonitoringMetricType.METRIC,
598
+ )
599
+
600
+ latency_column = (
601
+ f"{agg_funcs[0]}({mm_schemas.EventFieldType.LATENCY})"
602
+ if agg_funcs
603
+ else mm_schemas.EventFieldType.LATENCY
604
+ )
605
+
606
+ return mm_schemas.ModelEndpointMonitoringMetricValues(
607
+ full_name=full_name,
608
+ values=list(
609
+ zip(
610
+ df.index,
611
+ df[latency_column],
612
+ )
613
+ ), # pyright: ignore[reportArgumentType]
614
+ )
615
+
616
+ def read_prediction_metric_for_endpoint_if_exists(
617
+ self, endpoint_id: str
618
+ ) -> Optional[mm_schemas.ModelEndpointMonitoringMetric]:
619
+ # Read just one record, because we just want to check if there is any data for this endpoint_id
620
+ predictions = self.read_predictions(
621
+ endpoint_id=endpoint_id, start="0", end="now", limit=1
622
+ )
623
+ if predictions:
624
+ return mm_schemas.ModelEndpointMonitoringMetric(
625
+ project=self.project,
626
+ app=mm_schemas.SpecialApps.MLRUN_INFRA,
627
+ type=mm_schemas.ModelEndpointMonitoringMetricType.METRIC,
628
+ name=mm_schemas.PredictionsQueryConstants.INVOCATIONS,
629
+ full_name=get_invocations_fqn(self.project),
630
+ )
@@ -24,6 +24,10 @@ import mlrun.common.schemas
24
24
  from mlrun.common.schemas.model_monitoring import (
25
25
  EventFieldType,
26
26
  )
27
+ from mlrun.common.schemas.model_monitoring.model_endpoints import (
28
+ ModelEndpointMonitoringMetricType,
29
+ _compose_full_name,
30
+ )
27
31
  from mlrun.model_monitoring.model_endpoint import ModelEndpoint
28
32
  from mlrun.utils import logger
29
33
 
@@ -111,6 +115,24 @@ def get_connection_string(secret_provider: typing.Callable = None) -> str:
111
115
  )
112
116
 
113
117
 
118
+ def get_tsdb_connection_string(
119
+ secret_provider: typing.Optional[typing.Callable] = None,
120
+ ) -> str:
121
+ """Get TSDB connection string from the project secret. If wasn't set, take it from the system
122
+ configurations.
123
+ :param secret_provider: An optional secret provider to get the connection string secret.
124
+ :return: Valid TSDB connection string.
125
+ """
126
+
127
+ return (
128
+ mlrun.get_secret_or_env(
129
+ key=mlrun.common.schemas.model_monitoring.ProjectSecretKeys.TSDB_CONNECTION,
130
+ secret_provider=secret_provider,
131
+ )
132
+ or mlrun.mlconf.model_endpoint_monitoring.tsdb_connection
133
+ )
134
+
135
+
114
136
  def batch_dict2timedelta(batch_dict: _BatchDict) -> datetime.timedelta:
115
137
  """
116
138
  Convert a batch dictionary to timedelta.
@@ -260,3 +282,26 @@ def get_endpoint_record(project: str, endpoint_id: str):
260
282
  project=project,
261
283
  )
262
284
  return model_endpoint_store.get_model_endpoint(endpoint_id=endpoint_id)
285
+
286
+
287
+ def get_result_instance_fqn(
288
+ model_endpoint_id: str, app_name: str, result_name: str
289
+ ) -> str:
290
+ return f"{model_endpoint_id}.{app_name}.result.{result_name}"
291
+
292
+
293
+ def get_default_result_instance_fqn(model_endpoint_id: str) -> str:
294
+ return get_result_instance_fqn(
295
+ model_endpoint_id,
296
+ mm_constants.HistogramDataDriftApplicationConstants.NAME,
297
+ mm_constants.HistogramDataDriftApplicationConstants.GENERAL_RESULT_NAME,
298
+ )
299
+
300
+
301
+ def get_invocations_fqn(project: str) -> str:
302
+ return _compose_full_name(
303
+ project=project,
304
+ app=mm_constants.SpecialApps.MLRUN_INFRA,
305
+ name=mm_constants.PredictionsQueryConstants.INVOCATIONS,
306
+ type=ModelEndpointMonitoringMetricType.METRIC,
307
+ )