mlrun 1.7.0rc17__py3-none-any.whl → 1.7.0rc18__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of mlrun might be problematic. Click here for more details.

Files changed (55)
  1. mlrun/alerts/alert.py +1 -1
  2. mlrun/artifacts/manager.py +5 -1
  3. mlrun/common/runtimes/constants.py +3 -0
  4. mlrun/common/schemas/__init__.py +1 -1
  5. mlrun/common/schemas/alert.py +31 -9
  6. mlrun/common/schemas/client_spec.py +1 -0
  7. mlrun/common/schemas/function.py +4 -0
  8. mlrun/common/schemas/model_monitoring/__init__.py +3 -1
  9. mlrun/common/schemas/model_monitoring/constants.py +20 -1
  10. mlrun/common/schemas/model_monitoring/grafana.py +9 -5
  11. mlrun/common/schemas/model_monitoring/model_endpoints.py +17 -6
  12. mlrun/config.py +2 -0
  13. mlrun/data_types/to_pandas.py +5 -5
  14. mlrun/datastore/datastore.py +6 -2
  15. mlrun/datastore/redis.py +2 -2
  16. mlrun/datastore/s3.py +5 -0
  17. mlrun/datastore/sources.py +111 -6
  18. mlrun/datastore/targets.py +2 -2
  19. mlrun/db/base.py +5 -1
  20. mlrun/db/httpdb.py +22 -3
  21. mlrun/db/nopdb.py +5 -1
  22. mlrun/errors.py +6 -0
  23. mlrun/feature_store/retrieval/conversion.py +5 -5
  24. mlrun/feature_store/retrieval/job.py +3 -2
  25. mlrun/feature_store/retrieval/spark_merger.py +2 -1
  26. mlrun/frameworks/_dl_common/loggers/tensorboard_logger.py +2 -2
  27. mlrun/model_monitoring/db/stores/base/store.py +16 -3
  28. mlrun/model_monitoring/db/stores/sqldb/sql_store.py +44 -43
  29. mlrun/model_monitoring/db/stores/v3io_kv/kv_store.py +190 -91
  30. mlrun/model_monitoring/db/tsdb/__init__.py +35 -6
  31. mlrun/model_monitoring/db/tsdb/base.py +25 -18
  32. mlrun/model_monitoring/db/tsdb/tdengine/__init__.py +15 -0
  33. mlrun/model_monitoring/db/tsdb/tdengine/schemas.py +207 -0
  34. mlrun/model_monitoring/db/tsdb/tdengine/stream_graph_steps.py +45 -0
  35. mlrun/model_monitoring/db/tsdb/tdengine/tdengine_connector.py +231 -0
  36. mlrun/model_monitoring/db/tsdb/v3io/v3io_connector.py +73 -72
  37. mlrun/model_monitoring/db/v3io_tsdb_reader.py +217 -16
  38. mlrun/model_monitoring/helpers.py +32 -0
  39. mlrun/model_monitoring/stream_processing.py +7 -4
  40. mlrun/model_monitoring/writer.py +18 -13
  41. mlrun/package/utils/_formatter.py +2 -2
  42. mlrun/projects/project.py +33 -8
  43. mlrun/render.py +8 -5
  44. mlrun/runtimes/databricks_job/databricks_wrapper.py +1 -1
  45. mlrun/utils/async_http.py +25 -5
  46. mlrun/utils/helpers.py +20 -1
  47. mlrun/utils/notifications/notification/slack.py +27 -7
  48. mlrun/utils/notifications/notification_pusher.py +38 -40
  49. mlrun/utils/version/version.json +2 -2
  50. {mlrun-1.7.0rc17.dist-info → mlrun-1.7.0rc18.dist-info}/METADATA +7 -2
  51. {mlrun-1.7.0rc17.dist-info → mlrun-1.7.0rc18.dist-info}/RECORD +55 -51
  52. {mlrun-1.7.0rc17.dist-info → mlrun-1.7.0rc18.dist-info}/LICENSE +0 -0
  53. {mlrun-1.7.0rc17.dist-info → mlrun-1.7.0rc18.dist-info}/WHEEL +0 -0
  54. {mlrun-1.7.0rc17.dist-info → mlrun-1.7.0rc18.dist-info}/entry_points.txt +0 -0
  55. {mlrun-1.7.0rc17.dist-info → mlrun-1.7.0rc18.dist-info}/top_level.txt +0 -0
@@ -11,8 +11,8 @@
11
11
  # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
12
  # See the License for the specific language governing permissions and
13
13
  # limitations under the License.
14
-
15
14
  import datetime
15
+ import typing
16
16
 
17
17
  import pandas as pd
18
18
  import v3io_frames.client
@@ -21,7 +21,7 @@ from v3io.dataplane import Client as V3IOClient
21
21
  from v3io_frames.frames_pb2 import IGNORE
22
22
 
23
23
  import mlrun.common.model_monitoring
24
- import mlrun.common.schemas.model_monitoring as mm_constants
24
+ import mlrun.common.schemas.model_monitoring as mm_schemas
25
25
  import mlrun.feature_store.steps
26
26
  import mlrun.utils.v3io_clients
27
27
  from mlrun.model_monitoring.db import TSDBConnector
@@ -37,12 +37,14 @@ class V3IOTSDBConnector(TSDBConnector):
37
37
  Client that provides API for executing commands on the V3IO TSDB table.
38
38
  """
39
39
 
40
+ type: str = mm_schemas.TSDBTarget.V3IO_TSDB
41
+
40
42
  def __init__(
41
43
  self,
42
44
  project: str,
43
- access_key: str = None,
45
+ access_key: typing.Optional[str] = None,
44
46
  container: str = "users",
45
- v3io_framesd: str = None,
47
+ v3io_framesd: typing.Optional[str] = None,
46
48
  create_table: bool = False,
47
49
  ):
48
50
  super().__init__(project=project)
@@ -61,14 +63,14 @@ class V3IOTSDBConnector(TSDBConnector):
61
63
  self._init_tables_path()
62
64
 
63
65
  if create_table:
64
- self.create_tsdb_application_tables()
66
+ self.create_tables()
65
67
 
66
68
  def _init_tables_path(self):
67
69
  self.tables = {}
68
70
 
69
71
  events_table_full_path = mlrun.mlconf.get_model_monitoring_file_target_path(
70
72
  project=self.project,
71
- kind=mm_constants.FileTargetKind.EVENTS,
73
+ kind=mm_schemas.FileTargetKind.EVENTS,
72
74
  )
73
75
  (
74
76
  _,
@@ -77,12 +79,12 @@ class V3IOTSDBConnector(TSDBConnector):
77
79
  ) = mlrun.common.model_monitoring.helpers.parse_model_endpoint_store_prefix(
78
80
  events_table_full_path
79
81
  )
80
- self.tables[mm_constants.MonitoringTSDBTables.EVENTS] = events_path
82
+ self.tables[mm_schemas.V3IOTSDBTables.EVENTS] = events_path
81
83
 
82
84
  monitoring_application_full_path = (
83
85
  mlrun.mlconf.get_model_monitoring_file_target_path(
84
86
  project=self.project,
85
- kind=mm_constants.FileTargetKind.MONITORING_APPLICATION,
87
+ kind=mm_schemas.FileTargetKind.MONITORING_APPLICATION,
86
88
  )
87
89
  )
88
90
  (
@@ -92,17 +94,17 @@ class V3IOTSDBConnector(TSDBConnector):
92
94
  ) = mlrun.common.model_monitoring.helpers.parse_model_endpoint_store_prefix(
93
95
  monitoring_application_full_path
94
96
  )
95
- self.tables[mm_constants.MonitoringTSDBTables.APP_RESULTS] = (
96
- monitoring_application_path + mm_constants.MonitoringTSDBTables.APP_RESULTS
97
+ self.tables[mm_schemas.V3IOTSDBTables.APP_RESULTS] = (
98
+ monitoring_application_path + mm_schemas.V3IOTSDBTables.APP_RESULTS
97
99
  )
98
- self.tables[mm_constants.MonitoringTSDBTables.METRICS] = (
99
- monitoring_application_path + mm_constants.MonitoringTSDBTables.METRICS
100
+ self.tables[mm_schemas.V3IOTSDBTables.METRICS] = (
101
+ monitoring_application_path + mm_schemas.V3IOTSDBTables.METRICS
100
102
  )
101
103
 
102
104
  monitoring_predictions_full_path = (
103
105
  mlrun.mlconf.get_model_monitoring_file_target_path(
104
106
  project=self.project,
105
- kind=mm_constants.FileTargetKind.PREDICTIONS,
107
+ kind=mm_schemas.FileTargetKind.PREDICTIONS,
106
108
  )
107
109
  )
108
110
  (
@@ -112,25 +114,25 @@ class V3IOTSDBConnector(TSDBConnector):
112
114
  ) = mlrun.common.model_monitoring.helpers.parse_model_endpoint_store_prefix(
113
115
  monitoring_predictions_full_path
114
116
  )
115
- self.tables[mm_constants.FileTargetKind.PREDICTIONS] = (
116
- monitoring_predictions_path
117
- )
117
+ self.tables[mm_schemas.FileTargetKind.PREDICTIONS] = monitoring_predictions_path
118
118
 
119
- def create_tsdb_application_tables(self):
119
+ def create_tables(self) -> None:
120
120
  """
121
- Create the application tables using the TSDB connector. At the moment we support 2 types of application tables:
121
+ Create the tables using the TSDB connector. The tables are being created in the V3IO TSDB and include:
122
122
  - app_results: a detailed result that includes status, kind, extra data, etc.
123
123
  - metrics: a basic key value that represents a single numeric metric.
124
+ Note that the predictions table is automatically created by the model monitoring stream pod.
124
125
  """
125
126
  application_tables = [
126
- mm_constants.MonitoringTSDBTables.APP_RESULTS,
127
- mm_constants.MonitoringTSDBTables.METRICS,
127
+ mm_schemas.V3IOTSDBTables.APP_RESULTS,
128
+ mm_schemas.V3IOTSDBTables.METRICS,
128
129
  ]
129
- for table in application_tables:
130
- logger.info("Creating table in V3IO TSDB", table=table)
130
+ for table_name in application_tables:
131
+ logger.info("Creating table in V3IO TSDB", table_name=table_name)
132
+ table = self.tables[table_name]
131
133
  self._frames_client.create(
132
134
  backend=_TSDB_BE,
133
- table=self.tables[table],
135
+ table=table,
134
136
  if_exists=IGNORE,
135
137
  rate=_TSDB_RATE,
136
138
  )
@@ -156,20 +158,20 @@ class V3IOTSDBConnector(TSDBConnector):
156
158
  "storey.TSDBTarget",
157
159
  name="tsdb_predictions",
158
160
  after="MapFeatureNames",
159
- path=f"{self.container}/{self.tables[mm_constants.FileTargetKind.PREDICTIONS]}",
161
+ path=f"{self.container}/{self.tables[mm_schemas.FileTargetKind.PREDICTIONS]}",
160
162
  rate="1/s",
161
- time_col=mm_constants.EventFieldType.TIMESTAMP,
163
+ time_col=mm_schemas.EventFieldType.TIMESTAMP,
162
164
  container=self.container,
163
165
  v3io_frames=self.v3io_framesd,
164
166
  columns=["latency"],
165
167
  index_cols=[
166
- mm_constants.EventFieldType.ENDPOINT_ID,
168
+ mm_schemas.EventFieldType.ENDPOINT_ID,
167
169
  ],
168
170
  aggr="count,avg",
169
171
  aggr_granularity="1m",
170
172
  max_events=tsdb_batching_max_events,
171
173
  flush_after_seconds=tsdb_batching_timeout_secs,
172
- key=mm_constants.EventFieldType.ENDPOINT_ID,
174
+ key=mm_schemas.EventFieldType.ENDPOINT_ID,
173
175
  )
174
176
 
175
177
  # Before writing data to TSDB, create dictionary of 2-3 dictionaries that contains
@@ -198,40 +200,40 @@ class V3IOTSDBConnector(TSDBConnector):
198
200
  "storey.TSDBTarget",
199
201
  name=name,
200
202
  after=after,
201
- path=f"{self.container}/{self.tables[mm_constants.MonitoringTSDBTables.EVENTS]}",
203
+ path=f"{self.container}/{self.tables[mm_schemas.V3IOTSDBTables.EVENTS]}",
202
204
  rate="10/m",
203
- time_col=mm_constants.EventFieldType.TIMESTAMP,
205
+ time_col=mm_schemas.EventFieldType.TIMESTAMP,
204
206
  container=self.container,
205
207
  v3io_frames=self.v3io_framesd,
206
208
  infer_columns_from_data=True,
207
209
  index_cols=[
208
- mm_constants.EventFieldType.ENDPOINT_ID,
209
- mm_constants.EventFieldType.RECORD_TYPE,
210
- mm_constants.EventFieldType.ENDPOINT_TYPE,
210
+ mm_schemas.EventFieldType.ENDPOINT_ID,
211
+ mm_schemas.EventFieldType.RECORD_TYPE,
212
+ mm_schemas.EventFieldType.ENDPOINT_TYPE,
211
213
  ],
212
214
  max_events=tsdb_batching_max_events,
213
215
  flush_after_seconds=tsdb_batching_timeout_secs,
214
- key=mm_constants.EventFieldType.ENDPOINT_ID,
216
+ key=mm_schemas.EventFieldType.ENDPOINT_ID,
215
217
  )
216
218
 
217
219
  # unpacked base_metrics dictionary
218
220
  apply_filter_and_unpacked_keys(
219
221
  name="FilterAndUnpackKeys1",
220
- keys=mm_constants.EventKeyMetrics.BASE_METRICS,
222
+ keys=mm_schemas.EventKeyMetrics.BASE_METRICS,
221
223
  )
222
224
  apply_tsdb_target(name="tsdb1", after="FilterAndUnpackKeys1")
223
225
 
224
226
  # unpacked endpoint_features dictionary
225
227
  apply_filter_and_unpacked_keys(
226
228
  name="FilterAndUnpackKeys2",
227
- keys=mm_constants.EventKeyMetrics.ENDPOINT_FEATURES,
229
+ keys=mm_schemas.EventKeyMetrics.ENDPOINT_FEATURES,
228
230
  )
229
231
  apply_tsdb_target(name="tsdb2", after="FilterAndUnpackKeys2")
230
232
 
231
233
  # unpacked custom_metrics dictionary. In addition, use storey.Filter remove none values
232
234
  apply_filter_and_unpacked_keys(
233
235
  name="FilterAndUnpackKeys3",
234
- keys=mm_constants.EventKeyMetrics.CUSTOM_METRICS,
236
+ keys=mm_schemas.EventKeyMetrics.CUSTOM_METRICS,
235
237
  )
236
238
 
237
239
  def apply_storey_filter():
@@ -248,56 +250,55 @@ class V3IOTSDBConnector(TSDBConnector):
248
250
  def write_application_event(
249
251
  self,
250
252
  event: dict,
251
- kind: mm_constants.WriterEventKind = mm_constants.WriterEventKind.RESULT,
252
- ):
253
+ kind: mm_schemas.WriterEventKind = mm_schemas.WriterEventKind.RESULT,
254
+ ) -> None:
253
255
  """Write a single result or metric to TSDB"""
254
256
 
255
- event[mm_constants.WriterEvent.END_INFER_TIME] = (
256
- datetime.datetime.fromisoformat(
257
- event[mm_constants.WriterEvent.END_INFER_TIME]
258
- )
257
+ event[mm_schemas.WriterEvent.END_INFER_TIME] = datetime.datetime.fromisoformat(
258
+ event[mm_schemas.WriterEvent.END_INFER_TIME]
259
259
  )
260
+ index_cols_base = [
261
+ mm_schemas.WriterEvent.END_INFER_TIME,
262
+ mm_schemas.WriterEvent.ENDPOINT_ID,
263
+ mm_schemas.WriterEvent.APPLICATION_NAME,
264
+ ]
260
265
 
261
- if kind == mm_constants.WriterEventKind.METRIC:
262
- # TODO : Implement the logic for writing metrics to V3IO TSDB
263
- return
266
+ if kind == mm_schemas.WriterEventKind.METRIC:
267
+ table = self.tables[mm_schemas.V3IOTSDBTables.METRICS]
268
+ index_cols = index_cols_base + [mm_schemas.MetricData.METRIC_NAME]
269
+ elif kind == mm_schemas.WriterEventKind.RESULT:
270
+ table = self.tables[mm_schemas.V3IOTSDBTables.APP_RESULTS]
271
+ index_cols = index_cols_base + [mm_schemas.ResultData.RESULT_NAME]
272
+ del event[mm_schemas.ResultData.RESULT_EXTRA_DATA]
273
+ else:
274
+ raise ValueError(f"Invalid {kind = }")
264
275
 
265
- del event[mm_constants.ResultData.RESULT_EXTRA_DATA]
266
276
  try:
267
277
  self._frames_client.write(
268
278
  backend=_TSDB_BE,
269
- table=self.tables[mm_constants.MonitoringTSDBTables.APP_RESULTS],
279
+ table=table,
270
280
  dfs=pd.DataFrame.from_records([event]),
271
- index_cols=[
272
- mm_constants.WriterEvent.END_INFER_TIME,
273
- mm_constants.WriterEvent.ENDPOINT_ID,
274
- mm_constants.WriterEvent.APPLICATION_NAME,
275
- mm_constants.ResultData.RESULT_NAME,
276
- ],
277
- )
278
- logger.info(
279
- "Updated V3IO TSDB successfully",
280
- table=self.tables[mm_constants.MonitoringTSDBTables.APP_RESULTS],
281
+ index_cols=index_cols,
281
282
  )
283
+ logger.info("Updated V3IO TSDB successfully", table=table)
282
284
  except v3io_frames.errors.Error as err:
283
- logger.warn(
285
+ logger.exception(
284
286
  "Could not write drift measures to TSDB",
285
287
  err=err,
286
- table=self.tables[mm_constants.MonitoringTSDBTables.APP_RESULTS],
288
+ table=table,
287
289
  event=event,
288
290
  )
289
-
290
291
  raise mlrun.errors.MLRunRuntimeError(
291
292
  f"Failed to write application result to TSDB: {err}"
292
293
  )
293
294
 
294
- def delete_tsdb_resources(self, table: str = None):
295
+ def delete_tsdb_resources(self, table: typing.Optional[str] = None):
295
296
  if table:
296
297
  # Delete a specific table
297
298
  tables = [table]
298
299
  else:
299
300
  # Delete all tables
300
- tables = mm_constants.MonitoringTSDBTables.list()
301
+ tables = mm_schemas.V3IOTSDBTables.list()
301
302
  for table in tables:
302
303
  try:
303
304
  self._frames_client.delete(
@@ -320,8 +321,8 @@ class V3IOTSDBConnector(TSDBConnector):
320
321
  self,
321
322
  endpoint_id: str,
322
323
  metrics: list[str],
323
- start: str = "now-1h",
324
- end: str = "now",
324
+ start: str,
325
+ end: str,
325
326
  ) -> dict[str, list[tuple[str, float]]]:
326
327
  """
327
328
  Getting real time metrics from the TSDB. There are pre-defined metrics for model endpoints such as
@@ -350,7 +351,7 @@ class V3IOTSDBConnector(TSDBConnector):
350
351
 
351
352
  try:
352
353
  data = self.get_records(
353
- table=mm_constants.MonitoringTSDBTables.EVENTS,
354
+ table=mm_schemas.V3IOTSDBTables.EVENTS,
354
355
  columns=["endpoint_id", *metrics],
355
356
  filter_query=f"endpoint_id=='{endpoint_id}'",
356
357
  start=start,
@@ -377,17 +378,14 @@ class V3IOTSDBConnector(TSDBConnector):
377
378
  def get_records(
378
379
  self,
379
380
  table: str,
380
- columns: list[str] = None,
381
+ start: str,
382
+ end: str,
383
+ columns: typing.Optional[list[str]] = None,
381
384
  filter_query: str = "",
382
- start: str = "now-1h",
383
- end: str = "now",
384
385
  ) -> pd.DataFrame:
385
386
  """
386
387
  Getting records from V3IO TSDB data collection.
387
388
  :param table: Path to the collection to query.
388
- :param columns: Columns to include in the result.
389
- :param filter_query: V3IO filter expression. The expected filter expression includes different conditions,
390
- divided by ' AND '.
391
389
  :param start: The start time of the metrics. Can be represented by a string containing an RFC 3339
392
390
  time, a Unix timestamp in milliseconds, a relative time (`'now'` or
393
391
  `'now-[0-9]+[mhd]'`, where `m` = minutes, `h` = hours, `'d'` = days, and
@@ -396,6 +394,9 @@ class V3IOTSDBConnector(TSDBConnector):
396
394
  time, a Unix timestamp in milliseconds, a relative time (`'now'` or
397
395
  `'now-[0-9]+[mhd]'`, where `m` = minutes, `h` = hours, `'d'` = days, and
398
396
  `'s'` = seconds), or 0 for the earliest time.
397
+ :param columns: Columns to include in the result.
398
+ :param filter_query: V3IO filter expression. The expected filter expression includes different conditions,
399
+ divided by ' AND '.
399
400
  :return: DataFrame with the provided attributes from the data collection.
400
401
  :raise: MLRunNotFoundError if the provided table wasn't found.
401
402
  """
@@ -422,7 +423,7 @@ class V3IOTSDBConnector(TSDBConnector):
422
423
  """
423
424
  events_table_full_path = mlrun.mlconf.get_model_monitoring_file_target_path(
424
425
  project=self.project,
425
- kind=mm_constants.FileTargetKind.EVENTS,
426
+ kind=mm_schemas.FileTargetKind.EVENTS,
426
427
  )
427
428
 
428
429
  # Generate the main directory with the V3IO resources
@@ -12,10 +12,12 @@
12
12
  # See the License for the specific language governing permissions and
13
13
  # limitations under the License.
14
14
 
15
- # TODO: Move this module into the TSDB abstraction once it is in.
15
+ # TODO: Move this module into the TSDB abstraction:
16
+ # mlrun/model_monitoring/db/tsdb/v3io/v3io_connector.py
16
17
 
17
18
  from datetime import datetime
18
19
  from io import StringIO
20
+ from typing import Literal, Optional, Union
19
21
 
20
22
  import pandas as pd
21
23
 
@@ -25,21 +27,27 @@ import mlrun.model_monitoring.writer as mm_writer
25
27
  import mlrun.utils.v3io_clients
26
28
  from mlrun.common.schemas.model_monitoring.model_endpoints import (
27
29
  ModelEndpointMonitoringMetric,
30
+ ModelEndpointMonitoringMetricNoData,
28
31
  ModelEndpointMonitoringMetricType,
29
- ModelEndpointMonitoringResultNoData,
32
+ ModelEndpointMonitoringMetricValues,
30
33
  ModelEndpointMonitoringResultValues,
31
34
  _compose_full_name,
32
- _ModelEndpointMonitoringResultValuesBase,
35
+ _ModelEndpointMonitoringMetricValuesBase,
33
36
  )
34
37
  from mlrun.model_monitoring.db.stores.v3io_kv.kv_store import KVStoreBase
35
38
  from mlrun.model_monitoring.db.tsdb.v3io.v3io_connector import _TSDB_BE
36
39
  from mlrun.utils import logger
37
40
 
38
41
 
39
- def _get_sql_query(endpoint_id: str, names: list[tuple[str, str]]) -> str:
42
+ def _get_sql_query(
43
+ endpoint_id: str,
44
+ names: list[tuple[str, str]],
45
+ table_name: str = mm_constants.V3IOTSDBTables.APP_RESULTS,
46
+ name: str = mm_writer.ResultData.RESULT_NAME,
47
+ ) -> str:
40
48
  with StringIO() as query:
41
49
  query.write(
42
- f"SELECT * FROM '{mm_constants.MonitoringTSDBTables.APP_RESULTS}' "
50
+ f"SELECT * FROM '{table_name}' "
43
51
  f"WHERE {mm_writer.WriterEvent.ENDPOINT_ID}='{endpoint_id}'"
44
52
  )
45
53
  if names:
@@ -48,7 +56,7 @@ def _get_sql_query(endpoint_id: str, names: list[tuple[str, str]]) -> str:
48
56
  for i, (app_name, result_name) in enumerate(names):
49
57
  sub_cond = (
50
58
  f"({mm_writer.WriterEvent.APPLICATION_NAME}='{app_name}' "
51
- f"AND {mm_writer.ResultData.RESULT_NAME}='{result_name}')"
59
+ f"AND {name}='{result_name}')"
52
60
  )
53
61
  if i != 0: # not first sub condition
54
62
  query.write(" OR ")
@@ -73,30 +81,87 @@ def _get_result_kind(result_df: pd.DataFrame) -> mm_constants.ResultKindApp:
73
81
  return unique_kinds[0]
74
82
 
75
83
 
76
- def read_data(
84
+ def read_metrics_data(
77
85
  *,
78
86
  project: str,
79
87
  endpoint_id: str,
80
88
  start: datetime,
81
89
  end: datetime,
82
90
  metrics: list[ModelEndpointMonitoringMetric],
83
- ) -> list[_ModelEndpointMonitoringResultValuesBase]:
91
+ type: Literal["metrics", "results"] = "results",
92
+ ) -> Union[
93
+ list[
94
+ Union[
95
+ ModelEndpointMonitoringResultValues,
96
+ ModelEndpointMonitoringMetricNoData,
97
+ ],
98
+ ],
99
+ list[
100
+ Union[
101
+ ModelEndpointMonitoringMetricValues,
102
+ ModelEndpointMonitoringMetricNoData,
103
+ ],
104
+ ],
105
+ ]:
106
+ """
107
+ Read metrics OR results from the TSDB and return as a list.
108
+ Note: the type must match the actual metrics in the `metrics` parameter.
109
+ If the type is "results", pass only results in the `metrics` parameter.
110
+ """
84
111
  client = mlrun.utils.v3io_clients.get_frames_client(
85
112
  address=mlrun.mlconf.v3io_framesd,
86
113
  container=KVStoreBase.get_v3io_monitoring_apps_container(project),
87
114
  )
115
+
116
+ if type == "metrics":
117
+ table_name = mm_constants.V3IOTSDBTables.METRICS
118
+ name = mm_constants.MetricData.METRIC_NAME
119
+ df_handler = df_to_metrics_values
120
+ elif type == "results":
121
+ table_name = mm_constants.V3IOTSDBTables.APP_RESULTS
122
+ name = mm_constants.ResultData.RESULT_NAME
123
+ df_handler = df_to_results_values
124
+ else:
125
+ raise ValueError(f"Invalid {type = }")
126
+
127
+ query = _get_sql_query(
128
+ endpoint_id,
129
+ [(metric.app, metric.name) for metric in metrics],
130
+ table_name=table_name,
131
+ name=name,
132
+ )
133
+
134
+ logger.debug("Querying V3IO TSDB", query=query)
135
+
88
136
  df: pd.DataFrame = client.read(
89
137
  backend=_TSDB_BE,
90
- query=_get_sql_query(
91
- endpoint_id, [(metric.app, metric.name) for metric in metrics]
92
- ),
138
+ query=query,
93
139
  start=start,
94
140
  end=end,
95
141
  )
96
142
 
143
+ logger.debug(
144
+ "Read a data-frame", project=project, endpoint_id=endpoint_id, is_empty=df.empty
145
+ )
146
+
147
+ return df_handler(df=df, metrics=metrics, project=project)
148
+
149
+
150
+ def df_to_results_values(
151
+ *, df: pd.DataFrame, metrics: list[ModelEndpointMonitoringMetric], project: str
152
+ ) -> list[
153
+ Union[ModelEndpointMonitoringResultValues, ModelEndpointMonitoringMetricNoData]
154
+ ]:
155
+ """
156
+ Parse a time-indexed data-frame of results from the TSDB into a list of
157
+ results values per distinct results.
158
+ When a result is not found in the data-frame, it is represented in no-data object.
159
+ """
97
160
  metrics_without_data = {metric.full_name: metric for metric in metrics}
98
161
 
99
- metrics_values: list[_ModelEndpointMonitoringResultValuesBase] = []
162
+ metrics_values: list[
163
+ Union[ModelEndpointMonitoringResultValues, ModelEndpointMonitoringMetricNoData]
164
+ ] = []
100
165
  if not df.empty:
101
166
  grouped = df.groupby(
102
167
  [mm_writer.WriterEvent.APPLICATION_NAME, mm_writer.ResultData.RESULT_NAME],
@@ -104,13 +169,13 @@ def read_data(
104
169
  )
105
170
  else:
106
171
  grouped = []
107
- for (app_name, result_name), sub_df in grouped:
172
+ logger.debug("No results", missing_results=metrics_without_data.keys())
173
+ for (app_name, name), sub_df in grouped:
108
174
  result_kind = _get_result_kind(sub_df)
109
- full_name = _compose_full_name(project=project, app=app_name, name=result_name)
175
+ full_name = _compose_full_name(project=project, app=app_name, name=name)
110
176
  metrics_values.append(
111
177
  ModelEndpointMonitoringResultValues(
112
178
  full_name=full_name,
113
- type=ModelEndpointMonitoringMetricType.RESULT,
114
179
  result_kind=result_kind,
115
180
  values=list(
116
181
  zip(
@@ -124,11 +189,147 @@ def read_data(
124
189
  del metrics_without_data[full_name]
125
190
 
126
191
  for metric in metrics_without_data.values():
192
+ if metric.full_name == get_invocations_fqn(project):
193
+ continue
127
194
  metrics_values.append(
128
- ModelEndpointMonitoringResultNoData(
195
+ ModelEndpointMonitoringMetricNoData(
129
196
  full_name=metric.full_name,
130
197
  type=ModelEndpointMonitoringMetricType.RESULT,
131
198
  )
132
199
  )
133
200
 
134
201
  return metrics_values
202
+
203
+
204
+ def df_to_metrics_values(
205
+ *, df: pd.DataFrame, metrics: list[ModelEndpointMonitoringMetric], project: str
206
+ ) -> list[
207
+ Union[ModelEndpointMonitoringMetricValues, ModelEndpointMonitoringMetricNoData]
208
+ ]:
209
+ """
210
+ Parse a time-indexed data-frame of metrics from the TSDB into a list of
211
+ metrics values per distinct results.
212
+ When a metric is not found in the data-frame, it is represented in no-data object.
213
+ """
214
+ metrics_without_data = {metric.full_name: metric for metric in metrics}
215
+
216
+ metrics_values: list[
217
+ Union[ModelEndpointMonitoringMetricValues, ModelEndpointMonitoringMetricNoData]
218
+ ] = []
219
+ if not df.empty:
220
+ grouped = df.groupby(
221
+ [mm_writer.WriterEvent.APPLICATION_NAME, mm_writer.MetricData.METRIC_NAME],
222
+ observed=False,
223
+ )
224
+ else:
225
+ logger.debug("No metrics", missing_metrics=metrics_without_data.keys())
226
+ grouped = []
227
+ for (app_name, name), sub_df in grouped:
228
+ full_name = _compose_full_name(
229
+ project=project,
230
+ app=app_name,
231
+ name=name,
232
+ type=ModelEndpointMonitoringMetricType.METRIC,
233
+ )
234
+ metrics_values.append(
235
+ ModelEndpointMonitoringMetricValues(
236
+ full_name=full_name,
237
+ values=list(
238
+ zip(
239
+ sub_df.index,
240
+ sub_df[mm_writer.MetricData.METRIC_VALUE],
241
+ )
242
+ ), # pyright: ignore[reportArgumentType]
243
+ )
244
+ )
245
+ del metrics_without_data[full_name]
246
+
247
+ for metric in metrics_without_data.values():
248
+ metrics_values.append(
249
+ ModelEndpointMonitoringMetricNoData(
250
+ full_name=metric.full_name,
251
+ type=ModelEndpointMonitoringMetricType.METRIC,
252
+ )
253
+ )
254
+
255
+ return metrics_values
256
+
257
+
258
+ def get_invocations_fqn(project: str):
259
+ return mlrun.common.schemas.model_monitoring.model_endpoints._compose_full_name(
260
+ project=project,
261
+ app=mm_constants.SpecialApps.MLRUN_INFRA,
262
+ name=mlrun.common.schemas.model_monitoring.constants.PredictionsQueryConstants.INVOCATIONS,
263
+ type=mlrun.common.schemas.model_monitoring.ModelEndpointMonitoringMetricType.METRIC,
264
+ )
265
+
266
+
267
+ def read_predictions(
268
+ *,
269
+ project: str,
270
+ endpoint_id: str,
271
+ start: Optional[Union[datetime, str]] = None,
272
+ end: Optional[Union[datetime, str]] = None,
273
+ aggregation_window: Optional[str] = None,
274
+ limit: Optional[int] = None,
275
+ ) -> _ModelEndpointMonitoringMetricValuesBase:
276
+ client = mlrun.utils.v3io_clients.get_frames_client(
277
+ address=mlrun.mlconf.v3io_framesd,
278
+ container="users",
279
+ )
280
+ frames_client_kwargs = {}
281
+ if aggregation_window:
282
+ frames_client_kwargs["step"] = aggregation_window
283
+ frames_client_kwargs["aggregation_window"] = aggregation_window
284
+ if limit:
285
+ frames_client_kwargs["limit"] = limit
286
+ df: pd.DataFrame = client.read(
287
+ backend=_TSDB_BE,
288
+ table=f"pipelines/{project}/model-endpoints/predictions",
289
+ columns=["latency"],
290
+ filter=f"endpoint_id=='{endpoint_id}'",
291
+ start=start,
292
+ end=end,
293
+ aggregators="count",
294
+ **frames_client_kwargs,
295
+ )
296
+
297
+ full_name = get_invocations_fqn(project)
298
+
299
+ if df.empty:
300
+ return ModelEndpointMonitoringMetricNoData(
301
+ full_name=full_name,
302
+ type=ModelEndpointMonitoringMetricType.METRIC,
303
+ )
304
+
305
+ return ModelEndpointMonitoringMetricValues(
306
+ full_name=full_name,
307
+ values=list(
308
+ zip(
309
+ df.index,
310
+ df["count(latency)"],
311
+ )
312
+ ),
313
+ )
314
+
315
+
316
+ def read_prediction_metric_for_endpoint_if_exists(
317
+ *,
318
+ project: str,
319
+ endpoint_id: str,
320
+ ) -> Optional[ModelEndpointMonitoringMetric]:
321
+ predictions = read_predictions(
322
+ project=project,
323
+ endpoint_id=endpoint_id,
324
+ start="0",
325
+ end="now",
326
+ limit=1, # Read just one record, because we just want to check if there is any data for this endpoint_id
327
+ )
328
+ if predictions:
329
+ return ModelEndpointMonitoringMetric(
330
+ project=project,
331
+ app=mm_constants.SpecialApps.MLRUN_INFRA,
332
+ type=ModelEndpointMonitoringMetricType.METRIC,
333
+ name=mlrun.common.schemas.model_monitoring.constants.PredictionsQueryConstants.INVOCATIONS,
334
+ full_name=get_invocations_fqn(project),
335
+ )