mlrun 1.7.0rc17__py3-none-any.whl → 1.7.0rc18__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of mlrun might be problematic.

Files changed (55)
  1. mlrun/alerts/alert.py +1 -1
  2. mlrun/artifacts/manager.py +5 -1
  3. mlrun/common/runtimes/constants.py +3 -0
  4. mlrun/common/schemas/__init__.py +1 -1
  5. mlrun/common/schemas/alert.py +31 -9
  6. mlrun/common/schemas/client_spec.py +1 -0
  7. mlrun/common/schemas/function.py +4 -0
  8. mlrun/common/schemas/model_monitoring/__init__.py +3 -1
  9. mlrun/common/schemas/model_monitoring/constants.py +20 -1
  10. mlrun/common/schemas/model_monitoring/grafana.py +9 -5
  11. mlrun/common/schemas/model_monitoring/model_endpoints.py +17 -6
  12. mlrun/config.py +2 -0
  13. mlrun/data_types/to_pandas.py +5 -5
  14. mlrun/datastore/datastore.py +6 -2
  15. mlrun/datastore/redis.py +2 -2
  16. mlrun/datastore/s3.py +5 -0
  17. mlrun/datastore/sources.py +111 -6
  18. mlrun/datastore/targets.py +2 -2
  19. mlrun/db/base.py +5 -1
  20. mlrun/db/httpdb.py +22 -3
  21. mlrun/db/nopdb.py +5 -1
  22. mlrun/errors.py +6 -0
  23. mlrun/feature_store/retrieval/conversion.py +5 -5
  24. mlrun/feature_store/retrieval/job.py +3 -2
  25. mlrun/feature_store/retrieval/spark_merger.py +2 -1
  26. mlrun/frameworks/_dl_common/loggers/tensorboard_logger.py +2 -2
  27. mlrun/model_monitoring/db/stores/base/store.py +16 -3
  28. mlrun/model_monitoring/db/stores/sqldb/sql_store.py +44 -43
  29. mlrun/model_monitoring/db/stores/v3io_kv/kv_store.py +190 -91
  30. mlrun/model_monitoring/db/tsdb/__init__.py +35 -6
  31. mlrun/model_monitoring/db/tsdb/base.py +25 -18
  32. mlrun/model_monitoring/db/tsdb/tdengine/__init__.py +15 -0
  33. mlrun/model_monitoring/db/tsdb/tdengine/schemas.py +207 -0
  34. mlrun/model_monitoring/db/tsdb/tdengine/stream_graph_steps.py +45 -0
  35. mlrun/model_monitoring/db/tsdb/tdengine/tdengine_connector.py +231 -0
  36. mlrun/model_monitoring/db/tsdb/v3io/v3io_connector.py +73 -72
  37. mlrun/model_monitoring/db/v3io_tsdb_reader.py +217 -16
  38. mlrun/model_monitoring/helpers.py +32 -0
  39. mlrun/model_monitoring/stream_processing.py +7 -4
  40. mlrun/model_monitoring/writer.py +18 -13
  41. mlrun/package/utils/_formatter.py +2 -2
  42. mlrun/projects/project.py +33 -8
  43. mlrun/render.py +8 -5
  44. mlrun/runtimes/databricks_job/databricks_wrapper.py +1 -1
  45. mlrun/utils/async_http.py +25 -5
  46. mlrun/utils/helpers.py +20 -1
  47. mlrun/utils/notifications/notification/slack.py +27 -7
  48. mlrun/utils/notifications/notification_pusher.py +38 -40
  49. mlrun/utils/version/version.json +2 -2
  50. {mlrun-1.7.0rc17.dist-info → mlrun-1.7.0rc18.dist-info}/METADATA +7 -2
  51. {mlrun-1.7.0rc17.dist-info → mlrun-1.7.0rc18.dist-info}/RECORD +55 -51
  52. {mlrun-1.7.0rc17.dist-info → mlrun-1.7.0rc18.dist-info}/LICENSE +0 -0
  53. {mlrun-1.7.0rc17.dist-info → mlrun-1.7.0rc18.dist-info}/WHEEL +0 -0
  54. {mlrun-1.7.0rc17.dist-info → mlrun-1.7.0rc18.dist-info}/entry_points.txt +0 -0
  55. {mlrun-1.7.0rc17.dist-info → mlrun-1.7.0rc18.dist-info}/top_level.txt +0 -0
mlrun/model_monitoring/db/stores/v3io_kv/kv_store.py

@@ -15,23 +15,79 @@
 import json
 import os
 import typing
+from dataclasses import dataclass
 from http import HTTPStatus
 
 import v3io.dataplane
+import v3io.dataplane.output
 import v3io.dataplane.response
 
 import mlrun.common.model_monitoring.helpers
-import mlrun.common.schemas.model_monitoring as mm_constants
+import mlrun.common.schemas.model_monitoring as mm_schemas
 import mlrun.model_monitoring.db
 import mlrun.utils.v3io_clients
 from mlrun.utils import logger
 
 # Fields to encode before storing in the KV table or to decode after retrieving
 fields_to_encode_decode = [
-    mm_constants.EventFieldType.FEATURE_STATS,
-    mm_constants.EventFieldType.CURRENT_STATS,
+    mm_schemas.EventFieldType.FEATURE_STATS,
+    mm_schemas.EventFieldType.CURRENT_STATS,
 ]
 
+_METRIC_FIELDS: list[str] = [
+    mm_schemas.WriterEvent.APPLICATION_NAME,
+    mm_schemas.MetricData.METRIC_NAME,
+    mm_schemas.MetricData.METRIC_VALUE,
+    mm_schemas.WriterEvent.START_INFER_TIME,
+    mm_schemas.WriterEvent.END_INFER_TIME,
+]
+
+
+class SchemaField(typing.TypedDict):
+    name: str
+    type: str
+    nullable: bool
+
+
+@dataclass
+class SchemaParams:
+    key: str
+    fields: list[SchemaField]
+
+
+_RESULT_SCHEMA: list[SchemaField] = [
+    SchemaField(
+        name=mm_schemas.ResultData.RESULT_NAME,
+        type=mm_schemas.GrafanaColumnType.STRING,
+        nullable=False,
+    )
+]
+
+_METRIC_SCHEMA: list[SchemaField] = [
+    SchemaField(
+        name=mm_schemas.WriterEvent.APPLICATION_NAME,
+        type=mm_schemas.GrafanaColumnType.STRING,
+        nullable=False,
+    ),
+    SchemaField(
+        name=mm_schemas.MetricData.METRIC_NAME,
+        type=mm_schemas.GrafanaColumnType.STRING,
+        nullable=False,
+    ),
+]
+
+
+_KIND_TO_SCHEMA_PARAMS: dict[mm_schemas.WriterEventKind, SchemaParams] = {
+    mm_schemas.WriterEventKind.RESULT: SchemaParams(
+        key=mm_schemas.WriterEvent.APPLICATION_NAME, fields=_RESULT_SCHEMA
+    ),
+    mm_schemas.WriterEventKind.METRIC: SchemaParams(
+        key="metric_id", fields=_METRIC_SCHEMA
+    ),
+}
+
+_EXCLUDE_SCHEMA_FILTER_EXPRESSION = '__name!=".#schema"'
+
 
 class KVStoreBase(mlrun.model_monitoring.db.StoreBase):
     """
@@ -64,7 +120,7 @@ class KVStoreBase(mlrun.model_monitoring.db.StoreBase):
         self.client.kv.put(
             container=self.container,
             table_path=self.path,
-            key=endpoint[mm_constants.EventFieldType.UID],
+            key=endpoint[mm_schemas.EventFieldType.UID],
             attributes=endpoint,
         )
 
@@ -151,7 +207,7 @@ class KVStoreBase(mlrun.model_monitoring.db.StoreBase):
         """Getting path and container based on the model monitoring configurations"""
         path = mlrun.mlconf.model_endpoint_monitoring.store_prefixes.default.format(
             project=self.project,
-            kind=mlrun.common.schemas.ModelMonitoringStoreKinds.ENDPOINTS,
+            kind=mm_schemas.ModelMonitoringStoreKinds.ENDPOINTS,
         )
         (
             _,
@@ -217,11 +273,11 @@ class KVStoreBase(mlrun.model_monitoring.db.StoreBase):
         if uids is None:
             uids = []
             for item in items:
-                if mm_constants.EventFieldType.UID not in item:
+                if mm_schemas.EventFieldType.UID not in item:
                     # This is kept for backwards compatibility - in old versions the key column named endpoint_id
-                    uids.append(item[mm_constants.EventFieldType.ENDPOINT_ID])
+                    uids.append(item[mm_schemas.EventFieldType.ENDPOINT_ID])
                 else:
-                    uids.append(item[mm_constants.EventFieldType.UID])
+                    uids.append(item[mm_schemas.EventFieldType.UID])
 
         # Add each relevant model endpoint to the model endpoints list
         for endpoint_id in uids:
@@ -241,11 +297,11 @@ class KVStoreBase(mlrun.model_monitoring.db.StoreBase):
 
         # Delete model endpoint record from KV table
         for endpoint_dict in endpoints:
-            if mm_constants.EventFieldType.UID not in endpoint_dict:
+            if mm_schemas.EventFieldType.UID not in endpoint_dict:
                 # This is kept for backwards compatibility - in old versions the key column named endpoint_id
-                endpoint_id = endpoint_dict[mm_constants.EventFieldType.ENDPOINT_ID]
+                endpoint_id = endpoint_dict[mm_schemas.EventFieldType.ENDPOINT_ID]
             else:
-                endpoint_id = endpoint_dict[mm_constants.EventFieldType.UID]
+                endpoint_id = endpoint_dict[mm_schemas.EventFieldType.UID]
             self.delete_model_endpoint(
                 endpoint_id,
             )
@@ -268,11 +324,19 @@ class KVStoreBase(mlrun.model_monitoring.db.StoreBase):
             raise_for_status=v3io.dataplane.RaiseForStatus.never,
         )
 
+    @staticmethod
+    def _get_results_table_path(endpoint_id: str) -> str:
+        return endpoint_id
+
+    @staticmethod
+    def _get_metrics_table_path(endpoint_id: str) -> str:
+        return f"{endpoint_id}_metrics"
+
     def write_application_event(
         self,
         event: dict[str, typing.Any],
-        kind: mm_constants.WriterEventKind = mm_constants.WriterEventKind.RESULT,
-    ):
+        kind: mm_schemas.WriterEventKind = mm_schemas.WriterEventKind.RESULT,
+    ) -> None:
         """
         Write a new application event in the target table.
 
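The two new helpers fix the per-endpoint table layout: results live in a table named after the endpoint, while metrics get a sibling table with a `_metrics` suffix. An illustrative restatement of that convention (the endpoint ID is hypothetical):

```python
# Illustrative only: the table-path convention used by the two helpers above.
def results_table_path(endpoint_id: str) -> str:
    return endpoint_id  # results live in a table named after the endpoint


def metrics_table_path(endpoint_id: str) -> str:
    return f"{endpoint_id}_metrics"  # metrics live in a "_metrics" sibling table


print(results_table_path("ep-123"))  # ep-123
print(metrics_table_path("ep-123"))  # ep-123_metrics
```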
@@ -281,66 +345,63 @@ class KVStoreBase(mlrun.model_monitoring.db.StoreBase):
             object.
         :param kind: The type of the event, can be either "result" or "metric".
         """
-        if kind == mm_constants.WriterEventKind.METRIC:
-            # TODO : Implement the logic for writing metrics to KV
-            return
-
-        endpoint_id = event.pop(mm_constants.WriterEvent.ENDPOINT_ID)
-        app_name = event.pop(mm_constants.WriterEvent.APPLICATION_NAME)
-        metric_name = event.pop(mm_constants.ResultData.RESULT_NAME)
-        attributes = {metric_name: json.dumps(event)}
 
-        v3io_monitoring_apps_container = self.get_v3io_monitoring_apps_container(
-            project_name=self.project
-        )
+        container = self.get_v3io_monitoring_apps_container(project_name=self.project)
+        endpoint_id = event.pop(mm_schemas.WriterEvent.ENDPOINT_ID)
+
+        if kind == mm_schemas.WriterEventKind.METRIC:
+            table_path = self._get_metrics_table_path(endpoint_id)
+            key = f"{event[mm_schemas.WriterEvent.APPLICATION_NAME]}.{event[mm_schemas.MetricData.METRIC_NAME]}"
+            attributes = {event_key: event[event_key] for event_key in _METRIC_FIELDS}
+        elif kind == mm_schemas.WriterEventKind.RESULT:
+            table_path = self._get_results_table_path(endpoint_id)
+            key = event.pop(mm_schemas.WriterEvent.APPLICATION_NAME)
+            metric_name = event.pop(mm_schemas.ResultData.RESULT_NAME)
+            attributes = {metric_name: json.dumps(event)}
+        else:
+            raise ValueError(f"Invalid {kind = }")
 
         self.client.kv.update(
-            container=v3io_monitoring_apps_container,
-            table_path=endpoint_id,
-            key=app_name,
+            container=container,
+            table_path=table_path,
+            key=key,
             attributes=attributes,
         )
 
         schema_file = self.client.kv.new_cursor(
-            container=v3io_monitoring_apps_container,
-            table_path=endpoint_id,
+            container=container,
+            table_path=table_path,
             filter_expression='__name==".#schema"',
         )
 
         if not schema_file.all():
             logger.info(
-                "Generate a new V3IO KV schema file",
-                container=v3io_monitoring_apps_container,
-                endpoint_id=endpoint_id,
+                "Generating a new V3IO KV schema file",
+                container=container,
+                table_path=table_path,
+            )
+            self._generate_kv_schema(
+                container=container, table_path=table_path, kind=kind
             )
-            self._generate_kv_schema(endpoint_id, v3io_monitoring_apps_container)
-        logger.info("Updated V3IO KV successfully", key=app_name)
+        logger.info("Updated V3IO KV successfully", key=key)
 
     def _generate_kv_schema(
-        self, endpoint_id: str, v3io_monitoring_apps_container: str
-    ):
+        self, *, container: str, table_path: str, kind: mm_schemas.WriterEventKind
+    ) -> None:
         """Generate V3IO KV schema file which will be used by the model monitoring applications dashboard in Grafana."""
-        fields = [
-            {
-                "name": mm_constants.ResultData.RESULT_NAME,
-                "type": "string",
-                "nullable": False,
-            }
-        ]
+        schema_params = _KIND_TO_SCHEMA_PARAMS[kind]
        res = self.client.kv.create_schema(
-            container=v3io_monitoring_apps_container,
-            table_path=endpoint_id,
-            key=mm_constants.WriterEvent.APPLICATION_NAME,
-            fields=fields,
+            container=container,
+            table_path=table_path,
+            key=schema_params.key,
+            fields=schema_params.fields,
        )
        if res.status_code != HTTPStatus.OK:
            raise mlrun.errors.MLRunBadRequestError(
-                f"Couldn't infer schema for endpoint {endpoint_id} which is required for Grafana dashboards"
+                f"Couldn't infer schema for endpoint {table_path} which is required for Grafana dashboards"
            )
        else:
-            logger.info(
-                "Generated V3IO KV schema successfully", endpoint_id=endpoint_id
-            )
+            logger.info("Generated V3IO KV schema successfully", table_path=table_path)
 
     def get_last_analyzed(self, endpoint_id: str, application_name: str) -> int:
         """
@@ -361,7 +422,7 @@ class KVStoreBase(mlrun.model_monitoring.db.StoreBase):
                 table_path=endpoint_id,
                 key=application_name,
             )
-            return data.output.item[mm_constants.SchedulingKeys.LAST_ANALYZED]
+            return data.output.item[mm_schemas.SchedulingKeys.LAST_ANALYZED]
         except v3io.dataplane.response.HttpResponseError as err:
             logger.debug("Error while getting last analyzed time", err=err)
             raise mlrun.errors.MLRunNotFoundError(
@@ -386,7 +447,7 @@ class KVStoreBase(mlrun.model_monitoring.db.StoreBase):
             ),
             table_path=endpoint_id,
             key=application_name,
-            attributes={mm_constants.SchedulingKeys.LAST_ANALYZED: last_analyzed},
+            attributes={mm_schemas.SchedulingKeys.LAST_ANALYZED: last_analyzed},
         )
 
     def _generate_tsdb_paths(self) -> tuple[str, str]:
  def _generate_tsdb_paths(self) -> tuple[str, str]:
@@ -399,7 +460,7 @@ class KVStoreBase(mlrun.model_monitoring.db.StoreBase):
399
460
  full_path = (
400
461
  mlrun.mlconf.model_endpoint_monitoring.store_prefixes.default.format(
401
462
  project=self.project,
402
- kind=mlrun.common.schemas.ModelMonitoringStoreKinds.EVENTS,
463
+ kind=mm_schemas.ModelMonitoringStoreKinds.EVENTS,
403
464
  )
404
465
  )
405
466
 
@@ -495,8 +556,8 @@ class KVStoreBase(mlrun.model_monitoring.db.StoreBase):
         # Apply top_level filter (remove endpoints that considered a child of a router)
         if top_level:
             filter_expression.append(
-                f"(endpoint_type=='{str(mm_constants.EndpointType.NODE_EP.value)}' "
-                f"OR endpoint_type=='{str(mm_constants.EndpointType.ROUTER.value)}')"
+                f"(endpoint_type=='{str(mm_schemas.EndpointType.NODE_EP.value)}' "
+                f"OR endpoint_type=='{str(mm_schemas.EndpointType.ROUTER.value)}')"
             )
 
         return " AND ".join(filter_expression)
@@ -516,30 +577,30 @@ class KVStoreBase(mlrun.model_monitoring.db.StoreBase):
         # Validate default value for `error_count`
         # For backwards compatibility reasons, we validate that the model endpoint includes the `error_count` key
         if (
-            mm_constants.EventFieldType.ERROR_COUNT in endpoint
-            and endpoint[mm_constants.EventFieldType.ERROR_COUNT] == "null"
+            mm_schemas.EventFieldType.ERROR_COUNT in endpoint
+            and endpoint[mm_schemas.EventFieldType.ERROR_COUNT] == "null"
         ):
-            endpoint[mm_constants.EventFieldType.ERROR_COUNT] = "0"
+            endpoint[mm_schemas.EventFieldType.ERROR_COUNT] = "0"
 
         # Validate default value for `metrics`
         # For backwards compatibility reasons, we validate that the model endpoint includes the `metrics` key
         if (
-            mm_constants.EventFieldType.METRICS in endpoint
-            and endpoint[mm_constants.EventFieldType.METRICS] == "null"
+            mm_schemas.EventFieldType.METRICS in endpoint
+            and endpoint[mm_schemas.EventFieldType.METRICS] == "null"
         ):
-            endpoint[mm_constants.EventFieldType.METRICS] = json.dumps(
+            endpoint[mm_schemas.EventFieldType.METRICS] = json.dumps(
                 {
-                    mm_constants.EventKeyMetrics.GENERIC: {
-                        mm_constants.EventLiveStats.LATENCY_AVG_1H: 0,
-                        mm_constants.EventLiveStats.PREDICTIONS_PER_SECOND: 0,
+                    mm_schemas.EventKeyMetrics.GENERIC: {
+                        mm_schemas.EventLiveStats.LATENCY_AVG_1H: 0,
+                        mm_schemas.EventLiveStats.PREDICTIONS_PER_SECOND: 0,
                     }
                 }
             )
         # Validate key `uid` instead of `endpoint_id`
         # For backwards compatibility reasons, we replace the `endpoint_id` with `uid` which is the updated key name
-        if mm_constants.EventFieldType.ENDPOINT_ID in endpoint:
-            endpoint[mm_constants.EventFieldType.UID] = endpoint[
-                mm_constants.EventFieldType.ENDPOINT_ID
+        if mm_schemas.EventFieldType.ENDPOINT_ID in endpoint:
+            endpoint[mm_schemas.EventFieldType.UID] = endpoint[
+                mm_schemas.EventFieldType.ENDPOINT_ID
             ]
 
     @staticmethod
@@ -566,57 +627,95 @@ class KVStoreBase(mlrun.model_monitoring.db.StoreBase):
     def _get_monitoring_schedules_container(project_name: str) -> str:
         return f"users/pipelines/{project_name}/monitoring-schedules/functions"
 
-    def _extract_metrics_from_items(
+    def _extract_results_from_items(
         self, app_items: list[dict[str, str]]
-    ) -> list[mm_constants.ModelEndpointMonitoringMetric]:
-        metrics: list[mm_constants.ModelEndpointMonitoringMetric] = []
+    ) -> list[mm_schemas.ModelEndpointMonitoringMetric]:
+        """Assuming .#schema items are filtered out"""
+        metrics: list[mm_schemas.ModelEndpointMonitoringMetric] = []
         for app_item in app_items:
-            # See https://www.iguazio.com/docs/latest-release/services/data-layer/reference/system-attributes/#sys-attr-__name
             app_name = app_item.pop("__name")
-            if app_name == ".#schema":
-                continue
             for result_name in app_item:
                 metrics.append(
-                    mm_constants.ModelEndpointMonitoringMetric(
+                    mm_schemas.ModelEndpointMonitoringMetric(
                         project=self.project,
                         app=app_name,
-                        type=mm_constants.ModelEndpointMonitoringMetricType.RESULT,
+                        type=mm_schemas.ModelEndpointMonitoringMetricType.RESULT,
                         name=result_name,
-                        full_name=mlrun.common.schemas.model_monitoring.model_endpoints._compose_full_name(
+                        full_name=mm_schemas.model_endpoints._compose_full_name(
                             project=self.project, app=app_name, name=result_name
                         ),
                     )
                 )
         return metrics
 
+    def _extract_metrics_from_items(
+        self, result_items: list[dict[str, str]]
+    ) -> list[mm_schemas.ModelEndpointMonitoringMetric]:
+        metrics: list[mm_schemas.ModelEndpointMonitoringMetric] = []
+        logger.debug("Result items", result_items=result_items)
+        for result_item in result_items:
+            app = result_item[mm_schemas.WriterEvent.APPLICATION_NAME]
+            name = result_item[mm_schemas.MetricData.METRIC_NAME]
+            metrics.append(
+                mm_schemas.ModelEndpointMonitoringMetric(
+                    project=self.project,
+                    app=app,
+                    type=mm_schemas.ModelEndpointMonitoringMetricType.METRIC,
+                    name=name,
+                    full_name=mm_schemas.model_endpoints._compose_full_name(
+                        project=self.project,
+                        app=app,
+                        name=name,
+                        type=mm_schemas.ModelEndpointMonitoringMetricType.METRIC,
+                    ),
+                )
+            )
+        return metrics
+
     def get_model_endpoint_metrics(
-        self, endpoint_id: str
-    ) -> list[mm_constants.ModelEndpointMonitoringMetric]:
+        self, endpoint_id: str, type: mm_schemas.ModelEndpointMonitoringMetricType
+    ) -> list[mm_schemas.ModelEndpointMonitoringMetric]:
         """Get model monitoring results and metrics on the endpoint"""
-        metrics: list[mm_constants.ModelEndpointMonitoringMetric] = []
+        metrics: list[mm_schemas.ModelEndpointMonitoringMetric] = []
         container = self.get_v3io_monitoring_apps_container(self.project)
+        if type == mm_schemas.ModelEndpointMonitoringMetricType.METRIC:
+            table_path = self._get_metrics_table_path(endpoint_id)
+            items_extractor = self._extract_metrics_from_items
+        elif type == mm_schemas.ModelEndpointMonitoringMetricType.RESULT:
+            table_path = self._get_results_table_path(endpoint_id)
+            items_extractor = self._extract_results_from_items
+        else:
+            raise ValueError(f"Invalid metric {type = }")
+
+        def scan(
+            marker: typing.Optional[str] = None,
+        ) -> v3io.dataplane.response.Response:
+            # TODO: Use AIO client: `v3io.aio.dataplane.client.Client`
+            return self.client.kv.scan(
+                container=container,
+                table_path=table_path,
+                marker=marker,
+                filter_expression=_EXCLUDE_SCHEMA_FILTER_EXPRESSION,
+            )
+
         try:
-            response = self.client.kv.scan(container=container, table_path=endpoint_id)
+            response = scan()
         except v3io.dataplane.response.HttpResponseError as err:
             if err.status_code == HTTPStatus.NOT_FOUND:
                 logger.warning(
-                    "Attempt getting metrics and results - no data. Check the "
+                    f"Attempt getting {type}s - no data. Check the "
                     "project name, endpoint, or wait for the applications to start.",
                     container=container,
-                    table_path=endpoint_id,
+                    table_path=table_path,
                 )
                 return []
             raise
 
         while True:
-            metrics.extend(self._extract_metrics_from_items(response.output.items))
-            if response.output.last:
+            output = typing.cast(v3io.dataplane.output.GetItemsOutput, response.output)
+            metrics.extend(items_extractor(output.items))
+            if output.last:
                 break
-            # TODO: Use AIO client: `v3io.aio.dataplane.client.Client`
-            response = self.client.kv.scan(
-                container=container,
-                table_path=endpoint_id,
-                marker=response.output.next_marker,
-            )
+            response = scan(marker=output.next_marker)
 
         return metrics
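`get_model_endpoint_metrics` now wraps the KV scan in a local `scan` helper and pages through results with the usual cursor/marker pattern. A generic, self-contained sketch of that loop, where the fake `scan` callable stands in for `client.kv.scan` and the dict fields mimic `GetItemsOutput`:

```python
import typing


def paginate(scan: typing.Callable[[typing.Optional[str]], dict]) -> list:
    items: list = []
    marker: typing.Optional[str] = None
    while True:
        page = scan(marker)  # one scan call per page
        items.extend(page["items"])
        if page["last"]:  # no more pages to fetch
            break
        marker = page["next_marker"]  # resume where this page ended
    return items


# Two fake pages keyed by marker
pages = {
    None: {"items": [1, 2], "last": False, "next_marker": "m1"},
    "m1": {"items": [3], "last": True, "next_marker": None},
}
print(paginate(lambda marker: pages[marker]))  # [1, 2, 3]
```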
mlrun/model_monitoring/db/tsdb/__init__.py

@@ -25,6 +25,7 @@ class ObjectTSDBFactory(enum.Enum):
     """Enum class to handle the different TSDB connector type values for storing real time metrics"""
 
     v3io_tsdb = "v3io-tsdb"
+    tdengine = "tdengine"
 
     def to_tsdb_connector(self, project: str, **kwargs) -> TSDBConnector:
         """
@@ -43,6 +44,13 @@ class ObjectTSDBFactory(enum.Enum):
 
             return V3IOTSDBConnector(project=project, **kwargs)
 
+        # Assuming TDEngine connector if connector type is not V3IO TSDB.
+        # Update these lines once there are more than two connector types.
+
+        from .tdengine.tdengine_connector import TDEngineConnector
+
+        return TDEngineConnector(project=project, **kwargs)
+
     @classmethod
     def _missing_(cls, value: typing.Any):
         """A lookup function to handle an invalid value.
@@ -54,18 +62,39 @@ class ObjectTSDBFactory(enum.Enum):
         )
 
 
-def get_tsdb_connector(project: str, **kwargs) -> TSDBConnector:
+def get_tsdb_connector(
+    project: str,
+    tsdb_connector_type: str = "",
+    secret_provider: typing.Callable = None,
+    **kwargs,
+) -> TSDBConnector:
     """
-    Get the TSDB connector type based on mlrun.config.model_endpoint_monitoring.tsdb_connector_type.
+    Get TSDB connector object.
     :param project: The name of the project.
+    :param tsdb_connector_type: The type of the TSDB connector. See mlrun.model_monitoring.db.tsdb.ObjectTSDBFactory
+                                for available options.
+    :param secret_provider: An optional secret provider to get the connection string secret.
+
     :return: `TSDBConnector` object. The main goal of this object is to handle different operations on the
              TSDB connector such as updating drift metrics or write application record result.
     """
 
-    # Get store type value from ObjectTSDBFactory enum class
-    tsdb_connector_type = ObjectTSDBFactory(
-        mlrun.mlconf.model_endpoint_monitoring.tsdb_connector_type
+    tsdb_connection_string = mlrun.model_monitoring.helpers.get_tsdb_connection_string(
+        secret_provider=secret_provider
     )
 
+    if tsdb_connection_string and tsdb_connection_string.startswith("taosws"):
+        tsdb_connector_type = mlrun.common.schemas.model_monitoring.TSDBTarget.TDEngine
+        kwargs["connection_string"] = tsdb_connection_string
+
+    # Set the default TSDB connector type if no connection has been set
+    tsdb_connector_type = (
+        tsdb_connector_type
+        or mlrun.mlconf.model_endpoint_monitoring.tsdb_connector_type
+    )
+
+    # Get connector type value from ObjectTSDBFactory enum class
+    tsdb_connector_factory = ObjectTSDBFactory(tsdb_connector_type)
+
     # Convert into TSDB connector object
-    return tsdb_connector_type.to_tsdb_connector(project=project, **kwargs)
+    return tsdb_connector_factory.to_tsdb_connector(project=project, **kwargs)
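`get_tsdb_connector` now infers the connector from the connection string before falling back to the configured default: a string with the `taosws` scheme (TDEngine's websocket protocol) selects the TDEngine connector. A hedged sketch of just that selection logic; the function and default names here are illustrative, not mlrun's actual API:

```python
import typing


def pick_connector_type(
    connection_string: typing.Optional[str],
    configured_type: str = "",
    default_type: str = "v3io-tsdb",
) -> str:
    if connection_string and connection_string.startswith("taosws"):
        return "tdengine"  # TDEngine websocket connection string
    # Fall back to the explicitly configured type, then the default
    return configured_type or default_type


assert pick_connector_type("taosws://root:taosdata@localhost:6041") == "tdengine"
assert pick_connector_type(None) == "v3io-tsdb"
```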
mlrun/model_monitoring/db/tsdb/base.py

@@ -14,6 +14,7 @@
 #
 
 
+import typing
 from abc import ABC
 
 import pandas as pd
@@ -22,6 +23,8 @@ import mlrun.common.schemas.model_monitoring.constants as mm_constants
 
 
 class TSDBConnector(ABC):
+    type: str = ""
+
     def __init__(self, project: str):
         """
         Initialize a new TSDB connector. The connector is used to interact with the TSDB and store monitoring data.
@@ -57,13 +60,12 @@ class TSDBConnector(ABC):
         self,
         event: dict,
         kind: mm_constants.WriterEventKind = mm_constants.WriterEventKind.RESULT,
-    ):
+    ) -> None:
         """
         Write a single application or metric to TSDB.
 
         :raise mlrun.errors.MLRunRuntimeError: If an error occurred while writing the event.
         """
-        pass
 
     def delete_tsdb_resources(self):
         """
@@ -76,8 +78,8 @@ class TSDBConnector(ABC):
         self,
         endpoint_id: str,
         metrics: list[str],
-        start: str = "now-1h",
-        end: str = "now",
+        start: str,
+        end: str,
     ) -> dict[str, list[tuple[str, float]]]:
         """
         Getting real time metrics from the TSDB. There are pre-defined metrics for model endpoints such as
@@ -101,35 +103,40 @@ class TSDBConnector(ABC):
     def get_records(
         self,
         table: str,
-        columns: list[str] = None,
+        start: str,
+        end: str,
+        columns: typing.Optional[list[str]] = None,
         filter_query: str = "",
-        start: str = "now-1h",
-        end: str = "now",
     ) -> pd.DataFrame:
         """
         Getting records from TSDB data collection.
         :param table:        Table name, e.g. 'metrics', 'app_results'.
+        :param start:        The start time of the metrics.
+                             If using V3IO, can be represented by a string containing an RFC 3339 time, a Unix
+                             timestamp in milliseconds, a relative time (`'now'` or `'now-[0-9]+[mhd]'`, where
+                             `m` = minutes, `h` = hours, `'d'` = days, and `'s'` = seconds), or 0 for the earliest
+                             time.
+                             If using TDEngine, can be represented by datetime.
+        :param end:          The end time of the metrics.
+                             If using V3IO, can be represented by a string containing an RFC 3339 time, a Unix
+                             timestamp in milliseconds, a relative time (`'now'` or `'now-[0-9]+[mhd]'`, where
+                             `m` = minutes, `h` = hours, `'d'` = days, and `'s'` = seconds), or 0 for the earliest
+                             time.
+                             If using TDEngine, can be represented by datetime.
         :param columns:      Columns to include in the result.
         :param filter_query: Optional filter expression as a string. The filter structure depends on the TSDB
                              connector type.
-        :param start:        The start time of the metrics. Can be represented by a string containing an RFC
-                             3339 time, a Unix timestamp in milliseconds, a relative time (`'now'` or
-                             `'now-[0-9]+[mhd]'`, where `m` = minutes, `h` = hours, `'d'` = days, and `'s'`
-                             = seconds), or 0 for the earliest time.
-        :param end:          The end time of the metrics. Can be represented by a string containing an RFC
-                             3339 time, a Unix timestamp in milliseconds, a relative time (`'now'` or
-                             `'now-[0-9]+[mhd]'`, where `m` = minutes, `h` = hours, `'d'` = days, and `'s'`
-                             = seconds), or 0 for the earliest time.
+
 
         :return: DataFrame with the provided attributes from the data collection.
         :raise:  MLRunNotFoundError if the provided table wasn't found.
         """
         pass
 
-    def create_tsdb_application_tables(self):
+    def create_tables(self) -> None:
         """
-        Create the application tables using the TSDB connector. At the moment we support 2 types of application tables:
+        Create the TSDB tables using the TSDB connector. At the moment we support 3 types of tables:
         - app_results: a detailed result that includes status, kind, extra data, etc.
         - metrics: a basic key value that represents a numeric metric.
+        - predictions: latency of each prediction.
         """
-        pass
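Under the revised `TSDBConnector` interface, `start` and `end` become required arguments of `get_records`, and `create_tsdb_application_tables` becomes `create_tables` with a third `predictions` table. A minimal sketch of a concrete subclass against this shape; the in-memory connector here is purely illustrative, not the actual V3IO or TDEngine implementation:

```python
import typing
from abc import ABC

import pandas as pd


class TSDBConnector(ABC):
    type: str = ""

    def __init__(self, project: str):
        self.project = project


class InMemoryConnector(TSDBConnector):
    """Illustrative connector that keeps its 'tables' as DataFrames."""

    type = "in-memory"

    def __init__(self, project: str):
        super().__init__(project)
        self._tables: dict[str, pd.DataFrame] = {}

    def create_tables(self) -> None:
        # The three table kinds named in the docstring above
        for table in ("app_results", "metrics", "predictions"):
            self._tables[table] = pd.DataFrame()

    def get_records(
        self,
        table: str,
        start: str,
        end: str,
        columns: typing.Optional[list[str]] = None,
        filter_query: str = "",
    ) -> pd.DataFrame:
        # A real connector would apply start/end and filter_query here
        df = self._tables[table]
        return df[columns] if columns else df


connector = InMemoryConnector(project="demo")
connector.create_tables()
print(connector.get_records("metrics", start="now-1h", end="now").empty)  # True
```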
mlrun/model_monitoring/db/tsdb/tdengine/__init__.py (new file)

@@ -0,0 +1,15 @@
+# Copyright 2024 Iguazio
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from .tdengine_connector import TDEngineConnector