mlrun 1.8.0rc30__py3-none-any.whl → 1.8.0rc32__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of mlrun has been flagged in its source registry.

Files changed (36):
  1. mlrun/__init__.py +2 -35
  2. mlrun/api/schemas/__init__.py +1 -6
  3. mlrun/common/runtimes/constants.py +4 -0
  4. mlrun/common/schemas/__init__.py +0 -2
  5. mlrun/common/schemas/model_monitoring/__init__.py +0 -2
  6. mlrun/common/schemas/model_monitoring/constants.py +1 -6
  7. mlrun/common/schemas/model_monitoring/grafana.py +17 -11
  8. mlrun/config.py +9 -36
  9. mlrun/datastore/storeytargets.py +20 -3
  10. mlrun/db/base.py +1 -1
  11. mlrun/db/httpdb.py +5 -4
  12. mlrun/db/nopdb.py +1 -1
  13. mlrun/model_monitoring/applications/base.py +111 -40
  14. mlrun/model_monitoring/applications/results.py +2 -2
  15. mlrun/model_monitoring/controller.py +4 -3
  16. mlrun/model_monitoring/db/tsdb/__init__.py +9 -5
  17. mlrun/model_monitoring/db/tsdb/base.py +60 -39
  18. mlrun/model_monitoring/db/tsdb/tdengine/tdengine_connector.py +117 -52
  19. mlrun/model_monitoring/db/tsdb/v3io/v3io_connector.py +140 -14
  20. mlrun/model_monitoring/helpers.py +16 -15
  21. mlrun/model_monitoring/stream_processing.py +6 -13
  22. mlrun/projects/pipelines.py +11 -3
  23. mlrun/projects/project.py +88 -111
  24. mlrun/serving/states.py +1 -1
  25. mlrun/serving/v2_serving.py +20 -10
  26. mlrun/utils/helpers.py +1 -1
  27. mlrun/utils/logger.py +13 -10
  28. mlrun/utils/notifications/notification_pusher.py +24 -0
  29. mlrun/utils/regex.py +1 -0
  30. mlrun/utils/version/version.json +2 -2
  31. {mlrun-1.8.0rc30.dist-info → mlrun-1.8.0rc32.dist-info}/METADATA +2 -2
  32. {mlrun-1.8.0rc30.dist-info → mlrun-1.8.0rc32.dist-info}/RECORD +36 -36
  33. {mlrun-1.8.0rc30.dist-info → mlrun-1.8.0rc32.dist-info}/LICENSE +0 -0
  34. {mlrun-1.8.0rc30.dist-info → mlrun-1.8.0rc32.dist-info}/WHEEL +0 -0
  35. {mlrun-1.8.0rc30.dist-info → mlrun-1.8.0rc32.dist-info}/entry_points.txt +0 -0
  36. {mlrun-1.8.0rc30.dist-info → mlrun-1.8.0rc32.dist-info}/top_level.txt +0 -0
mlrun/model_monitoring/db/tsdb/v3io/v3io_connector.py

```diff
@@ -11,10 +11,11 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-
+import asyncio
+import math
 from datetime import datetime, timedelta, timezone
 from io import StringIO
-from typing import Literal, Optional, Union
+from typing import Callable, Literal, Optional, Union
 
 import pandas as pd
 import v3io_frames
@@ -491,8 +492,9 @@ class V3IOTSDBConnector(TSDBConnector):
         interval: Optional[str] = None,
         agg_funcs: Optional[list[str]] = None,
         sliding_window_step: Optional[str] = None,
+        get_raw: bool = False,
         **kwargs,
-    ) -> pd.DataFrame:
+    ) -> Union[pd.DataFrame, list[v3io_frames.client.RawFrame]]:
         """
         Getting records from V3IO TSDB data collection.
         :param table: Path to the collection to query.
@@ -517,6 +519,10 @@ class V3IOTSDBConnector(TSDBConnector):
                                     `sliding_window_step` is provided, interval must be provided as well. Provided
                                     as a string in the format of '1m', '1h', etc.
         :param kwargs: Additional keyword arguments passed to the read method of frames client.
+        :param get_raw: Whether to return the request as raw frames rather than a pandas dataframe.
+                        Defaults to False. This can greatly improve performance when a dataframe isn't
+                        needed.
+
         :return: DataFrame with the provided attributes from the data collection.
         :raise: MLRunNotFoundError if the provided table wasn't found.
         """
@@ -530,7 +536,7 @@ class V3IOTSDBConnector(TSDBConnector):
         aggregators = ",".join(agg_funcs) if agg_funcs else None
         table_path = self.tables[table]
         try:
-            df = self.frames_client.read(
+            res = self.frames_client.read(
                 backend=_TSDB_BE,
                 table=table_path,
                 start=start,
@@ -540,15 +546,18 @@ class V3IOTSDBConnector(TSDBConnector):
                 aggregation_window=interval,
                 aggregators=aggregators,
                 step=sliding_window_step,
+                get_raw=get_raw,
                 **kwargs,
             )
+            if get_raw:
+                res = list(res)
         except v3io_frames.Error as err:
             if _is_no_schema_error(err):
-                return pd.DataFrame()
+                return [] if get_raw else pd.DataFrame()
             else:
                 raise err
 
-        return df
+        return res
 
     def _get_v3io_source_directory(self) -> str:
         """
```
mlrun/model_monitoring/db/tsdb/v3io/v3io_connector.py (continued)

```diff
@@ -778,16 +787,23 @@ class V3IOTSDBConnector(TSDBConnector):
         endpoint_ids: Union[str, list[str]],
         start: Optional[datetime] = None,
         end: Optional[datetime] = None,
-    ) -> pd.DataFrame:
+        get_raw: bool = False,
+    ) -> Union[pd.DataFrame, list[v3io_frames.client.RawFrame]]:
         filter_query = self._get_endpoint_filter(endpoint_id=endpoint_ids)
         start, end = self._get_start_end(start, end)
-        df = self._get_records(
+        res = self._get_records(
             table=mm_schemas.V3IOTSDBTables.PREDICTIONS,
             start=start,
             end=end,
             filter_query=filter_query,
             agg_funcs=["last"],
+            get_raw=get_raw,
         )
+
+        if get_raw:
+            return res
+
+        df = res
         if not df.empty:
             df.rename(
                 columns={
@@ -811,11 +827,12 @@ class V3IOTSDBConnector(TSDBConnector):
         endpoint_ids: Union[str, list[str]],
         start: Optional[datetime] = None,
         end: Optional[datetime] = None,
-    ) -> pd.DataFrame:
+        get_raw: bool = False,
+    ) -> Union[pd.DataFrame, list[v3io_frames.client.RawFrame]]:
         filter_query = self._get_endpoint_filter(endpoint_id=endpoint_ids)
         start = start or (mlrun.utils.datetime_now() - timedelta(hours=24))
         start, end = self._get_start_end(start, end)
-        df = self._get_records(
+        res = self._get_records(
             table=mm_schemas.V3IOTSDBTables.APP_RESULTS,
             start=start,
             end=end,
@@ -823,7 +840,12 @@ class V3IOTSDBConnector(TSDBConnector):
             filter_query=filter_query,
             agg_funcs=["max"],
             group_by="endpoint_id",
+            get_raw=get_raw,
         )
+        if get_raw:
+            return res
+
+        df = res
         if not df.empty:
             df.columns = [
                 col[len("max(") : -1] if "max(" in col else col for col in df.columns
```
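As an aside, the `max(` prefix stripping in that last comprehension works like this (standalone sketch with illustrative values):

```python
# V3IO frames names aggregated columns like "max(result_status)"; the
# comprehension strips the aggregator wrapper back to the bare column name.
columns = ["endpoint_id", "max(result_status)"]
cleaned = [col[len("max(") : -1] if "max(" in col else col for col in columns]
print(cleaned)  # ['endpoint_id', 'result_status']
```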
mlrun/model_monitoring/db/tsdb/v3io/v3io_connector.py (continued)

```diff
@@ -884,21 +906,28 @@ class V3IOTSDBConnector(TSDBConnector):
         endpoint_ids: Union[str, list[str]],
         start: Optional[datetime] = None,
         end: Optional[datetime] = None,
-    ) -> pd.DataFrame:
+        get_raw: bool = False,
+    ) -> Union[pd.DataFrame, list[v3io_frames.client.RawFrame]]:
         filter_query = self._get_endpoint_filter(endpoint_id=endpoint_ids)
         if filter_query:
             filter_query += f"AND {mm_schemas.EventFieldType.ERROR_TYPE} == '{mm_schemas.EventFieldType.INFER_ERROR}'"
         else:
             filter_query = f"{mm_schemas.EventFieldType.ERROR_TYPE} == '{mm_schemas.EventFieldType.INFER_ERROR}'"
         start, end = self._get_start_end(start, end)
-        df = self._get_records(
+        res = self._get_records(
             table=mm_schemas.FileTargetKind.ERRORS,
             start=start,
             end=end,
             columns=[mm_schemas.EventFieldType.ERROR_COUNT],
             filter_query=filter_query,
             agg_funcs=["count"],
+            get_raw=get_raw,
         )
+
+        if get_raw:
+            return res
+
+        df = res
         if not df.empty:
             df.rename(
                 columns={
@@ -914,18 +943,25 @@ class V3IOTSDBConnector(TSDBConnector):
         endpoint_ids: Union[str, list[str]],
         start: Optional[datetime] = None,
         end: Optional[datetime] = None,
-    ) -> pd.DataFrame:
+        get_raw: bool = False,
+    ) -> Union[pd.DataFrame, list[v3io_frames.client.RawFrame]]:
         filter_query = self._get_endpoint_filter(endpoint_id=endpoint_ids)
         start = start or (mlrun.utils.datetime_now() - timedelta(hours=24))
         start, end = self._get_start_end(start, end)
-        df = self._get_records(
+        res = self._get_records(
             table=mm_schemas.V3IOTSDBTables.PREDICTIONS,
             start=start,
             end=end,
             columns=[mm_schemas.EventFieldType.LATENCY],
             filter_query=filter_query,
             agg_funcs=["avg"],
+            get_raw=get_raw,
         )
+
+        if get_raw:
+            return res
+
+        df = res
         if not df.empty:
             df.dropna(inplace=True)
             df.rename(
```
mlrun/model_monitoring/db/tsdb/v3io/v3io_connector.py (continued)

```diff
@@ -935,3 +971,93 @@ class V3IOTSDBConnector(TSDBConnector):
                 inplace=True,
             )
         return df.reset_index(drop=True)
+
+    async def add_basic_metrics(
+        self,
+        model_endpoint_objects: list[mlrun.common.schemas.ModelEndpoint],
+        project: str,
+        run_in_threadpool: Callable,
+    ) -> list[mlrun.common.schemas.ModelEndpoint]:
+        """
+        Fetch basic metrics from V3IO TSDB and add them to the MEP objects.
+
+        :param model_endpoint_objects: A list of `ModelEndpoint` objects that will
+                                       be filled with the relevant basic metrics.
+        :param project:                The name of the project.
+        :param run_in_threadpool:      A function that runs another function in a thread pool.
+
+        :return: A list of `ModelEndpoint` objects, enriched with the basic metrics.
+        """
+
+        uids = []
+        model_endpoint_objects_by_uid = {}
+        for model_endpoint_object in model_endpoint_objects:
+            uid = model_endpoint_object.metadata.uid
+            uids.append(uid)
+            model_endpoint_objects_by_uid[uid] = model_endpoint_object
+
+        coroutines = [
+            run_in_threadpool(
+                self.get_error_count,
+                endpoint_ids=uids,
+                get_raw=True,
+            ),
+            run_in_threadpool(
+                self.get_last_request,
+                endpoint_ids=uids,
+                get_raw=True,
+            ),
+            run_in_threadpool(
+                self.get_avg_latency,
+                endpoint_ids=uids,
+                get_raw=True,
+            ),
+            run_in_threadpool(
+                self.get_drift_status,
+                endpoint_ids=uids,
+                get_raw=True,
+            ),
+        ]
+
+        (
+            error_count_res,
+            last_request_res,
+            avg_latency_res,
+            drift_status_res,
+        ) = await asyncio.gather(*coroutines)
+
+        def add_metric(
+            metric: str,
+            column_name: str,
+            frames: list,
+        ):
+            for frame in frames:
+                endpoint_ids = frame.column_data("endpoint_id")
+                metric_data = frame.column_data(column_name)
+                for index, endpoint_id in enumerate(endpoint_ids):
+                    mep = model_endpoint_objects_by_uid.get(endpoint_id)
+                    value = metric_data[index]
+                    if mep and value is not None and not math.isnan(value):
+                        setattr(mep.status, metric, value)
+
+        add_metric(
+            "error_count",
+            "count(error_count)",
+            error_count_res,
+        )
+        add_metric(
+            "last_request",
+            "last(last_request_timestamp)",
+            last_request_res,
+        )
+        add_metric(
+            "avg_latency",
+            "avg(latency)",
+            avg_latency_res,
+        )
+        add_metric(
+            "result_status",
+            "max(result_status)",
+            drift_status_res,
+        )
+        return list(model_endpoint_objects_by_uid.values())
```

Note: the extracted page showed the last two `add_metric` calls with their metric names, column names, and result lists scrambled across each other ("avg_latency" paired with `max(result_status)` and `drift_status_res`, and vice versa); they are shown above with each metric matched to its own aggregation column and result set. The docstring's return type is also aligned with the annotated `list[ModelEndpoint]`.
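A hedged usage sketch of the new `add_basic_metrics` coroutine; the `connector` and `endpoints` objects are assumptions, and `run_in_threadpool` here is starlette's helper, which matches the "call a sync function with kwargs in a thread pool" contract the method expects:

```python
import asyncio

# Assumption: starlette's helper satisfies the run_in_threadpool parameter.
from starlette.concurrency import run_in_threadpool


async def enrich_endpoints(connector, endpoints, project: str):
    # Each get_* query runs in a worker thread with get_raw=True; the raw
    # frames are then folded back into the ModelEndpoint.status fields.
    return await connector.add_basic_metrics(
        model_endpoint_objects=endpoints,
        project=project,
        run_in_threadpool=run_in_threadpool,
    )


# asyncio.run(enrich_endpoints(connector, endpoints, "my-project"))
```

Running the four TSDB queries concurrently via `asyncio.gather`, rather than sequentially per endpoint, is what makes the raw-frame path worthwhile here.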
mlrun/model_monitoring/helpers.py

```diff
@@ -246,21 +246,6 @@ def get_monitoring_drift_measures_data(project: str, endpoint_id: str) -> "DataI
     )
 
 
-def get_tsdb_connection_string(
-    secret_provider: Optional[Callable[[str], str]] = None,
-) -> str:
-    """Get TSDB connection string from the project secret. If wasn't set, take it from the system
-    configurations.
-    :param secret_provider: An optional secret provider to get the connection string secret.
-    :return: Valid TSDB connection string.
-    """
-
-    return mlrun.get_secret_or_env(
-        key=mm_constants.ProjectSecretKeys.TSDB_CONNECTION,
-        secret_provider=secret_provider,
-    )
-
-
 def _get_profile(
     project: str,
     secret_provider: Optional[Callable[[str], str]],
@@ -554,6 +539,22 @@ def get_result_instance_fqn(
     return f"{model_endpoint_id}.{app_name}.result.{result_name}"
 
 
+def get_alert_name_from_result_fqn(result_fqn: str):
+    """
+    :param result_fqn: current get_result_instance_fqn format: `{model_endpoint_id}.{app_name}.result.{result_name}`
+
+    :return: shorter fqn without forbidden alert characters.
+    """
+    if result_fqn.count(".") != 3 or result_fqn.split(".")[2] != "result":
+        raise mlrun.errors.MLRunValueError(
+            f"result_fqn: {result_fqn} is not in the correct format: {{model_endpoint_id}}.{{app_name}}."
+            f"result.{{result_name}}"
+        )
+    # Name format cannot contain "."
+    # The third component is always `result`, so it is not necessary for checking uniqueness.
+    return "_".join(result_fqn.split(".")[i] for i in [0, 1, 3])
+
+
 def get_default_result_instance_fqn(model_endpoint_id: str) -> str:
     return get_result_instance_fqn(
         model_endpoint_id,
```
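A worked example of the FQN-to-alert-name conversion (values are illustrative):

```python
# get_result_instance_fqn produces "{model_endpoint_id}.{app_name}.result.{result_name}";
# alert names may not contain ".", so components 0, 1 and 3 are joined with "_"
# and the constant "result" component is dropped.
fqn = "ep-123.my-app.result.drift"
parts = fqn.split(".")                               # ['ep-123', 'my-app', 'result', 'drift']
assert fqn.count(".") == 3 and parts[2] == "result"  # the validation performed above
print("_".join(parts[i] for i in [0, 1, 3]))         # ep-123_my-app_drift
```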
mlrun/model_monitoring/stream_processing.py

```diff
@@ -13,7 +13,6 @@
 # limitations under the License.
 
 import datetime
-import os
 import typing
 
 import storey
@@ -65,14 +64,11 @@ class EventStreamProcessor:
             parquet_batching_max_events=self.parquet_batching_max_events,
         )
 
-        self.storage_options = None
         self.tsdb_configurations = {}
         if not mlrun.mlconf.is_ce_mode():
             self._initialize_v3io_configurations(
                 model_monitoring_access_key=model_monitoring_access_key
             )
-        elif self.parquet_path.startswith("s3://"):
-            self.storage_options = mlrun.mlconf.get_s3_storage_options()
 
     def _initialize_v3io_configurations(
         self,
@@ -87,17 +83,12 @@ class EventStreamProcessor:
         self.v3io_framesd = v3io_framesd or mlrun.mlconf.v3io_framesd
         self.v3io_api = v3io_api or mlrun.mlconf.v3io_api
 
-        self.v3io_access_key = v3io_access_key or mlrun.get_secret_or_env(
-            "V3IO_ACCESS_KEY"
-        )
+        self.v3io_access_key = v3io_access_key or mlrun.mlconf.get_v3io_access_key()
         self.model_monitoring_access_key = (
             model_monitoring_access_key
-            or os.environ.get(ProjectSecretKeys.ACCESS_KEY)
+            or mlrun.get_secret_or_env(ProjectSecretKeys.ACCESS_KEY)
            or self.v3io_access_key
         )
-        self.storage_options = dict(
-            v3io_access_key=self.model_monitoring_access_key, v3io_api=self.v3io_api
-        )
 
         # TSDB path and configurations
         tsdb_path = mlrun.mlconf.get_model_monitoring_file_target_path(
```
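The access-key change swaps a plain environment lookup (`os.environ.get`) for `mlrun.get_secret_or_env`, which consults the secrets store before falling back to the environment. A small sketch of the fallback behavior (the key name is illustrative, not the real constant):

```python
import os

import mlrun

# With no secret set for this key, the lookup falls through to the
# environment, so existing env-based deployments keep working unchanged.
os.environ["SOME_ACCESS_KEY"] = "env-value"  # illustrative key
print(mlrun.get_secret_or_env("SOME_ACCESS_KEY"))  # "env-value"
```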
mlrun/model_monitoring/stream_processing.py (continued)

```diff
@@ -248,12 +239,12 @@ class EventStreamProcessor:
         # Write the Parquet target file, partitioned by key (endpoint_id) and time.
         def apply_parquet_target():
             graph.add_step(
-                "storey.ParquetTarget",
+                "mlrun.datastore.storeytargets.ParquetStoreyTarget",
+                alternative_v3io_access_key=mlrun.common.schemas.model_monitoring.ProjectSecretKeys.ACCESS_KEY,
                 name="ParquetTarget",
                 after="ProcessBeforeParquet",
                 graph_shape="cylinder",
                 path=self.parquet_path,
-                storage_options=self.storage_options,
                 max_events=self.parquet_batching_max_events,
                 flush_after_seconds=self.parquet_batching_timeout_secs,
                 attributes={"infer_columns_from_data": True},
@@ -794,6 +785,8 @@ class MapFeatureNames(mlrun.feature_store.steps.MapClass):
 
         """
         event[mapping_dictionary] = {}
+        diff = len(named_iters) - len(values_iters)
+        values_iters += [None] * diff
         for name, value in zip(named_iters, values_iters):
             event[name] = value
             event[mapping_dictionary][name] = value
```
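The two added lines guard against `zip` truncation: `zip` stops at the shortest iterable, so without padding, any feature names beyond the available values would be dropped silently. Standalone illustration:

```python
named_iters = ["f1", "f2", "f3"]
values_iters = [0.1, 0.2]

# Pad the shorter values list with None so zip covers every name
# (a negative diff yields an empty padding list, i.e. a no-op).
diff = len(named_iters) - len(values_iters)
values_iters += [None] * diff

print(list(zip(named_iters, values_iters)))  # [('f1', 0.1), ('f2', 0.2), ('f3', None)]
```

For the case where values can only ever be the shorter side, this is equivalent to `itertools.zip_longest(named_iters, values_iters)`.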
mlrun/projects/pipelines.py

```diff
@@ -615,13 +615,21 @@ class _KFPRunner(_PipelineRunner):
                 "Notifications will only be sent if you wait for pipeline completion. "
                 "Some of the features (like setting message or severity level) are not supported."
             )
-            # for start message, fallback to old notification behavior
             for notification in notifications or []:
                 params = notification.params
                 params.update(notification.secret_params)
-                project.notifiers.add_notification(notification.kind, params)
+                project.notifiers.add_notification(
+                    notification_type=notification.kind,
+                    params=params,
+                    name=notification.name,
+                    message=notification.message,
+                    severity=notification.severity,
+                    when=notification.when,
+                    condition=notification.condition,
+                    secret_params=notification.secret_params,
+                )
 
-        project.spec.notifications = notifications
+        project.spec.notifications = project.notifiers.server_notifications
 
         run_id = _run_pipeline(
             workflow_handler,
```
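For context, the expanded call now forwards every notification field instead of collapsing each notification down to its kind plus merged params, which previously dropped name, message, severity, when, and condition on the way into `project.notifiers`. A hedged sketch of the object whose fields now survive that translation (the constructor shape is assumed from the field names in the hunk above; all values are illustrative):

```python
import mlrun.model

# Assumption: mlrun.model.Notification accepts these keyword fields.
notification = mlrun.model.Notification(
    kind="slack",
    name="pipeline-done",
    message="workflow finished",
    severity="info",
    when=["completed", "error"],
    condition="",
    params={"webhook": "https://hooks.example.com/..."},
    secret_params={},
)
```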