mlrun 1.8.0rc30__py3-none-any.whl → 1.8.0rc31__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of mlrun might be problematic. Click here for more details.

Files changed (33) hide show
  1. mlrun/__init__.py +2 -35
  2. mlrun/api/schemas/__init__.py +1 -6
  3. mlrun/common/runtimes/constants.py +4 -0
  4. mlrun/common/schemas/__init__.py +0 -2
  5. mlrun/common/schemas/model_monitoring/__init__.py +0 -2
  6. mlrun/common/schemas/model_monitoring/constants.py +1 -6
  7. mlrun/common/schemas/model_monitoring/grafana.py +17 -11
  8. mlrun/config.py +9 -36
  9. mlrun/datastore/storeytargets.py +20 -3
  10. mlrun/model_monitoring/applications/base.py +55 -40
  11. mlrun/model_monitoring/applications/results.py +2 -2
  12. mlrun/model_monitoring/controller.py +4 -3
  13. mlrun/model_monitoring/db/tsdb/__init__.py +9 -5
  14. mlrun/model_monitoring/db/tsdb/base.py +60 -39
  15. mlrun/model_monitoring/db/tsdb/tdengine/tdengine_connector.py +117 -52
  16. mlrun/model_monitoring/db/tsdb/v3io/v3io_connector.py +140 -14
  17. mlrun/model_monitoring/helpers.py +16 -15
  18. mlrun/model_monitoring/stream_processing.py +6 -13
  19. mlrun/projects/pipelines.py +11 -3
  20. mlrun/projects/project.py +84 -107
  21. mlrun/serving/states.py +1 -1
  22. mlrun/serving/v2_serving.py +20 -10
  23. mlrun/utils/helpers.py +1 -1
  24. mlrun/utils/logger.py +13 -10
  25. mlrun/utils/notifications/notification_pusher.py +24 -0
  26. mlrun/utils/regex.py +1 -0
  27. mlrun/utils/version/version.json +2 -2
  28. {mlrun-1.8.0rc30.dist-info → mlrun-1.8.0rc31.dist-info}/METADATA +2 -2
  29. {mlrun-1.8.0rc30.dist-info → mlrun-1.8.0rc31.dist-info}/RECORD +33 -33
  30. {mlrun-1.8.0rc30.dist-info → mlrun-1.8.0rc31.dist-info}/LICENSE +0 -0
  31. {mlrun-1.8.0rc30.dist-info → mlrun-1.8.0rc31.dist-info}/WHEEL +0 -0
  32. {mlrun-1.8.0rc30.dist-info → mlrun-1.8.0rc31.dist-info}/entry_points.txt +0 -0
  33. {mlrun-1.8.0rc30.dist-info → mlrun-1.8.0rc31.dist-info}/top_level.txt +0 -0
@@ -11,10 +11,11 @@
11
11
  # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
12
  # See the License for the specific language governing permissions and
13
13
  # limitations under the License.
14
-
14
+ import asyncio
15
+ import math
15
16
  from datetime import datetime, timedelta, timezone
16
17
  from io import StringIO
17
- from typing import Literal, Optional, Union
18
+ from typing import Callable, Literal, Optional, Union
18
19
 
19
20
  import pandas as pd
20
21
  import v3io_frames
@@ -491,8 +492,9 @@ class V3IOTSDBConnector(TSDBConnector):
491
492
  interval: Optional[str] = None,
492
493
  agg_funcs: Optional[list[str]] = None,
493
494
  sliding_window_step: Optional[str] = None,
495
+ get_raw: bool = False,
494
496
  **kwargs,
495
- ) -> pd.DataFrame:
497
+ ) -> Union[pd.DataFrame, list[v3io_frames.client.RawFrame]]:
496
498
  """
497
499
  Getting records from V3IO TSDB data collection.
498
500
  :param table: Path to the collection to query.
@@ -517,6 +519,10 @@ class V3IOTSDBConnector(TSDBConnector):
517
519
  `sliding_window_step` is provided, interval must be provided as well. Provided
518
520
  as a string in the format of '1m', '1h', etc.
519
521
  :param kwargs: Additional keyword arguments passed to the read method of frames client.
522
+ :param get_raw: Whether to return the request as raw frames rather than a pandas dataframe.
523
+ Defaults to False. This can greatly improve performance when a dataframe isn't
524
+ needed.
525
+
520
526
  :return: DataFrame with the provided attributes from the data collection.
521
527
  :raise: MLRunNotFoundError if the provided table wasn't found.
522
528
  """
@@ -530,7 +536,7 @@ class V3IOTSDBConnector(TSDBConnector):
530
536
  aggregators = ",".join(agg_funcs) if agg_funcs else None
531
537
  table_path = self.tables[table]
532
538
  try:
533
- df = self.frames_client.read(
539
+ res = self.frames_client.read(
534
540
  backend=_TSDB_BE,
535
541
  table=table_path,
536
542
  start=start,
@@ -540,15 +546,18 @@ class V3IOTSDBConnector(TSDBConnector):
540
546
  aggregation_window=interval,
541
547
  aggregators=aggregators,
542
548
  step=sliding_window_step,
549
+ get_raw=get_raw,
543
550
  **kwargs,
544
551
  )
552
+ if get_raw:
553
+ res = list(res)
545
554
  except v3io_frames.Error as err:
546
555
  if _is_no_schema_error(err):
547
- return pd.DataFrame()
556
+ return [] if get_raw else pd.DataFrame()
548
557
  else:
549
558
  raise err
550
559
 
551
- return df
560
+ return res
552
561
 
553
562
  def _get_v3io_source_directory(self) -> str:
554
563
  """
@@ -778,16 +787,23 @@ class V3IOTSDBConnector(TSDBConnector):
778
787
  endpoint_ids: Union[str, list[str]],
779
788
  start: Optional[datetime] = None,
780
789
  end: Optional[datetime] = None,
781
- ) -> pd.DataFrame:
790
+ get_raw: bool = False,
791
+ ) -> Union[pd.DataFrame, list[v3io_frames.client.RawFrame]]:
782
792
  filter_query = self._get_endpoint_filter(endpoint_id=endpoint_ids)
783
793
  start, end = self._get_start_end(start, end)
784
- df = self._get_records(
794
+ res = self._get_records(
785
795
  table=mm_schemas.V3IOTSDBTables.PREDICTIONS,
786
796
  start=start,
787
797
  end=end,
788
798
  filter_query=filter_query,
789
799
  agg_funcs=["last"],
800
+ get_raw=get_raw,
790
801
  )
802
+
803
+ if get_raw:
804
+ return res
805
+
806
+ df = res
791
807
  if not df.empty:
792
808
  df.rename(
793
809
  columns={
@@ -811,11 +827,12 @@ class V3IOTSDBConnector(TSDBConnector):
811
827
  endpoint_ids: Union[str, list[str]],
812
828
  start: Optional[datetime] = None,
813
829
  end: Optional[datetime] = None,
814
- ) -> pd.DataFrame:
830
+ get_raw: bool = False,
831
+ ) -> Union[pd.DataFrame, list[v3io_frames.client.RawFrame]]:
815
832
  filter_query = self._get_endpoint_filter(endpoint_id=endpoint_ids)
816
833
  start = start or (mlrun.utils.datetime_now() - timedelta(hours=24))
817
834
  start, end = self._get_start_end(start, end)
818
- df = self._get_records(
835
+ res = self._get_records(
819
836
  table=mm_schemas.V3IOTSDBTables.APP_RESULTS,
820
837
  start=start,
821
838
  end=end,
@@ -823,7 +840,12 @@ class V3IOTSDBConnector(TSDBConnector):
823
840
  filter_query=filter_query,
824
841
  agg_funcs=["max"],
825
842
  group_by="endpoint_id",
843
+ get_raw=get_raw,
826
844
  )
845
+ if get_raw:
846
+ return res
847
+
848
+ df = res
827
849
  if not df.empty:
828
850
  df.columns = [
829
851
  col[len("max(") : -1] if "max(" in col else col for col in df.columns
@@ -884,21 +906,28 @@ class V3IOTSDBConnector(TSDBConnector):
884
906
  endpoint_ids: Union[str, list[str]],
885
907
  start: Optional[datetime] = None,
886
908
  end: Optional[datetime] = None,
887
- ) -> pd.DataFrame:
909
+ get_raw: bool = False,
910
+ ) -> Union[pd.DataFrame, list[v3io_frames.client.RawFrame]]:
888
911
  filter_query = self._get_endpoint_filter(endpoint_id=endpoint_ids)
889
912
  if filter_query:
890
913
  filter_query += f"AND {mm_schemas.EventFieldType.ERROR_TYPE} == '{mm_schemas.EventFieldType.INFER_ERROR}'"
891
914
  else:
892
915
  filter_query = f"{mm_schemas.EventFieldType.ERROR_TYPE} == '{mm_schemas.EventFieldType.INFER_ERROR}'"
893
916
  start, end = self._get_start_end(start, end)
894
- df = self._get_records(
917
+ res = self._get_records(
895
918
  table=mm_schemas.FileTargetKind.ERRORS,
896
919
  start=start,
897
920
  end=end,
898
921
  columns=[mm_schemas.EventFieldType.ERROR_COUNT],
899
922
  filter_query=filter_query,
900
923
  agg_funcs=["count"],
924
+ get_raw=get_raw,
901
925
  )
926
+
927
+ if get_raw:
928
+ return res
929
+
930
+ df = res
902
931
  if not df.empty:
903
932
  df.rename(
904
933
  columns={
@@ -914,18 +943,25 @@ class V3IOTSDBConnector(TSDBConnector):
914
943
  endpoint_ids: Union[str, list[str]],
915
944
  start: Optional[datetime] = None,
916
945
  end: Optional[datetime] = None,
917
- ) -> pd.DataFrame:
946
+ get_raw: bool = False,
947
+ ) -> Union[pd.DataFrame, list[v3io_frames.client.RawFrame]]:
918
948
  filter_query = self._get_endpoint_filter(endpoint_id=endpoint_ids)
919
949
  start = start or (mlrun.utils.datetime_now() - timedelta(hours=24))
920
950
  start, end = self._get_start_end(start, end)
921
- df = self._get_records(
951
+ res = self._get_records(
922
952
  table=mm_schemas.V3IOTSDBTables.PREDICTIONS,
923
953
  start=start,
924
954
  end=end,
925
955
  columns=[mm_schemas.EventFieldType.LATENCY],
926
956
  filter_query=filter_query,
927
957
  agg_funcs=["avg"],
958
+ get_raw=get_raw,
928
959
  )
960
+
961
+ if get_raw:
962
+ return res
963
+
964
+ df = res
929
965
  if not df.empty:
930
966
  df.dropna(inplace=True)
931
967
  df.rename(
@@ -935,3 +971,93 @@ class V3IOTSDBConnector(TSDBConnector):
935
971
  inplace=True,
936
972
  )
937
973
  return df.reset_index(drop=True)
974
+
975
+ async def add_basic_metrics(
976
+ self,
977
+ model_endpoint_objects: list[mlrun.common.schemas.ModelEndpoint],
978
+ project: str,
979
+ run_in_threadpool: Callable,
980
+ ) -> list[mlrun.common.schemas.ModelEndpoint]:
981
+ """
982
+ Fetch basic metrics from V3IO TSDB and add them to MEP objects.
983
+
984
+ :param model_endpoint_objects: A list of `ModelEndpoint` objects that will
985
+ be filled with the relevant basic metrics.
986
+ :param project: The name of the project.
987
+ :param run_in_threadpool: A function that runs another function in a thread pool.
988
+
989
+ :return: A list of `ModelEndpointMonitoringMetric` objects.
990
+ """
991
+
992
+ uids = []
993
+ model_endpoint_objects_by_uid = {}
994
+ for model_endpoint_object in model_endpoint_objects:
995
+ uid = model_endpoint_object.metadata.uid
996
+ uids.append(uid)
997
+ model_endpoint_objects_by_uid[uid] = model_endpoint_object
998
+
999
+ coroutines = [
1000
+ run_in_threadpool(
1001
+ self.get_error_count,
1002
+ endpoint_ids=uids,
1003
+ get_raw=True,
1004
+ ),
1005
+ run_in_threadpool(
1006
+ self.get_last_request,
1007
+ endpoint_ids=uids,
1008
+ get_raw=True,
1009
+ ),
1010
+ run_in_threadpool(
1011
+ self.get_avg_latency,
1012
+ endpoint_ids=uids,
1013
+ get_raw=True,
1014
+ ),
1015
+ run_in_threadpool(
1016
+ self.get_drift_status,
1017
+ endpoint_ids=uids,
1018
+ get_raw=True,
1019
+ ),
1020
+ ]
1021
+
1022
+ (
1023
+ error_count_res,
1024
+ last_request_res,
1025
+ avg_latency_res,
1026
+ drift_status_res,
1027
+ ) = await asyncio.gather(*coroutines)
1028
+
1029
+ def add_metric(
1030
+ metric: str,
1031
+ column_name: str,
1032
+ frames: list,
1033
+ ):
1034
+ for frame in frames:
1035
+ endpoint_ids = frame.column_data("endpoint_id")
1036
+ metric_data = frame.column_data(column_name)
1037
+ for index, endpoint_id in enumerate(endpoint_ids):
1038
+ mep = model_endpoint_objects_by_uid.get(endpoint_id)
1039
+ value = metric_data[index]
1040
+ if mep and value is not None and not math.isnan(value):
1041
+ setattr(mep.status, metric, value)
1042
+
1043
+ add_metric(
1044
+ "error_count",
1045
+ "count(error_count)",
1046
+ error_count_res,
1047
+ )
1048
+ add_metric(
1049
+ "last_request",
1050
+ "last(last_request_timestamp)",
1051
+ last_request_res,
1052
+ )
1053
+ add_metric(
1054
+ "avg_latency",
1055
+ "max(result_status)",
1056
+ drift_status_res,
1057
+ )
1058
+ add_metric(
1059
+ "result_status",
1060
+ "avg(latency)",
1061
+ avg_latency_res,
1062
+ )
1063
+ return list(model_endpoint_objects_by_uid.values())
@@ -246,21 +246,6 @@ def get_monitoring_drift_measures_data(project: str, endpoint_id: str) -> "DataI
246
246
  )
247
247
 
248
248
 
249
- def get_tsdb_connection_string(
250
- secret_provider: Optional[Callable[[str], str]] = None,
251
- ) -> str:
252
- """Get TSDB connection string from the project secret. If wasn't set, take it from the system
253
- configurations.
254
- :param secret_provider: An optional secret provider to get the connection string secret.
255
- :return: Valid TSDB connection string.
256
- """
257
-
258
- return mlrun.get_secret_or_env(
259
- key=mm_constants.ProjectSecretKeys.TSDB_CONNECTION,
260
- secret_provider=secret_provider,
261
- )
262
-
263
-
264
249
  def _get_profile(
265
250
  project: str,
266
251
  secret_provider: Optional[Callable[[str], str]],
@@ -554,6 +539,22 @@ def get_result_instance_fqn(
554
539
  return f"{model_endpoint_id}.{app_name}.result.{result_name}"
555
540
 
556
541
 
542
+ def get_alert_name_from_result_fqn(result_fqn: str):
543
+ """
544
+ :param result_fqn: current get_result_instance_fqn format: `{model_endpoint_id}.{app_name}.result.{result_name}`
545
+
546
+ :return: shorter fqn without forbidden alert characters.
547
+ """
548
+ if result_fqn.count(".") != 3 or result_fqn.split(".")[2] != "result":
549
+ raise mlrun.errors.MLRunValueError(
550
+ f"result_fqn: {result_fqn} is not in the correct format: {{model_endpoint_id}}.{{app_name}}."
551
+ f"result.{{result_name}}"
552
+ )
553
+ # Name format cannot contain "."
554
+ # The third component is always `result`, so it is not necessary for checking uniqueness.
555
+ return "_".join(result_fqn.split(".")[i] for i in [0, 1, 3])
556
+
557
+
557
558
  def get_default_result_instance_fqn(model_endpoint_id: str) -> str:
558
559
  return get_result_instance_fqn(
559
560
  model_endpoint_id,
@@ -13,7 +13,6 @@
13
13
  # limitations under the License.
14
14
 
15
15
  import datetime
16
- import os
17
16
  import typing
18
17
 
19
18
  import storey
@@ -65,14 +64,11 @@ class EventStreamProcessor:
65
64
  parquet_batching_max_events=self.parquet_batching_max_events,
66
65
  )
67
66
 
68
- self.storage_options = None
69
67
  self.tsdb_configurations = {}
70
68
  if not mlrun.mlconf.is_ce_mode():
71
69
  self._initialize_v3io_configurations(
72
70
  model_monitoring_access_key=model_monitoring_access_key
73
71
  )
74
- elif self.parquet_path.startswith("s3://"):
75
- self.storage_options = mlrun.mlconf.get_s3_storage_options()
76
72
 
77
73
  def _initialize_v3io_configurations(
78
74
  self,
@@ -87,17 +83,12 @@ class EventStreamProcessor:
87
83
  self.v3io_framesd = v3io_framesd or mlrun.mlconf.v3io_framesd
88
84
  self.v3io_api = v3io_api or mlrun.mlconf.v3io_api
89
85
 
90
- self.v3io_access_key = v3io_access_key or mlrun.get_secret_or_env(
91
- "V3IO_ACCESS_KEY"
92
- )
86
+ self.v3io_access_key = v3io_access_key or mlrun.mlconf.get_v3io_access_key()
93
87
  self.model_monitoring_access_key = (
94
88
  model_monitoring_access_key
95
- or os.environ.get(ProjectSecretKeys.ACCESS_KEY)
89
+ or mlrun.get_secret_or_env(ProjectSecretKeys.ACCESS_KEY)
96
90
  or self.v3io_access_key
97
91
  )
98
- self.storage_options = dict(
99
- v3io_access_key=self.model_monitoring_access_key, v3io_api=self.v3io_api
100
- )
101
92
 
102
93
  # TSDB path and configurations
103
94
  tsdb_path = mlrun.mlconf.get_model_monitoring_file_target_path(
@@ -248,12 +239,12 @@ class EventStreamProcessor:
248
239
  # Write the Parquet target file, partitioned by key (endpoint_id) and time.
249
240
  def apply_parquet_target():
250
241
  graph.add_step(
251
- "storey.ParquetTarget",
242
+ "mlrun.datastore.storeytargets.ParquetStoreyTarget",
243
+ alternative_v3io_access_key=mlrun.common.schemas.model_monitoring.ProjectSecretKeys.ACCESS_KEY,
252
244
  name="ParquetTarget",
253
245
  after="ProcessBeforeParquet",
254
246
  graph_shape="cylinder",
255
247
  path=self.parquet_path,
256
- storage_options=self.storage_options,
257
248
  max_events=self.parquet_batching_max_events,
258
249
  flush_after_seconds=self.parquet_batching_timeout_secs,
259
250
  attributes={"infer_columns_from_data": True},
@@ -794,6 +785,8 @@ class MapFeatureNames(mlrun.feature_store.steps.MapClass):
794
785
 
795
786
  """
796
787
  event[mapping_dictionary] = {}
788
+ diff = len(named_iters) - len(values_iters)
789
+ values_iters += [None] * diff
797
790
  for name, value in zip(named_iters, values_iters):
798
791
  event[name] = value
799
792
  event[mapping_dictionary][name] = value
@@ -615,13 +615,21 @@ class _KFPRunner(_PipelineRunner):
615
615
  "Notifications will only be sent if you wait for pipeline completion. "
616
616
  "Some of the features (like setting message or severity level) are not supported."
617
617
  )
618
- # for start message, fallback to old notification behavior
619
618
  for notification in notifications or []:
620
619
  params = notification.params
621
620
  params.update(notification.secret_params)
622
- project.notifiers.add_notification(notification.kind, params)
621
+ project.notifiers.add_notification(
622
+ notification_type=notification.kind,
623
+ params=params,
624
+ name=notification.name,
625
+ message=notification.message,
626
+ severity=notification.severity,
627
+ when=notification.when,
628
+ condition=notification.condition,
629
+ secret_params=notification.secret_params,
630
+ )
623
631
 
624
- project.spec.notifications = notifications
632
+ project.spec.notifications = project.notifiers.server_notifications
625
633
 
626
634
  run_id = _run_pipeline(
627
635
  workflow_handler,
mlrun/projects/project.py CHANGED
@@ -29,7 +29,6 @@ import zipfile
29
29
  from copy import deepcopy
30
30
  from os import environ, makedirs, path
31
31
  from typing import Callable, Optional, Union, cast
32
- from urllib.parse import urlparse
33
32
 
34
33
  import deprecated
35
34
  import dotenv
@@ -71,6 +70,7 @@ from mlrun.datastore.datastore_profile import (
71
70
  from mlrun.datastore.vectorstore import VectorStoreCollection
72
71
  from mlrun.model_monitoring.helpers import (
73
72
  filter_results_by_regex,
73
+ get_alert_name_from_result_fqn,
74
74
  get_result_instance_fqn,
75
75
  )
76
76
  from mlrun.runtimes.nuclio.function import RemoteRuntime
@@ -2142,7 +2142,8 @@ class MlrunProject(ModelObj):
2142
2142
  reset_policy: mlrun.common.schemas.alert.ResetPolicy = mlrun.common.schemas.alert.ResetPolicy.AUTO,
2143
2143
  ) -> list[mlrun.alerts.alert.AlertConfig]:
2144
2144
  """
2145
- :param name: AlertConfig name.
2145
+ :param name: The name of the AlertConfig template. It will be combined with mep_id, app-name
2146
+ and result name to generate a unique name.
2146
2147
  :param summary: Summary of the alert, will be sent in the generated notifications
2147
2148
  :param endpoints: The endpoints from which metrics will be retrieved to configure the alerts.
2148
2149
  This `ModelEndpointList` object obtained via the `list_model_endpoints`
@@ -2203,10 +2204,11 @@ class MlrunProject(ModelObj):
2203
2204
  )
2204
2205
  alert_result_names = list(set(specific_result_names + matching_results))
2205
2206
  for result_fqn in alert_result_names:
2207
+ result_fqn_name = get_alert_name_from_result_fqn(result_fqn)
2206
2208
  alerts.append(
2207
2209
  mlrun.alerts.alert.AlertConfig(
2208
2210
  project=self.name,
2209
- name=name,
2211
+ name=f"{name}--{result_fqn_name}",
2210
2212
  summary=summary,
2211
2213
  severity=severity,
2212
2214
  entities=alert_constants.EventEntities(
@@ -3671,50 +3673,77 @@ class MlrunProject(ModelObj):
3671
3673
 
3672
3674
  def set_model_monitoring_credentials(
3673
3675
  self,
3674
- access_key: Optional[str] = None,
3675
- stream_path: Optional[str] = None, # Deprecated
3676
- tsdb_connection: Optional[str] = None, # Deprecated
3677
- replace_creds: bool = False,
3678
3676
  *,
3679
- stream_profile_name: Optional[str] = None,
3680
- tsdb_profile_name: Optional[str] = None,
3681
- ):
3677
+ tsdb_profile_name: str,
3678
+ stream_profile_name: str,
3679
+ replace_creds: bool = False,
3680
+ ) -> None:
3682
3681
  """
3683
- Set the credentials that will be used by the project's model monitoring
3684
- infrastructure functions. Important to note that you have to set the credentials before deploying any
3685
- model monitoring or serving function.
3686
-
3687
- :param access_key: Model monitoring access key for managing user permissions.
3688
-
3689
- * None - will be set from the system configuration.
3690
- * v3io - for v3io endpoint store, pass `v3io` and the system will generate the
3691
- exact path.
3692
- :param stream_path: (Deprecated) This argument is deprecated. Use ``stream_profile_name`` instead.
3693
- Path to the model monitoring stream. By default, None. Options:
3694
-
3695
- * ``"v3io"`` - for v3io stream, pass ``"v3io"`` and the system will generate
3696
- the exact path.
3697
 - * Kafka - for Kafka stream, provide the full connection string without a custom
3698
- topic, for example ``"kafka://<some_kafka_broker>:<port>"``.
3699
- :param tsdb_connection: (Deprecated) Connection string to the time series database. By default, None.
3700
- Options:
3701
-
3702
- * v3io - for v3io stream, pass ``"v3io"`` and the system will generate the
3703
- exact path.
3704
- * TDEngine - for TDEngine tsdb, provide the full websocket connection URL,
3705
- for example ``"taosws://<username>:<password>@<host>:<port>"``.
3706
- :param replace_creds: If True, will override the existing credentials.
3707
- Please keep in mind that if you already enabled model monitoring on
3708
- your project this action can cause data loose and will require redeploying
3709
- all model monitoring functions & model monitoring infra
3710
- & tracked model server.
3711
- :param stream_profile_name: The datastore profile name of the stream to be used in model monitoring.
3712
- The supported profiles are:
3682
+ Set the credentials that will be used by the project's model monitoring infrastructure functions.
3683
+ Please note that you have to set the credentials before deploying any model monitoring application
3684
+ or a tracked serving function.
3713
3685
 
3714
- * :py:class:`~mlrun.datastore.datastore_profile.DatastoreProfileV3io`
3715
- * :py:class:`~mlrun.datastore.datastore_profile.DatastoreProfileKafkaSource`
3686
+ For example, the full flow for enabling model monitoring infrastructure with **TDEngine** and **Kafka**, is:
3687
+
3688
+ .. code-block:: python
3689
+
3690
+ import mlrun
3691
+ from mlrun.datastore.datastore_profile import (
3692
+ DatastoreProfileKafkaSource,
3693
+ TDEngineDatastoreProfile,
3694
+ )
3695
+
3696
+ project = mlrun.get_or_create_project("mm-infra-setup")
3697
+
3698
+ # Create and register TSDB profile
3699
+ tsdb_profile = TDEngineDatastoreProfile(
3700
+ name="my-tdengine",
3701
+ host="<tdengine-server-ip-address>",
3702
+ port=6041,
3703
+ user="username",
3704
+ password="<tdengine-password>",
3705
+ )
3706
+ project.register_datastore_profile(tsdb_profile)
3707
+
3708
+ # Create and register stream profile
3709
+ stream_profile = DatastoreProfileKafkaSource(
3710
+ name="my-kafka",
3711
+ brokers=["<kafka-broker-ip-address>:9094"],
3712
+ topics=[], # Keep the topics list empty
3713
+ ## SASL is supported
3714
+ # sasl_user="user1",
3715
+ # sasl_pass="<kafka-sasl-password>",
3716
+ )
3717
+ project.register_datastore_profile(stream_profile)
3718
+
3719
+ # Set model monitoring credentials and enable the infrastructure
3720
+ project.set_model_monitoring_credentials(
3721
+ tsdb_profile_name=tsdb_profile.name,
3722
+ stream_profile_name=stream_profile.name,
3723
+ )
3724
+ project.enable_model_monitoring()
3725
+
3726
+ Note that you will need to change the profiles if you want to use **V3IO** TSDB and stream:
3727
+
3728
+ .. code-block:: python
3729
+
3730
+ from mlrun.datastore.datastore_profile import DatastoreProfileV3io
3731
+
3732
+ # Create and register TSDB profile
3733
+ tsdb_profile = DatastoreProfileV3io(
3734
+ name="my-v3io-tsdb",
3735
+ )
3736
+ project.register_datastore_profile(tsdb_profile)
3737
+
3738
+ # Create and register stream profile
3739
+ stream_profile = DatastoreProfileV3io(
3740
+ name="my-v3io-stream",
3741
+ v3io_access_key=mlrun.mlconf.get_v3io_access_key(),
3742
+ )
3743
+ project.register_datastore_profile(stream_profile)
3744
+
3745
+ In the V3IO datastore, you must provide an explicit access key to the stream, but not to the TSDB.
3716
3746
 
3717
- You need to register one of them, and pass the profile's name.
3718
3747
  :param tsdb_profile_name: The datastore profile name of the time-series database to be used in model
3719
3748
  monitoring. The supported profiles are:
3720
3749
 
@@ -3722,76 +3751,24 @@ class MlrunProject(ModelObj):
3722
3751
  * :py:class:`~mlrun.datastore.datastore_profile.TDEngineDatastoreProfile`
3723
3752
 
3724
3753
  You need to register one of them, and pass the profile's name.
3725
- """
3726
- db = mlrun.db.get_run_db(secrets=self._secrets)
3727
-
3728
- if tsdb_connection:
3729
- warnings.warn(
3730
- "The `tsdb_connection` argument is deprecated and will be removed in MLRun version 1.8.0. "
3731
- "Use `tsdb_profile_name` instead.",
3732
- FutureWarning,
3733
- )
3734
- if tsdb_profile_name:
3735
- raise mlrun.errors.MLRunValueError(
3736
- "If you set `tsdb_profile_name`, you must not pass `tsdb_connection`."
3737
- )
3738
- if tsdb_connection == "v3io":
3739
- tsdb_profile = mlrun.datastore.datastore_profile.DatastoreProfileV3io(
3740
- name=mm_constants.DefaultProfileName.TSDB
3741
- )
3742
- else:
3743
- parsed_url = urlparse(tsdb_connection)
3744
- if parsed_url.scheme != "taosws":
3745
- raise mlrun.errors.MLRunValueError(
3746
- f"Unsupported `tsdb_connection`: '{tsdb_connection}'."
3747
- )
3748
- tsdb_profile = (
3749
- mlrun.datastore.datastore_profile.TDEngineDatastoreProfile(
3750
- name=mm_constants.DefaultProfileName.TSDB,
3751
- user=parsed_url.username,
3752
- password=parsed_url.password,
3753
- host=parsed_url.hostname,
3754
- port=parsed_url.port,
3755
- )
3756
- )
3754
+ :param stream_profile_name: The datastore profile name of the stream to be used in model monitoring.
3755
+ The supported profiles are:
3757
3756
 
3758
- self.register_datastore_profile(tsdb_profile)
3759
- tsdb_profile_name = tsdb_profile.name
3757
+ * :py:class:`~mlrun.datastore.datastore_profile.DatastoreProfileV3io`
3758
+ * :py:class:`~mlrun.datastore.datastore_profile.DatastoreProfileKafkaSource`
3760
3759
 
3761
- if stream_path:
3762
- warnings.warn(
3763
- "The `stream_path` argument is deprecated and will be removed in MLRun version 1.8.0. "
3764
- "Use `stream_profile_name` instead.",
3765
- FutureWarning,
3766
- )
3767
- if stream_profile_name:
3768
- raise mlrun.errors.MLRunValueError(
3769
- "If you set `stream_profile_name`, you must not pass `stream_path`."
3770
- )
3771
- if stream_path == "v3io":
3772
- stream_profile = mlrun.datastore.datastore_profile.DatastoreProfileV3io(
3773
- name=mm_constants.DefaultProfileName.STREAM
3774
- )
3775
- else:
3776
- parsed_stream = urlparse(stream_path)
3777
- if parsed_stream.scheme != "kafka":
3778
- raise mlrun.errors.MLRunValueError(
3779
- f"Unsupported `stream_path`: '{stream_path}'."
3780
- )
3781
- stream_profile = (
3782
- mlrun.datastore.datastore_profile.DatastoreProfileKafkaSource(
3783
- name=mm_constants.DefaultProfileName.STREAM,
3784
- brokers=[parsed_stream.netloc],
3785
- topics=[],
3786
- )
3787
- )
3788
- self.register_datastore_profile(stream_profile)
3789
- stream_profile_name = stream_profile.name
3760
+ You need to register one of them, and pass the profile's name.
3761
+ :param replace_creds: If ``True`` - override the existing credentials.
3762
+ Please keep in mind that if you have already enabled model monitoring
3763
+ on your project, replacing the credentials can cause data loss, and will
3764
+ require redeploying all the model monitoring functions, model monitoring
3765
+ infrastructure, and tracked model servers.
3766
+ """
3767
+ db = mlrun.db.get_run_db(secrets=self._secrets)
3790
3768
 
3791
3769
  db.set_model_monitoring_credentials(
3792
3770
  project=self.name,
3793
3771
  credentials={
3794
- "access_key": access_key,
3795
3772
  "tsdb_profile_name": tsdb_profile_name,
3796
3773
  "stream_profile_name": stream_profile_name,
3797
3774
  },
mlrun/serving/states.py CHANGED
@@ -812,8 +812,8 @@ class RouterStep(TaskStep):
812
812
  * **archive**:
813
813
  1. If model endpoints with the same name exist, preserve them.
814
814
  2. Create a new model endpoint with the same name and set it to `latest`.
815
-
816
815
  """
816
+
817
817
  if len(self.routes.keys()) >= MAX_MODELS_PER_ROUTER and key not in self.routes:
818
818
  raise mlrun.errors.MLRunModelLimitExceededError(
819
819
  f"Router cannot support more than {MAX_MODELS_PER_ROUTER} model endpoints. "