mlrun 1.5.0rc1__py3-none-any.whl → 1.5.0rc2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of mlrun might be problematic.

Files changed (119)
  1. mlrun/__init__.py +2 -35
  2. mlrun/__main__.py +1 -40
  3. mlrun/api/api/api.py +6 -0
  4. mlrun/api/api/endpoints/feature_store.py +0 -4
  5. mlrun/api/api/endpoints/files.py +14 -2
  6. mlrun/api/api/endpoints/functions.py +6 -1
  7. mlrun/api/api/endpoints/logs.py +17 -3
  8. mlrun/api/api/endpoints/pipelines.py +1 -5
  9. mlrun/api/api/endpoints/projects.py +88 -0
  10. mlrun/api/api/endpoints/runs.py +48 -6
  11. mlrun/api/api/endpoints/workflows.py +355 -0
  12. mlrun/api/api/utils.py +1 -1
  13. mlrun/api/crud/__init__.py +1 -0
  14. mlrun/api/crud/client_spec.py +3 -0
  15. mlrun/api/crud/model_monitoring/deployment.py +36 -7
  16. mlrun/api/crud/model_monitoring/grafana.py +1 -1
  17. mlrun/api/crud/model_monitoring/helpers.py +32 -2
  18. mlrun/api/crud/model_monitoring/model_endpoints.py +27 -5
  19. mlrun/api/crud/notifications.py +9 -4
  20. mlrun/api/crud/pipelines.py +4 -9
  21. mlrun/api/crud/runtime_resources.py +4 -3
  22. mlrun/api/crud/secrets.py +21 -0
  23. mlrun/api/crud/workflows.py +352 -0
  24. mlrun/api/db/base.py +16 -1
  25. mlrun/api/db/sqldb/db.py +97 -16
  26. mlrun/api/launcher.py +26 -7
  27. mlrun/api/main.py +3 -4
  28. mlrun/{mlutils → api/rundb}/__init__.py +2 -6
  29. mlrun/{db → api/rundb}/sqldb.py +35 -83
  30. mlrun/api/runtime_handlers/__init__.py +56 -0
  31. mlrun/api/runtime_handlers/base.py +1247 -0
  32. mlrun/api/runtime_handlers/daskjob.py +209 -0
  33. mlrun/api/runtime_handlers/kubejob.py +37 -0
  34. mlrun/api/runtime_handlers/mpijob.py +147 -0
  35. mlrun/api/runtime_handlers/remotesparkjob.py +29 -0
  36. mlrun/api/runtime_handlers/sparkjob.py +148 -0
  37. mlrun/api/utils/builder.py +1 -4
  38. mlrun/api/utils/clients/chief.py +14 -0
  39. mlrun/api/utils/scheduler.py +98 -15
  40. mlrun/api/utils/singletons/db.py +4 -0
  41. mlrun/artifacts/manager.py +1 -2
  42. mlrun/common/schemas/__init__.py +6 -0
  43. mlrun/common/schemas/auth.py +4 -1
  44. mlrun/common/schemas/client_spec.py +1 -1
  45. mlrun/common/schemas/model_monitoring/__init__.py +1 -0
  46. mlrun/common/schemas/model_monitoring/constants.py +11 -0
  47. mlrun/common/schemas/project.py +1 -0
  48. mlrun/common/schemas/runs.py +1 -8
  49. mlrun/common/schemas/schedule.py +1 -8
  50. mlrun/common/schemas/workflow.py +54 -0
  51. mlrun/config.py +42 -40
  52. mlrun/datastore/sources.py +1 -1
  53. mlrun/db/__init__.py +4 -68
  54. mlrun/db/base.py +12 -0
  55. mlrun/db/factory.py +65 -0
  56. mlrun/db/httpdb.py +175 -19
  57. mlrun/db/nopdb.py +4 -2
  58. mlrun/execution.py +4 -2
  59. mlrun/feature_store/__init__.py +1 -0
  60. mlrun/feature_store/api.py +1 -2
  61. mlrun/feature_store/feature_set.py +0 -10
  62. mlrun/feature_store/feature_vector.py +340 -2
  63. mlrun/feature_store/ingestion.py +5 -10
  64. mlrun/feature_store/retrieval/base.py +118 -104
  65. mlrun/feature_store/retrieval/dask_merger.py +17 -10
  66. mlrun/feature_store/retrieval/job.py +4 -1
  67. mlrun/feature_store/retrieval/local_merger.py +18 -18
  68. mlrun/feature_store/retrieval/spark_merger.py +21 -14
  69. mlrun/feature_store/retrieval/storey_merger.py +21 -15
  70. mlrun/kfpops.py +3 -9
  71. mlrun/launcher/base.py +3 -3
  72. mlrun/launcher/client.py +3 -2
  73. mlrun/launcher/factory.py +16 -13
  74. mlrun/lists.py +0 -11
  75. mlrun/model.py +9 -15
  76. mlrun/model_monitoring/helpers.py +15 -25
  77. mlrun/model_monitoring/model_monitoring_batch.py +72 -4
  78. mlrun/model_monitoring/prometheus.py +219 -0
  79. mlrun/model_monitoring/stores/__init__.py +15 -9
  80. mlrun/model_monitoring/stores/sql_model_endpoint_store.py +3 -1
  81. mlrun/model_monitoring/stream_processing.py +181 -29
  82. mlrun/package/packager.py +6 -8
  83. mlrun/package/packagers/default_packager.py +121 -10
  84. mlrun/platforms/__init__.py +0 -2
  85. mlrun/platforms/iguazio.py +0 -56
  86. mlrun/projects/pipelines.py +57 -158
  87. mlrun/projects/project.py +6 -32
  88. mlrun/render.py +1 -1
  89. mlrun/run.py +2 -124
  90. mlrun/runtimes/__init__.py +6 -42
  91. mlrun/runtimes/base.py +26 -1241
  92. mlrun/runtimes/daskjob.py +2 -198
  93. mlrun/runtimes/function.py +16 -5
  94. mlrun/runtimes/kubejob.py +5 -29
  95. mlrun/runtimes/mpijob/__init__.py +2 -2
  96. mlrun/runtimes/mpijob/abstract.py +10 -1
  97. mlrun/runtimes/mpijob/v1.py +0 -76
  98. mlrun/runtimes/mpijob/v1alpha1.py +1 -74
  99. mlrun/runtimes/nuclio.py +3 -2
  100. mlrun/runtimes/pod.py +0 -10
  101. mlrun/runtimes/remotesparkjob.py +1 -15
  102. mlrun/runtimes/serving.py +1 -1
  103. mlrun/runtimes/sparkjob/__init__.py +0 -1
  104. mlrun/runtimes/sparkjob/abstract.py +4 -131
  105. mlrun/serving/states.py +1 -1
  106. mlrun/utils/db.py +0 -2
  107. mlrun/utils/helpers.py +19 -13
  108. mlrun/utils/notifications/notification_pusher.py +5 -25
  109. mlrun/utils/regex.py +7 -2
  110. mlrun/utils/version/version.json +2 -2
  111. {mlrun-1.5.0rc1.dist-info → mlrun-1.5.0rc2.dist-info}/METADATA +24 -23
  112. {mlrun-1.5.0rc1.dist-info → mlrun-1.5.0rc2.dist-info}/RECORD +116 -107
  113. {mlrun-1.5.0rc1.dist-info → mlrun-1.5.0rc2.dist-info}/WHEEL +1 -1
  114. mlrun/mlutils/data.py +0 -160
  115. mlrun/mlutils/models.py +0 -78
  116. mlrun/mlutils/plots.py +0 -902
  117. {mlrun-1.5.0rc1.dist-info → mlrun-1.5.0rc2.dist-info}/LICENSE +0 -0
  118. {mlrun-1.5.0rc1.dist-info → mlrun-1.5.0rc2.dist-info}/entry_points.txt +0 -0
  119. {mlrun-1.5.0rc1.dist-info → mlrun-1.5.0rc2.dist-info}/top_level.txt +0 -0
@@ -41,8 +41,9 @@ class SparkFeatureMerger(BaseMerger):
  self,
  entity_df,
  entity_timestamp_column: str,
- featureset,
- featureset_df,
+ featureset_name: str,
+ featureset_timstamp: str,
+ featureset_df: list,
  left_keys: list,
  right_keys: list,
  ):
@@ -54,13 +55,15 @@ class SparkFeatureMerger(BaseMerger):
  the feature tables.
  entity_timestamp_column (str): Column name in entity_df which represents
  event timestamp.
- featureset_df (Dataframe): Spark dataframe representing the feature table.
+ featureset (Dataframe): Spark dataframe representing the feature table.
  featureset (FeatureSet): Feature set specification, which provides information on
  how the join should be performed, such as the entity primary keys.
  Returns:
  DataFrame: Join result, which contains all the original columns from entity_df, as well
  as all the features specified in featureset, where the feature columns will
  be prefixed with featureset_df name.
+ :param featureset_name:
+ :param featureset_timstamp:
  """

  from pyspark.sql import Window
@@ -68,7 +71,7 @@ class SparkFeatureMerger(BaseMerger):

  entity_with_id = entity_df.withColumn("_row_nr", monotonically_increasing_id())
  rename_right_keys = {}
- for key in right_keys + [featureset.spec.timestamp_key]:
+ for key in right_keys + [featureset_timstamp]:
  if key in entity_df.columns:
  rename_right_keys[key] = f"ft__{key}"
  # get columns for projection
@@ -79,7 +82,7 @@ class SparkFeatureMerger(BaseMerger):

  aliased_featureset_df = featureset_df.select(projection)
  right_timestamp = rename_right_keys.get(
- featureset.spec.timestamp_key, featureset.spec.timestamp_key
+ featureset_timstamp, featureset_timstamp
  )

  # set join conditions
@@ -106,7 +109,7 @@ class SparkFeatureMerger(BaseMerger):
  "_rank", row_number().over(window)
  ).filter(col("_rank") == 1)

- for key in right_keys + [featureset.spec.timestamp_key]:
+ for key in right_keys + [featureset_timstamp]:
  if key in entity_df.columns + [entity_timestamp_column]:
  filter_most_recent_feature_timestamp = (
  filter_most_recent_feature_timestamp.drop(
@@ -121,7 +124,8 @@ class SparkFeatureMerger(BaseMerger):
  self,
  entity_df,
  entity_timestamp_column: str,
- featureset,
+ featureset_name,
+ featureset_timestamp,
  featureset_df,
  left_keys: list,
  right_keys: list,
@@ -130,20 +134,18 @@ class SparkFeatureMerger(BaseMerger):
  """
  spark dataframes join

- Args:
- entity_df (DataFrame): Spark dataframe representing the entities, to be joined with
+ :param entity_df (DataFrame): Spark dataframe representing the entities, to be joined with
  the feature tables.
- entity_timestamp_column (str): Column name in entity_df which represents
+ :param entity_timestamp_column (str): Column name in entity_df which represents
  event timestamp.
- featureset_df (Dataframe): Spark dataframe representing the feature table.
- featureset (FeatureSet): Feature set specification, which provide information on
- how the join should be performed, such as the entity primary keys.
+ :param featureset_df (Dataframe): Spark dataframe representing the feature table.
+ :param featureset_name:
+ :param featureset_timestamp:

  Returns:
  DataFrame: Join result, which contains all the original columns from entity_df, as well
  as all the features specified in featureset, where the feature columns will
  be prefixed with featureset_df name.
-
  """
  if left_keys != right_keys:
  join_cond = [
@@ -270,3 +272,8 @@ class SparkFeatureMerger(BaseMerger):
  self._result_df = self._result_df.orderBy(
  *[col(col_name).asc_nulls_last() for col_name in order_by_active]
  )
+
+ def _convert_entity_rows_to_engine_df(self, entity_rows):
+ if entity_rows is not None and not hasattr(entity_rows, "rdd"):
+ return self.spark.createDataFrame(entity_rows)
+ return entity_rows
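
Note on the change above: the Spark merger's join helpers no longer receive the FeatureSet object itself, only its name, timestamp key, and dataframe. A minimal caller-side sketch (the method name `_asof_join` and the argument values are assumptions for illustration, not taken from this diff):

    # hypothetical adaptation of a caller to the new per-argument signature
    merged_df = merger._asof_join(
        entity_df,
        entity_timestamp_column="timestamp",
        featureset_name=feature_set.metadata.name,            # was: featureset
        featureset_timstamp=feature_set.spec.timestamp_key,   # note: this parameter name is misspelled in the RC
        featureset_df=featureset_df,
        left_keys=["customer_id"],
        right_keys=["customer_id"],
    )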
@@ -41,45 +41,45 @@ class StoreyFeatureMerger(BaseMerger):
  )
  next = graph

- fs_link_list = self._create_linked_relation_list(
+ join_graph = self._get_graph(
  feature_set_objects, feature_set_fields, entity_keys
  )

  all_columns = []
  save_column = []
  entity_keys = []
+ del_columns = []
  end_aliases = {}
- for node in fs_link_list:
- name = node.name
- if name == self._entity_rows_node_name:
- continue
- featureset = feature_set_objects[name]
+ for step in join_graph.steps:
+ name = step.right_feature_set_name
+ feature_set = feature_set_objects[name]
  columns = feature_set_fields[name]
  column_names = [name for name, alias in columns]
  aliases = {name: alias for name, alias in columns if alias}
  all_columns += [aliases.get(name, name) for name in column_names]
- for col in node.data["save_cols"]:
+ saved_columns_for_relation = list(
+ self.vector.get_feature_set_relations(feature_set).keys()
+ )
+
+ for col in saved_columns_for_relation:
  if col not in column_names:
  column_names.append(col)
+ del_columns.append(col)
  else:
  save_column.append(col)

- entity_list = node.data["right_keys"] or list(
- featureset.spec.entities.keys()
- )
+ entity_list = step.right_keys or list(feature_set.spec.entities.keys())
  if not entity_keys:
  # if entity_keys not provided by the user we will set it to be the entity of the first feature set
  entity_keys = entity_list
  end_aliases.update(
  {
  k: v
- for k, v in zip(entity_list, node.data["left_keys"])
+ for k, v in zip(entity_list, step.left_keys)
  if k != v and v in save_column
  }
  )
- mapping = {
- k: v for k, v in zip(node.data["left_keys"], entity_list) if k != v
- }
+ mapping = {k: v for k, v in zip(step.left_keys, entity_list) if k != v}
  if mapping:
  next = next.to(
  "storey.Rename",
@@ -91,7 +91,7 @@ class StoreyFeatureMerger(BaseMerger):
  "storey.QueryByKey",
  f"query-{name}",
  features=column_names,
- table=featureset.uri,
+ table=feature_set.uri,
  key_field=entity_list,
  aliases=aliases,
  fixed_window_type=fixed_window_type.to_qbk_fixed_window_type(),
@@ -103,6 +103,12 @@ class StoreyFeatureMerger(BaseMerger):
  "rename-entity-to-features",
  mapping=end_aliases,
  )
+ if del_columns:
+ next = next.to(
+ "storey.flow.DropColumns",
+ "drop-unnecessary-columns",
+ columns=del_columns,
+ )
  for name in start_states:
  next.set_next(name)

mlrun/kfpops.py CHANGED
@@ -26,7 +26,6 @@ import mlrun
  from mlrun.errors import err_to_str

  from .config import config
- from .db import get_or_set_dburl, get_run_db
  from .model import HyperParamOptions, RunSpec
  from .utils import (
  dict_to_yaml,
@@ -297,7 +296,7 @@ def mlrun_op(
  outputs = [] if outputs is None else outputs
  labels = {} if labels is None else labels

- rundb = rundb or get_or_set_dburl()
+ rundb = rundb or mlrun.db.get_or_set_dburl()
  cmd = [
  "python",
  "-m",
@@ -732,7 +731,7 @@ def generate_kfp_dag_and_resolve_project(run, project=None):
  return dag, project, workflow["status"].get("message", "")


- def format_summary_from_kfp_run(kfp_run, project=None, session=None):
+ def format_summary_from_kfp_run(kfp_run, project=None):
  override_project = project if project and project != "*" else None
  dag, project, message = generate_kfp_dag_and_resolve_project(
  kfp_run, override_project
@@ -740,12 +739,7 @@ def format_summary_from_kfp_run(kfp_run, project=None, session=None):
  run_id = get_in(kfp_run, "run.id")

  # enrich DAG with mlrun run info
- if session:
- runs = mlrun.api.utils.singletons.db.get_db().list_runs(
- session, project=project, labels=f"workflow={run_id}"
- )
- else:
- runs = get_run_db().list_runs(project=project, labels=f"workflow={run_id}")
+ runs = mlrun.db.get_run_db().list_runs(project=project, labels=f"workflow={run_id}")

  for run in runs:
  step = get_in(run, ["metadata", "labels", "mlrun/runner-pod"])
mlrun/launcher/base.py CHANGED
@@ -74,9 +74,9 @@ class BaseLauncher(abc.ABC):
  """run the function from the server/client[local/remote]"""
  pass

- @staticmethod
  @abc.abstractmethod
  def enrich_runtime(
+ self,
  runtime: "mlrun.runtimes.base.BaseRuntime",
  project_name: Optional[str] = "",
  ):
@@ -385,8 +385,8 @@ class BaseLauncher(abc.ABC):
  name=run.metadata.name,
  )
  if run.status.state in [
- mlrun.runtimes.base.RunStates.error,
- mlrun.runtimes.base.RunStates.aborted,
+ mlrun.runtimes.constants.RunStates.error,
+ mlrun.runtimes.constants.RunStates.aborted,
  ]:
  if runtime._is_remote and not runtime.is_child:
  logger.error(
mlrun/launcher/client.py CHANGED
@@ -31,9 +31,10 @@ class ClientBaseLauncher(launcher.BaseLauncher, abc.ABC):
  Abstract class for common code between client launchers
  """

- @staticmethod
  def enrich_runtime(
- runtime: "mlrun.runtimes.base.BaseRuntime", project_name: Optional[str] = ""
+ self,
+ runtime: "mlrun.runtimes.base.BaseRuntime",
+ project_name: Optional[str] = "",
  ):
  runtime.try_auto_mount_based_on_config()
  runtime._fill_credentials()
mlrun/launcher/factory.py CHANGED
@@ -11,7 +11,7 @@
  # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  # See the License for the specific language governing permissions and
  # limitations under the License.
- from typing import Optional, Type
+ from dependency_injector import containers, providers

  import mlrun.config
  import mlrun.errors
@@ -25,7 +25,7 @@ class LauncherFactory(
  metaclass=mlrun.utils.singleton.AbstractSingleton,
  ):
  def __init__(self):
- self._launcher_cls: Optional[Type[mlrun.launcher.base.BaseLauncher]] = None
+ self._launcher_container = LauncherContainer()

  def create_launcher(
  self, is_remote: bool, **kwargs
@@ -40,19 +40,22 @@ class LauncherFactory(

  :return: The appropriate launcher for the specified run.
  """
- if self._launcher_cls:
- return self._launcher_cls(**kwargs)
+ if mlrun.config.is_running_as_api():
+ return self._launcher_container.server_side_launcher(**kwargs)

  local = kwargs.get("local", False)
  if is_remote and not local:
- return mlrun.launcher.remote.ClientRemoteLauncher(**kwargs)
+ return self._launcher_container.client_remote_launcher(**kwargs)

- return mlrun.launcher.local.ClientLocalLauncher(**kwargs)
+ return self._launcher_container.client_local_launcher(**kwargs)

- def set_launcher(self, launcher_cls: Type[mlrun.launcher.base.BaseLauncher]):
- """
- Launcher setter for injection of a custom launcher.
- This allows us to override the launcher from external packages without having to import them.
- :param launcher_cls: The launcher class to use.
- """
- self._launcher_cls = launcher_cls
+
+ class LauncherContainer(containers.DeclarativeContainer):
+ client_remote_launcher = providers.Factory(
+ mlrun.launcher.remote.ClientRemoteLauncher
+ )
+ client_local_launcher = providers.Factory(mlrun.launcher.local.ClientLocalLauncher)
+
+ # Provider for injection of a server side launcher.
+ # This allows us to override the launcher from external packages without having to import them.
+ server_side_launcher = providers.Factory(mlrun.launcher.base.BaseLauncher)
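
The removed set_launcher() hook is replaced by a dependency-injector container. A minimal sketch of how an external package could now inject its own server-side launcher, assuming the standard provider override API of the dependency-injector library (everything other than the names shown in the diff is illustrative):

    from dependency_injector import providers

    import mlrun.launcher.base
    import mlrun.launcher.factory


    class MyServerSideLauncher(mlrun.launcher.base.BaseLauncher):
        ...  # concrete server-side launcher implementation

    # override the server_side_launcher provider instead of calling set_launcher()
    factory = mlrun.launcher.factory.LauncherFactory()
    factory._launcher_container.server_side_launcher.override(
        providers.Factory(MyServerSideLauncher)
    )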
mlrun/lists.py CHANGED
@@ -11,7 +11,6 @@
  # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  # See the License for the specific language governing permissions and
  # limitations under the License.
- import warnings
  from copy import copy
  from typing import List

@@ -220,16 +219,6 @@ class ArtifactList(list):
  """return as a list of artifact objects"""
  return [dict_to_artifact(artifact) for artifact in self]

- def objects(self) -> List[Artifact]:
- """return as a list of artifact objects"""
- warnings.warn(
- "'objects' is deprecated in 1.3.0 and will be removed in 1.5.0. "
- "Use 'to_objects' instead.",
- # TODO: remove in 1.5.0
- FutureWarning,
- )
- return [dict_to_artifact(artifact) for artifact in self]
-
  def dataitems(self) -> List["mlrun.DataItem"]:
  """return as a list of DataItem objects"""
  dataitems = []
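
Migration note: ArtifactList.objects(), deprecated since 1.3.0, is removed in this release. A one-line sketch of the replacement (the list_artifacts() call is only an assumed way to obtain an ArtifactList):

    artifacts = mlrun.get_run_db().list_artifacts(project="my-project")
    artifact_objects = artifacts.to_objects()  # replaces the removed .objects()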
mlrun/model.py CHANGED
@@ -137,6 +137,8 @@ class ModelObj:

  # model class for building ModelObj dictionaries
  class ObjectDict:
+ kind = "object_dict"
+
  def __init__(self, classes_map, default_kind=""):
  self._children = OrderedDict()
  self._default_kind = default_kind
@@ -1309,25 +1311,17 @@ class RunObject(RunTemplate):
  """return or watch on the run logs"""
  if not db:
  db = mlrun.get_run_db()
+
  if not db:
- print("DB is not configured, cannot show logs")
+ logger.warning("DB is not configured, cannot show logs")
  return None

- new_offset = 0
- if db.kind == "http":
- state, new_offset = db.watch_log(
- self.metadata.uid, self.metadata.project, watch=watch, offset=offset
- )
- # not expected to reach this else, as FileDB is not supported any more and because we don't watch logs on API
- else:
- state, text = db.get_log(
- self.metadata.uid, self.metadata.project, offset=offset
- )
- if text:
- print(text.decode())
-
+ state, new_offset = db.watch_log(
+ self.metadata.uid, self.metadata.project, watch=watch, offset=offset
+ )
  if state:
- print(f"final state: {state}")
+ logger.debug("Run reached terminal state", state=state)
+
  return state, new_offset

  def wait_for_completion(
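
The logs() flow above now always delegates to db.watch_log() and logs the terminal state instead of printing it. A short usage sketch (assumes an existing RunObject named run):

    state, new_offset = run.logs(watch=False)  # returns (state, new_offset) from db.watch_log()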
@@ -14,9 +14,10 @@
  #


+ import typing
+
  import mlrun.common.model_monitoring.helpers
  import mlrun.common.schemas
- from mlrun.config import is_running_as_api


  def get_stream_path(project: str = None):
@@ -35,31 +36,20 @@ def get_stream_path(project: str = None):
  )


- def get_connection_string(project: str = None):
- """Get endpoint store connection string from the project secret.
- If wasn't set, take it from the system configurations"""
+ def get_connection_string(secret_provider: typing.Callable = None) -> str:
+ """Get endpoint store connection string from the project secret. If wasn't set, take it from the system
+ configurations.

- if is_running_as_api():
- # Running on API server side
- import mlrun.api.crud.secrets
- import mlrun.common.schemas
+ :param secret_provider: An optional secret provider to get the connection string secret.

- return (
- mlrun.api.crud.secrets.Secrets().get_project_secret(
- project=project,
- provider=mlrun.common.schemas.secret.SecretProviderName.kubernetes,
- allow_secrets_from_k8s=True,
- secret_key=mlrun.common.schemas.model_monitoring.ProjectSecretKeys.ENDPOINT_STORE_CONNECTION,
- )
- or mlrun.mlconf.model_endpoint_monitoring.endpoint_store_connection
- )
- else:
- # Running on stream server side
- import mlrun
+ :return: Valid SQL connection string.

- return (
- mlrun.get_secret_or_env(
- mlrun.common.schemas.model_monitoring.ProjectSecretKeys.ENDPOINT_STORE_CONNECTION
- )
- or mlrun.mlconf.model_endpoint_monitoring.endpoint_store_connection
+ """
+
+ return (
+ mlrun.get_secret_or_env(
+ key=mlrun.common.schemas.model_monitoring.ProjectSecretKeys.ENDPOINT_STORE_CONNECTION,
+ secret_provider=secret_provider,
  )
+ or mlrun.mlconf.model_endpoint_monitoring.endpoint_store_connection
+ )
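
With the refactor above, get_connection_string() no longer branches on whether it runs on the API or the stream side; the caller supplies the secret lookup. A minimal usage sketch (the helpers module path and the plain-dict secret source are assumptions for illustration):

    import mlrun.common.schemas.model_monitoring as mm_constants
    from mlrun.model_monitoring.helpers import get_connection_string

    secrets = {
        mm_constants.ProjectSecretKeys.ENDPOINT_STORE_CONNECTION: "mysql+pymysql://user:pass@host:3306/db"
    }

    # secret_provider is any callable that maps a key to its value (or returns None)
    connection_string = get_connection_string(secret_provider=secrets.get)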
@@ -23,6 +23,7 @@ from typing import Any, ClassVar, Dict, List, Optional, Tuple, Union

  import numpy as np
  import pandas as pd
+ import v3io
  import v3io.dataplane
  import v3io_frames

@@ -803,7 +804,7 @@ class BatchProcessor:

  if not mlrun.mlconf.is_ce_mode():
  # Update drift results in TSDB
- self._update_drift_in_input_stream(
+ self._update_drift_in_v3io_tsdb(
  endpoint_id=endpoint[
  mlrun.common.schemas.model_monitoring.EventFieldType.UID
  ],
@@ -812,11 +813,15 @@ class BatchProcessor:
  drift_result=drift_result,
  timestamp=timestamp,
  )
- logger.info(
- "Done updating drift measures",
+
+ else:
+ # Update drift results in Prometheus
+ self._update_drift_in_prometheus(
  endpoint_id=endpoint[
  mlrun.common.schemas.model_monitoring.EventFieldType.UID
  ],
+ drift_status=drift_status,
+ drift_result=drift_result,
  )


@@ -824,6 +829,12 @@ class BatchProcessor:
  f"Exception for endpoint {endpoint[mlrun.common.schemas.model_monitoring.EventFieldType.UID]}"
  )
  self.exception = e
+ logger.info(
+ "Done updating drift measures",
+ endpoint_id=endpoint[
+ mlrun.common.schemas.model_monitoring.EventFieldType.UID
+ ],
+ )

  def _get_interval_range(self) -> Tuple[datetime.datetime, datetime.datetime]:
  """Getting batch interval time range"""
@@ -852,7 +863,7 @@ class BatchProcessor:
  pair_list = pair.split(":")
  self.batch_dict[pair_list[0]] = float(pair_list[1])

- def _update_drift_in_input_stream(
+ def _update_drift_in_v3io_tsdb(
  self,
  endpoint_id: str,
  drift_status: DriftStatus,
@@ -920,6 +931,63 @@ class BatchProcessor:
  endpoint=endpoint_id,
  )

+ def _update_drift_in_prometheus(
+ self,
+ endpoint_id: str,
+ drift_status: DriftStatus,
+ drift_result: Dict[str, Dict[str, Any]],
+ ):
+ """Push drift metrics to Prometheus registry. Please note that the metrics are being pushed through HTTP
+ to the monitoring stream pod that writes them into a local registry. Afterwards, Prometheus wil scrape these
+ metrics that will be available in the Grafana charts.
+
+ :param endpoint_id: The unique id of the model endpoint.
+ :param drift_status: Drift status result. Possible values can be found under DriftStatus enum class.
+ :param drift_result: A dictionary that includes the drift results for each feature.
+
+
+ """
+ stream_http_path = (
+ mlrun.mlconf.model_endpoint_monitoring.default_http_sink.format(
+ project=self.project
+ )
+ )
+
+ statistical_metrics = ["hellinger_mean", "tvd_mean", "kld_mean"]
+ metrics = []
+ for metric in statistical_metrics:
+ metrics.append(
+ {
+ mlrun.common.schemas.model_monitoring.EventFieldType.ENDPOINT_ID: endpoint_id,
+ mlrun.common.schemas.model_monitoring.EventFieldType.METRIC: metric,
+ mlrun.common.schemas.model_monitoring.EventFieldType.VALUE: drift_result[
+ metric
+ ],
+ }
+ )
+
+ http_session = mlrun.utils.HTTPSessionWithRetry(
+ retry_on_post=True,
+ verbose=True,
+ )
+
+ http_session.request(
+ method="POST",
+ url=stream_http_path + "/monitoring-batch-metrics",
+ data=json.dumps(metrics),
+ )
+
+ drift_status_dict = {
+ mlrun.common.schemas.model_monitoring.EventFieldType.ENDPOINT_ID: endpoint_id,
+ mlrun.common.schemas.model_monitoring.EventFieldType.DRIFT_STATUS: drift_status.value,
+ }
+
+ http_session.request(
+ method="POST",
+ url=stream_http_path + "/monitoring-drift-status",
+ data=json.dumps(drift_status_dict),
+ )
+

  def handler(context: mlrun.run.MLClientCtx):
  batch_processor = BatchProcessor(
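
For context on the new _update_drift_in_prometheus() method: the two POST requests carry plain JSON bodies. An illustrative example of the payloads, assuming the EventFieldType constants resolve to "endpoint_id", "metric", "value", and "drift_status" (the endpoint id and numbers are made up):

    import json

    batch_metrics = [
        {"endpoint_id": "ep-1234", "metric": "hellinger_mean", "value": 0.12},
        {"endpoint_id": "ep-1234", "metric": "tvd_mean", "value": 0.08},
        {"endpoint_id": "ep-1234", "metric": "kld_mean", "value": 0.31},
    ]
    drift_status = {"endpoint_id": "ep-1234", "drift_status": "POSSIBLE_DRIFT"}

    # posted to <default_http_sink>/monitoring-batch-metrics and /monitoring-drift-status
    print(json.dumps(batch_metrics))
    print(json.dumps(drift_status))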