mlrun 1.3.2rc1-py3-none-any.whl → 1.3.2rc2-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of mlrun might be problematic.

Files changed (93)
  1. mlrun/api/api/deps.py +14 -1
  2. mlrun/api/api/endpoints/frontend_spec.py +0 -2
  3. mlrun/api/api/endpoints/functions.py +15 -27
  4. mlrun/api/api/endpoints/grafana_proxy.py +435 -74
  5. mlrun/api/api/endpoints/healthz.py +5 -18
  6. mlrun/api/api/endpoints/model_endpoints.py +33 -37
  7. mlrun/api/api/utils.py +6 -13
  8. mlrun/api/crud/__init__.py +14 -16
  9. mlrun/api/crud/logs.py +5 -7
  10. mlrun/api/crud/model_monitoring/__init__.py +2 -2
  11. mlrun/api/crud/model_monitoring/model_endpoint_store.py +847 -0
  12. mlrun/api/crud/model_monitoring/model_endpoints.py +105 -328
  13. mlrun/api/crud/pipelines.py +2 -3
  14. mlrun/api/db/sqldb/models/models_mysql.py +52 -19
  15. mlrun/api/db/sqldb/models/models_sqlite.py +52 -19
  16. mlrun/api/db/sqldb/session.py +19 -26
  17. mlrun/api/schemas/__init__.py +2 -0
  18. mlrun/api/schemas/constants.py +0 -13
  19. mlrun/api/schemas/frontend_spec.py +0 -1
  20. mlrun/api/schemas/model_endpoints.py +38 -195
  21. mlrun/api/schemas/schedule.py +2 -2
  22. mlrun/api/utils/clients/log_collector.py +5 -0
  23. mlrun/builder.py +9 -41
  24. mlrun/config.py +1 -76
  25. mlrun/data_types/__init__.py +1 -6
  26. mlrun/data_types/data_types.py +1 -3
  27. mlrun/datastore/__init__.py +2 -9
  28. mlrun/datastore/sources.py +20 -25
  29. mlrun/datastore/store_resources.py +1 -1
  30. mlrun/datastore/targets.py +34 -67
  31. mlrun/datastore/utils.py +4 -26
  32. mlrun/db/base.py +2 -4
  33. mlrun/db/filedb.py +5 -13
  34. mlrun/db/httpdb.py +32 -64
  35. mlrun/db/sqldb.py +2 -4
  36. mlrun/errors.py +0 -5
  37. mlrun/execution.py +0 -2
  38. mlrun/feature_store/api.py +8 -24
  39. mlrun/feature_store/feature_set.py +6 -28
  40. mlrun/feature_store/feature_vector.py +0 -2
  41. mlrun/feature_store/ingestion.py +11 -8
  42. mlrun/feature_store/retrieval/base.py +43 -271
  43. mlrun/feature_store/retrieval/dask_merger.py +153 -55
  44. mlrun/feature_store/retrieval/job.py +3 -12
  45. mlrun/feature_store/retrieval/local_merger.py +130 -48
  46. mlrun/feature_store/retrieval/spark_merger.py +125 -126
  47. mlrun/features.py +2 -7
  48. mlrun/model_monitoring/constants.py +6 -48
  49. mlrun/model_monitoring/helpers.py +35 -118
  50. mlrun/model_monitoring/model_monitoring_batch.py +260 -293
  51. mlrun/model_monitoring/stream_processing_fs.py +253 -220
  52. mlrun/platforms/iguazio.py +0 -33
  53. mlrun/projects/project.py +72 -34
  54. mlrun/runtimes/base.py +0 -5
  55. mlrun/runtimes/daskjob.py +0 -2
  56. mlrun/runtimes/function.py +3 -29
  57. mlrun/runtimes/kubejob.py +15 -39
  58. mlrun/runtimes/local.py +45 -7
  59. mlrun/runtimes/mpijob/abstract.py +0 -2
  60. mlrun/runtimes/mpijob/v1.py +0 -2
  61. mlrun/runtimes/pod.py +0 -2
  62. mlrun/runtimes/remotesparkjob.py +0 -2
  63. mlrun/runtimes/serving.py +0 -6
  64. mlrun/runtimes/sparkjob/abstract.py +2 -39
  65. mlrun/runtimes/sparkjob/spark3job.py +0 -2
  66. mlrun/serving/__init__.py +1 -2
  67. mlrun/serving/routers.py +35 -35
  68. mlrun/serving/server.py +12 -22
  69. mlrun/serving/states.py +30 -162
  70. mlrun/serving/v2_serving.py +10 -13
  71. mlrun/utils/clones.py +1 -1
  72. mlrun/utils/model_monitoring.py +96 -122
  73. mlrun/utils/version/version.json +2 -2
  74. {mlrun-1.3.2rc1.dist-info → mlrun-1.3.2rc2.dist-info}/METADATA +27 -23
  75. {mlrun-1.3.2rc1.dist-info → mlrun-1.3.2rc2.dist-info}/RECORD +79 -92
  76. mlrun/api/crud/model_monitoring/grafana.py +0 -427
  77. mlrun/datastore/spark_udf.py +0 -40
  78. mlrun/model_monitoring/__init__.py +0 -44
  79. mlrun/model_monitoring/common.py +0 -112
  80. mlrun/model_monitoring/model_endpoint.py +0 -141
  81. mlrun/model_monitoring/stores/__init__.py +0 -106
  82. mlrun/model_monitoring/stores/kv_model_endpoint_store.py +0 -448
  83. mlrun/model_monitoring/stores/model_endpoint_store.py +0 -147
  84. mlrun/model_monitoring/stores/models/__init__.py +0 -23
  85. mlrun/model_monitoring/stores/models/base.py +0 -18
  86. mlrun/model_monitoring/stores/models/mysql.py +0 -100
  87. mlrun/model_monitoring/stores/models/sqlite.py +0 -98
  88. mlrun/model_monitoring/stores/sql_model_endpoint_store.py +0 -375
  89. mlrun/utils/db.py +0 -52
  90. {mlrun-1.3.2rc1.dist-info → mlrun-1.3.2rc2.dist-info}/LICENSE +0 -0
  91. {mlrun-1.3.2rc1.dist-info → mlrun-1.3.2rc2.dist-info}/WHEEL +0 -0
  92. {mlrun-1.3.2rc1.dist-info → mlrun-1.3.2rc2.dist-info}/entry_points.txt +0 -0
  93. {mlrun-1.3.2rc1.dist-info → mlrun-1.3.2rc2.dist-info}/top_level.txt +0 -0
mlrun/feature_store/retrieval/spark_merger.py CHANGED
@@ -12,12 +12,12 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 #
-
 import mlrun
 from mlrun.datastore.targets import get_offline_target
 
 from ...runtimes import RemoteSparkRuntime
 from ...runtimes.sparkjob.abstract import AbstractSparkRuntime
+from ..feature_vector import OfflineVectorResponse
 from .base import BaseMerger
 
 
@@ -33,6 +33,109 @@ class SparkFeatureMerger(BaseMerger):
     def to_spark_df(self, session, path):
         return session.read.load(path)
 
+    def _generate_vector(
+        self,
+        entity_rows,
+        entity_timestamp_column,
+        feature_set_objects,
+        feature_set_fields,
+        start_time=None,
+        end_time=None,
+        query=None,
+    ):
+        from pyspark.sql import SparkSession
+        from pyspark.sql.functions import col
+
+        if self.spark is None:
+            # create spark context
+            self.spark = SparkSession.builder.appName(
+                f"vector-merger-{self.vector.metadata.name}"
+            ).getOrCreate()
+
+        feature_sets = []
+        dfs = []
+
+        for name, columns in feature_set_fields.items():
+            feature_set = feature_set_objects[name]
+            feature_sets.append(feature_set)
+            column_names = [name for name, alias in columns]
+
+            if feature_set.spec.passthrough:
+                if not feature_set.spec.source:
+                    raise mlrun.errors.MLRunNotFoundError(
+                        f"passthrough feature set {name} with no source"
+                    )
+                source_kind = feature_set.spec.source.kind
+                source_path = feature_set.spec.source.path
+            else:
+                target = get_offline_target(feature_set)
+                if not target:
+                    raise mlrun.errors.MLRunInvalidArgumentError(
+                        f"feature set {name} does not have offline targets"
+                    )
+                source_kind = target.kind
+                source_path = target.get_target_path()
+
+            # handling case where there are multiple feature sets and user creates vector where
+            # entity_timestamp_column is from a specific feature set (can't be entity timestamp)
+            source_driver = mlrun.datastore.sources.source_kind_to_driver[source_kind]
+            if (
+                entity_timestamp_column in column_names
+                or feature_set.spec.timestamp_key == entity_timestamp_column
+            ):
+                source = source_driver(
+                    name=self.vector.metadata.name,
+                    path=source_path,
+                    time_field=entity_timestamp_column,
+                    start_time=start_time,
+                    end_time=end_time,
+                )
+            else:
+                source = source_driver(
+                    name=self.vector.metadata.name,
+                    path=source_path,
+                    time_field=entity_timestamp_column,
+                )
+
+            # add the index/key to selected columns
+            timestamp_key = feature_set.spec.timestamp_key
+
+            df = source.to_spark_df(
+                self.spark, named_view=self.named_view, time_field=timestamp_key
+            )
+
+            if timestamp_key and timestamp_key not in column_names:
+                columns.append((timestamp_key, None))
+            for entity in feature_set.spec.entities.keys():
+                if entity not in column_names:
+                    columns.append((entity, None))
+
+            # select requested columns and rename with alias where needed
+            df = df.select([col(name).alias(alias or name) for name, alias in columns])
+            dfs.append(df)
+            del df
+
+        # convert pandas entity_rows to spark DF if needed
+        if entity_rows is not None and not hasattr(entity_rows, "rdd"):
+            entity_rows = self.spark.createDataFrame(entity_rows)
+
+        # join the feature data frames
+        self.merge(entity_rows, entity_timestamp_column, feature_sets, dfs)
+
+        # filter joined data frame by the query param
+        if query:
+            self._result_df = self._result_df.filter(query)
+
+        self._result_df = self._result_df.drop(*self._drop_columns)
+
+        if self.vector.status.label_column:
+            self._result_df = self._result_df.dropna(
+                subset=[self.vector.status.label_column]
+            )
+
+        self._write_to_target()
+        return OfflineVectorResponse(self)
+
     def _unpersist_df(self, df):
         df.unpersist()
 
@@ -44,6 +147,7 @@ class SparkFeatureMerger(BaseMerger):
         featureset_df,
         left_keys: list,
         right_keys: list,
+        columns: list,
     ):
 
         """Perform an as of join between entity and featureset.
@@ -66,13 +170,15 @@ class SparkFeatureMerger(BaseMerger):
         from pyspark.sql.functions import col, monotonically_increasing_id, row_number
 
         entity_with_id = entity_df.withColumn("_row_nr", monotonically_increasing_id())
-        rename_right_keys = {}
-        for key in right_keys + [entity_timestamp_column]:
-            if key in entity_df.columns:
-                rename_right_keys[key] = f"ft__{key}"
+        indexes = list(featureset.spec.entities.keys())
+
         # get columns for projection
         projection = [
-            col(col_name).alias(rename_right_keys.get(col_name, col_name))
+            col(col_name).alias(
+                f"ft__{col_name}"
+                if col_name in indexes + [entity_timestamp_column]
+                else col_name
+            )
             for col_name in featureset_df.columns
         ]
 
@@ -81,16 +187,13 @@ class SparkFeatureMerger(BaseMerger):
         # set join conditions
         join_cond = (
            entity_with_id[entity_timestamp_column]
-            >= aliased_featureset_df[
-                rename_right_keys.get(entity_timestamp_column, entity_timestamp_column)
-            ]
+            >= aliased_featureset_df[f"ft__{entity_timestamp_column}"]
         )
 
         # join based on entities
-        for key_l, key_r in zip(left_keys, right_keys):
+        for key in indexes:
             join_cond = join_cond & (
-                entity_with_id[key_l]
-                == aliased_featureset_df[rename_right_keys.get(key_r, key_r)]
+                entity_with_id[key] == aliased_featureset_df[f"ft__{key}"]
             )
 
         conditional_join = entity_with_id.join(
@@ -104,16 +207,14 @@ class SparkFeatureMerger(BaseMerger):
             "_rank", row_number().over(window)
         ).filter(col("_rank") == 1)
 
-        for key in right_keys + [entity_timestamp_column]:
-            if key in entity_df.columns + [entity_timestamp_column]:
-                filter_most_recent_feature_timestamp = (
-                    filter_most_recent_feature_timestamp.drop(
-                        aliased_featureset_df[f"ft__{key}"]
-                    )
+        for key in indexes + [entity_timestamp_column]:
+            filter_most_recent_feature_timestamp = (
+                filter_most_recent_feature_timestamp.drop(
+                    aliased_featureset_df[f"ft__{key}"]
                 )
-        return filter_most_recent_feature_timestamp.drop("_row_nr", "_rank").orderBy(
-            col(entity_timestamp_column)
-        )
+            )
+
+        return filter_most_recent_feature_timestamp.drop("_row_nr", "_rank")
 
     def _join(
         self,
@@ -123,6 +224,7 @@ class SparkFeatureMerger(BaseMerger):
         featureset_df,
         left_keys: list,
         right_keys: list,
+        columns: list,
     ):
 
         """
@@ -143,19 +245,8 @@ class SparkFeatureMerger(BaseMerger):
            be prefixed with featureset_df name.

        """
-        if left_keys != right_keys:
-            join_cond = [
-                entity_df[key_l] == featureset_df[key_r]
-                for key_l, key_r in zip(left_keys, right_keys)
-            ]
-        else:
-            join_cond = left_keys
-
-        merged_df = entity_df.join(
-            featureset_df,
-            join_cond,
-            how=self._join_type,
-        )
+        indexes = list(featureset.spec.entities.keys())
+        merged_df = entity_df.join(featureset_df, on=indexes)
         return merged_df
 
     def get_df(self, to_pandas=True):
@@ -177,95 +268,3 @@ class SparkFeatureMerger(BaseMerger):
             return RemoteSparkRuntime.default_image
         else:
             raise mlrun.errors.MLRunInvalidArgumentError(f"Unsupported kind '{kind}'")
-
-    def _create_engine_env(self):
-        from pyspark.sql import SparkSession
-
-        if self.spark is None:
-            # create spark context
-            self.spark = SparkSession.builder.appName(
-                f"vector-merger-{self.vector.metadata.name}"
-            ).getOrCreate()
-
-    def _get_engine_df(
-        self,
-        feature_set,
-        feature_set_name,
-        column_names=None,
-        start_time=None,
-        end_time=None,
-        entity_timestamp_column=None,
-    ):
-        if feature_set.spec.passthrough:
-            if not feature_set.spec.source:
-                raise mlrun.errors.MLRunNotFoundError(
-                    f"passthrough feature set {feature_set_name} with no source"
-                )
-            source_kind = feature_set.spec.source.kind
-            source_path = feature_set.spec.source.path
-        else:
-            target = get_offline_target(feature_set)
-            if not target:
-                raise mlrun.errors.MLRunInvalidArgumentError(
-                    f"feature set {feature_set_name} does not have offline targets"
-                )
-            source_kind = target.kind
-            source_path = target.get_target_path()
-
-        # handling case where there are multiple feature sets and user creates vector where
-        # entity_timestamp_column is from a specific feature set (can't be entity timestamp)
-        source_driver = mlrun.datastore.sources.source_kind_to_driver[source_kind]
-        if (
-            entity_timestamp_column in column_names
-            or feature_set.spec.timestamp_key == entity_timestamp_column
-        ):
-            source = source_driver(
-                name=self.vector.metadata.name,
-                path=source_path,
-                time_field=entity_timestamp_column,
-                start_time=start_time,
-                end_time=end_time,
-            )
-        else:
-            source = source_driver(
-                name=self.vector.metadata.name,
-                path=source_path,
-                time_field=entity_timestamp_column,
-            )
-
-        if not entity_timestamp_column:
-            entity_timestamp_column = feature_set.spec.timestamp_key
-        # add the index/key to selected columns
-        timestamp_key = feature_set.spec.timestamp_key
-
-        return source.to_spark_df(
-            self.spark, named_view=self.named_view, time_field=timestamp_key
-        )
-
-    def _rename_columns_and_select(
-        self,
-        df,
-        rename_col_dict,
-        columns=None,
-    ):
-        from pyspark.sql.functions import col
-
-        return df.select(
-            [
-                col(name).alias(rename_col_dict.get(name, name))
-                for name in columns or rename_col_dict.keys()
-            ]
-        )
-
-    def _drop_columns_from_result(self):
-        self._result_df = self._result_df.drop(*self._drop_columns)
-
-    def _filter(self, query):
-        self._result_df = self._result_df.filter(query)
-
-    def _order_by(self, order_by_active):
-        from pyspark.sql.functions import col
-
-        self._result_df = self._result_df.orderBy(
-            *[col(col_name).asc_nulls_last() for col_name in order_by_active]
-        )
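The reworked _asof_join above drops the rename_right_keys bookkeeping: the feature set's entity and timestamp columns are always prefixed with ft__, joined against the entity rows with an entity_ts >= ft__ts condition, and only the most recent feature row per entity row is kept. The following is a minimal, self-contained sketch of that pattern, not mlrun code; the toy data, the local SparkSession, and the window ordering are assumptions for illustration.

# Sketch of the as-of join pattern used by _asof_join (assumptions noted above)
from pyspark.sql import SparkSession, Window
from pyspark.sql.functions import col, monotonically_increasing_id, row_number

spark = SparkSession.builder.master("local[1]").appName("asof-join-sketch").getOrCreate()

entity_df = spark.createDataFrame(
    [("c1", "2023-01-03"), ("c2", "2023-01-03")], ["customer_id", "ts"]
)
featureset_df = spark.createDataFrame(
    [("c1", "2023-01-01", 10.0), ("c1", "2023-01-02", 20.0), ("c2", "2023-01-01", 5.0)],
    ["customer_id", "ts", "amount"],
)

indexes = ["customer_id"]  # stands in for featureset.spec.entities.keys()
entity_timestamp_column = "ts"

# tag every entity row so exactly one feature row can be kept per entity row
entity_with_id = entity_df.withColumn("_row_nr", monotonically_increasing_id())

# prefix the feature set's key/timestamp columns to avoid name clashes
aliased = featureset_df.select(
    [
        col(name).alias(f"ft__{name}" if name in indexes + [entity_timestamp_column] else name)
        for name in featureset_df.columns
    ]
)

# as-of condition: same entity, feature timestamp not newer than the entity timestamp
join_cond = entity_with_id[entity_timestamp_column] >= aliased[f"ft__{entity_timestamp_column}"]
for key in indexes:
    join_cond = join_cond & (entity_with_id[key] == aliased[f"ft__{key}"])
conditional_join = entity_with_id.join(aliased, join_cond, "left")

# keep the latest feature row for each entity row, then drop the helper columns
window = Window.partitionBy("_row_nr").orderBy(col(f"ft__{entity_timestamp_column}").desc())
result = (
    conditional_join.withColumn("_rank", row_number().over(window))
    .filter(col("_rank") == 1)
    .drop(*[f"ft__{name}" for name in indexes + [entity_timestamp_column]], "_row_nr", "_rank")
)
result.show()

Partitioning the ranking window by the synthetic _row_nr rather than by the entity keys keeps duplicate entity rows distinct, which is why the helper id column is added before the join.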
mlrun/features.py CHANGED
@@ -16,7 +16,7 @@ import math
 import re
 from typing import Dict, List, Optional, Union
 
-from .data_types import ValueType, python_type_to_value_type
+from .data_types import ValueType
 from .errors import MLRunRuntimeError, err_to_str
 from .model import ModelObj
 
@@ -105,12 +105,7 @@ class Feature(ModelObj):
         :param labels: a set of key/value labels (tags)
         """
         self.name = name or ""
-        if isinstance(value_type, ValueType):
-            self.value_type = value_type
-        elif value_type is not None:
-            self.value_type = python_type_to_value_type(value_type)
-        else:
-            self.value_type = ValueType.STRING
+        self.value_type = ValueType(value_type) if value_type else ValueType.STRING
         self.dims = dims
         self.description = description
         self.default = default
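The branch removed from Feature.__init__ used python_type_to_value_type to map raw Python types; the one-line replacement accepts only ValueType members or their string values and falls back to STRING. A stand-alone sketch of that coercion follows, where ValueType is a hypothetical stand-in for mlrun.data_types.ValueType (member names and values are illustrative only).

# Stand-alone illustration of the new coercion; not mlrun code
from enum import Enum


class ValueType(str, Enum):
    STRING = "str"
    INT64 = "int64"
    FLOAT = "float"


def resolve_value_type(value_type=None):
    # The enum call converts a matching string (or an existing member) to a member;
    # an unrecognized value raises ValueError, and None falls back to STRING.
    return ValueType(value_type) if value_type else ValueType.STRING


assert resolve_value_type() is ValueType.STRING
assert resolve_value_type("int64") is ValueType.INT64
assert resolve_value_type(ValueType.FLOAT) is ValueType.FLOAT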
mlrun/model_monitoring/constants.py CHANGED
@@ -14,17 +14,12 @@
 #
 class EventFieldType:
     FUNCTION_URI = "function_uri"
-    FUNCTION = "function"
-    MODEL_URI = "model_uri"
     MODEL = "model"
     VERSION = "version"
     VERSIONED_MODEL = "versioned_model"
     MODEL_CLASS = "model_class"
     TIMESTAMP = "timestamp"
-    # `endpoint_id` is deprecated as a field in the model endpoint schema since 1.3.1, replaced by `uid`.
     ENDPOINT_ID = "endpoint_id"
-    UID = "uid"
-    ENDPOINT_TYPE = "endpoint_type"
     REQUEST_ID = "request_id"
     RECORD_TYPE = "record_type"
     FEATURES = "features"
@@ -32,6 +27,8 @@ class EventFieldType:
     NAMED_FEATURES = "named_features"
     LABELS = "labels"
     LATENCY = "latency"
+    UNPACKED_LABELS = "unpacked_labels"
+    LABEL_COLUMNS = "label_columns"
     LABEL_NAMES = "label_names"
     PREDICTION = "prediction"
     PREDICTIONS = "predictions"
@@ -41,27 +38,15 @@ class EventFieldType:
     FIRST_REQUEST = "first_request"
     LAST_REQUEST = "last_request"
     METRICS = "metrics"
+    BATCH_TIMESTAMP = "batch_timestamp"
     TIME_FORMAT = "%Y-%m-%d %H:%M:%S.%f"
     BATCH_INTERVALS_DICT = "batch_intervals_dict"
     DEFAULT_BATCH_INTERVALS = "default_batch_intervals"
+    DEFAULT_BATCH_IMAGE = "default_batch_image"
+    STREAM_IMAGE = "stream_image"
     MINUTES = "minutes"
     HOURS = "hours"
     DAYS = "days"
-    MODEL_ENDPOINTS = "model_endpoints"
-    STATE = "state"
-    PROJECT = "project"
-    STREAM_PATH = "stream_path"
-    ACTIVE = "active"
-    MONITORING_MODE = "monitoring_mode"
-    FEATURE_STATS = "feature_stats"
-    CURRENT_STATS = "current_stats"
-    CHILDREN = "children"
-    CHILDREN_UIDS = "children_uids"
-    DRIFT_MEASURES = "drift_measures"
-    DRIFT_STATUS = "drift_status"
-    MONITOR_CONFIGURATION = "monitor_configuration"
-    FEATURE_SET_URI = "monitoring_feature_set_uri"
-    ALGORITHM = "algorithm"
 
 
 class EventLiveStats:
@@ -76,34 +61,7 @@ class EventKeyMetrics:
     BASE_METRICS = "base_metrics"
     CUSTOM_METRICS = "custom_metrics"
     ENDPOINT_FEATURES = "endpoint_features"
-    GENERIC = "generic"
-    REAL_TIME = "real_time"
 
 
-class TimeSeriesTarget:
+class StoreTarget:
     TSDB = "tsdb"
-
-
-class ModelEndpointTarget:
-    V3IO_NOSQL = "v3io-nosql"
-    SQL = "sql"
-
-
-class ProjectSecretKeys:
-    ENDPOINT_STORE_CONNECTION = "MODEL_MONITORING_ENDPOINT_STORE_CONNECTION"
-    ACCESS_KEY = "MODEL_MONITORING_ACCESS_KEY"
-    KAFKA_BOOTSTRAP_SERVERS = "KAFKA_BOOTSTRAP_SERVERS"
-    STREAM_PATH = "STREAM_PATH"
-
-
-class ModelMonitoringStoreKinds:
-    ENDPOINTS = "endpoints"
-    EVENTS = "events"
-
-
-class FileTargetKind:
-    ENDPOINTS = "endpoints"
-    EVENTS = "events"
-    STREAM = "stream"
-    PARQUET = "parquet"
-    LOG_STREAM = "log_stream"
mlrun/model_monitoring/helpers.py CHANGED
@@ -13,25 +13,18 @@
 # limitations under the License.
 #
 import pathlib
-import typing
 
 import sqlalchemy.orm
-from fastapi import Depends
 
 import mlrun
 import mlrun.api.api.utils
 import mlrun.api.crud.secrets
-import mlrun.api.schemas
 import mlrun.api.utils.singletons.db
-import mlrun.api.utils.singletons.k8s
 import mlrun.config
 import mlrun.feature_store as fstore
-import mlrun.model_monitoring.constants as model_monitoring_constants
 import mlrun.model_monitoring.stream_processing_fs
 import mlrun.runtimes
 import mlrun.utils.helpers
-import mlrun.utils.model_monitoring
-from mlrun.api.api import deps
 
 _CURRENT_FILE_PATH = pathlib.Path(__file__)
 _STREAM_PROCESSING_FUNCTION_PATH = _CURRENT_FILE_PATH.parent / "stream_processing_fs.py"
@@ -43,20 +36,16 @@ _MONIOTINRG_BATCH_FUNCTION_PATH = (
 def initial_model_monitoring_stream_processing_function(
     project: str,
     model_monitoring_access_key: str,
+    db_session: sqlalchemy.orm.Session,
     tracking_policy: mlrun.utils.model_monitoring.TrackingPolicy,
-    auth_info: mlrun.api.schemas.AuthInfo,
-    parquet_target: str,
 ):
     """
     Initialize model monitoring stream processing function.
 
-    :param project: Project name.
-    :param model_monitoring_access_key: Access key to apply the model monitoring process. Please note that in CE
-        deployments this parameter will be None.
+    :param project: project name.
+    :param model_monitoring_access_key: access key to apply the model monitoring process.
+    :param db_session: A session that manages the current dialog with the database.
     :param tracking_policy: Model monitoring configurations.
-    :param auth_info: The auth info of the request.
-    :parquet_target: Path to model monitoring parquet file that will be generated by the monitoring
-        stream nuclio function.
 
     :return: A function object from a mlrun runtime class
 
@@ -65,11 +54,12 @@ def initial_model_monitoring_stream_processing_function(
     # Initialize Stream Processor object
     stream_processor = mlrun.model_monitoring.stream_processing_fs.EventStreamProcessor(
         project=project,
-        parquet_batching_max_events=mlrun.mlconf.model_endpoint_monitoring.parquet_batching_max_events,
-        parquet_target=parquet_target,
         model_monitoring_access_key=model_monitoring_access_key,
+        parquet_batching_max_events=mlrun.mlconf.model_endpoint_monitoring.parquet_batching_max_events,
     )
 
+    http_source = mlrun.datastore.sources.HttpSource()
+
     # Create a new serving function for the streaming process
     function = mlrun.code_to_function(
         name="model-monitoring-stream",
@@ -85,19 +75,32 @@ def initial_model_monitoring_stream_processing_function(
     # Set the project to the serving function
     function.metadata.project = project
 
-    # Add stream triggers
-    function = _apply_stream_trigger(
-        project=project,
-        function=function,
-        model_monitoring_access_key=model_monitoring_access_key,
-        auth_info=auth_info,
+    # Add v3io stream trigger
+    stream_path = mlrun.mlconf.model_endpoint_monitoring.store_prefixes.default.format(
+        project=project, kind="stream"
+    )
+    function.add_v3io_stream_trigger(
+        stream_path=stream_path, name="monitoring_stream_trigger"
+    )
+
+    # Set model monitoring access key for managing permissions
+    function.set_env_from_secret(
+        "MODEL_MONITORING_ACCESS_KEY",
+        mlrun.api.utils.singletons.k8s.get_k8s().get_project_secret_name(project),
+        mlrun.api.crud.secrets.Secrets().generate_client_project_secret_key(
+            mlrun.api.crud.secrets.SecretsClientType.model_monitoring,
+            "MODEL_MONITORING_ACCESS_KEY",
+        ),
     )
 
-    # Apply feature store run configurations on the serving function
     run_config = fstore.RunConfig(function=function, local=False)
     function.spec.parameters = run_config.parameters
 
-    return function
+    func = http_source.add_nuclio_trigger(function)
+    func.metadata.credentials.access_key = model_monitoring_access_key
+    func.apply(mlrun.v3io_cred())
+
+    return func
 
 
 def get_model_monitoring_batch_function(
@@ -111,8 +114,7 @@ def get_model_monitoring_batch_function(
     Initialize model monitoring batch function.
 
     :param project: project name.
-    :param model_monitoring_access_key: access key to apply the model monitoring process. Please note that in CE
-        deployments this parameter will be None.
+    :param model_monitoring_access_key: access key to apply the model monitoring process.
     :param db_session: A session that manages the current dialog with the database.
     :param auth_info: The auth info of the request.
     :param tracking_policy: Model monitoring configurations.
@@ -135,106 +137,21 @@ def get_model_monitoring_batch_function(
     # Set the project to the job function
     function.metadata.project = project
 
-    if not mlrun.mlconf.is_ce_mode():
-        function = _apply_access_key_and_mount_function(
-            project=project,
-            function=function,
-            model_monitoring_access_key=model_monitoring_access_key,
-            auth_info=auth_info,
-        )
-
-    # Enrich runtime with the required configurations
-    mlrun.api.api.utils.apply_enrichment_and_validation_on_function(function, auth_info)
-
-    return function
-
-
-def _apply_stream_trigger(
-    project: str,
-    function: mlrun.runtimes.ServingRuntime,
-    model_monitoring_access_key: str = None,
-    auth_info: mlrun.api.schemas.AuthInfo = Depends(deps.authenticate_request),
-) -> mlrun.runtimes.ServingRuntime:
-    """Adding stream source for the nuclio serving function. By default, the function has HTTP stream trigger along
-    with another supported stream source that can be either Kafka or V3IO, depends on the stream path schema that is
-    defined under mlrun.mlconf.model_endpoint_monitoring.store_prefixes. Note that if no valid stream path has been
-    provided then the function will have a single HTTP stream source.
-
-    :param project: Project name.
-    :param function: The serving function object that will be applied with the stream trigger.
-    :param model_monitoring_access_key: Access key to apply the model monitoring stream function when the stream is
-        schema is V3IO.
-    :param auth_info: The auth info of the request.
-
-    :return: ServingRuntime object with stream trigger.
-    """
-
-    # Get the stream path from the configuration
-    # stream_path = mlrun.mlconf.get_file_target_path(project=project, kind="stream", target="stream")
-    stream_path = mlrun.utils.model_monitoring.get_stream_path(project=project)
-
-    if stream_path.startswith("kafka://"):
-
-        topic, brokers = mlrun.datastore.utils.parse_kafka_url(url=stream_path)
-        # Generate Kafka stream source
-        stream_source = mlrun.datastore.sources.KafkaSource(
-            brokers=brokers,
-            topics=[topic],
-        )
-        function = stream_source.add_nuclio_trigger(function)
-
-    if not mlrun.mlconf.is_ce_mode():
-        function = _apply_access_key_and_mount_function(
-            project=project,
-            function=function,
-            model_monitoring_access_key=model_monitoring_access_key,
-            auth_info=auth_info,
-        )
-        if stream_path.startswith("v3io://"):
-            # Generate V3IO stream trigger
-            function.add_v3io_stream_trigger(
-                stream_path=stream_path, name="monitoring_stream_trigger"
-            )
-    # Add the default HTTP source
-    http_source = mlrun.datastore.sources.HttpSource()
-    function = http_source.add_nuclio_trigger(function)
-
-    return function
-
-
-def _apply_access_key_and_mount_function(
-    project: str,
-    function: typing.Union[
-        mlrun.runtimes.KubejobRuntime, mlrun.runtimes.ServingRuntime
-    ],
-    model_monitoring_access_key: str,
-    auth_info: mlrun.api.schemas.AuthInfo,
-) -> typing.Union[mlrun.runtimes.KubejobRuntime, mlrun.runtimes.ServingRuntime]:
-    """Applying model monitoring access key on the provided function when using V3IO path. In addition, this method
-    mount the V3IO path for the provided function to configure the access to the system files.
-
-    :param project: Project name.
-    :param function: Model monitoring function object that will be filled with the access key and
-        the access to the system files.
-    :param model_monitoring_access_key: Access key to apply the model monitoring stream function when the stream is
-        schema is V3IO.
-    :param auth_info: The auth info of the request.
-
-    :return: function runtime object with access key and access to system files.
-    """
-
     # Set model monitoring access key for managing permissions
     function.set_env_from_secret(
-        model_monitoring_constants.ProjectSecretKeys.ACCESS_KEY,
+        "MODEL_MONITORING_ACCESS_KEY",
         mlrun.api.utils.singletons.k8s.get_k8s().get_project_secret_name(project),
         mlrun.api.crud.secrets.Secrets().generate_client_project_secret_key(
             mlrun.api.crud.secrets.SecretsClientType.model_monitoring,
-            model_monitoring_constants.ProjectSecretKeys.ACCESS_KEY,
+            "MODEL_MONITORING_ACCESS_KEY",
         ),
     )
-    function.metadata.credentials.access_key = model_monitoring_access_key
+
     function.apply(mlrun.mount_v3io())
 
+    # Needs to be a member of the project and have access to project data path
+    function.metadata.credentials.access_key = model_monitoring_access_key
+
     # Ensure that the auth env vars are set
     mlrun.api.api.utils.ensure_function_has_auth_set(function, auth_info)
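Condensed from the lines added above, the rc2 wiring of the monitoring stream function looks roughly like the sketch below. The project name, source file path, secret name, access key, and the kind/image arguments are placeholders or assumptions; in the real code path the API layer resolves them (via get_k8s().get_project_secret_name and generate_client_project_secret_key) as the diff shows.

# Sketch only: stream-function wiring condensed from the added diff lines
import mlrun

project = "my-project"  # placeholder project name

# Serving function built from the stream-processing source (kind/image are assumptions)
function = mlrun.code_to_function(
    name="model-monitoring-stream",
    project=project,
    filename="stream_processing_fs.py",  # placeholder path to the stream code
    kind="serving",
    image="mlrun/mlrun",
)
function.metadata.project = project

# v3io stream trigger, path rendered from the configured store prefix
stream_path = mlrun.mlconf.model_endpoint_monitoring.store_prefixes.default.format(
    project=project, kind="stream"
)
function.add_v3io_stream_trigger(
    stream_path=stream_path, name="monitoring_stream_trigger"
)

# Access key is injected from the project secret (names below are placeholders)
function.set_env_from_secret(
    "MODEL_MONITORING_ACCESS_KEY",
    "mlrun-project-secrets-my-project",  # placeholder secret name
    "MODEL_MONITORING_ACCESS_KEY",       # placeholder secret key
)

# HTTP trigger plus v3io credentials, matching the added lines in the diff
http_source = mlrun.datastore.sources.HttpSource()
func = http_source.add_nuclio_trigger(function)
func.metadata.credentials.access_key = "placeholder-access-key"
func.apply(mlrun.v3io_cred())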