mlrun 1.7.0rc5__py3-none-any.whl → 1.7.0rc7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of mlrun might be problematic (further details were linked from the original page).

Files changed (75):
  1. mlrun/artifacts/base.py +2 -1
  2. mlrun/artifacts/plots.py +9 -5
  3. mlrun/common/constants.py +6 -0
  4. mlrun/common/schemas/__init__.py +2 -0
  5. mlrun/common/schemas/model_monitoring/__init__.py +4 -0
  6. mlrun/common/schemas/model_monitoring/constants.py +35 -18
  7. mlrun/common/schemas/project.py +1 -0
  8. mlrun/common/types.py +7 -1
  9. mlrun/config.py +19 -6
  10. mlrun/data_types/data_types.py +4 -0
  11. mlrun/datastore/alibaba_oss.py +130 -0
  12. mlrun/datastore/azure_blob.py +4 -5
  13. mlrun/datastore/base.py +22 -16
  14. mlrun/datastore/datastore.py +4 -0
  15. mlrun/datastore/google_cloud_storage.py +1 -1
  16. mlrun/datastore/sources.py +7 -7
  17. mlrun/db/base.py +14 -6
  18. mlrun/db/factory.py +1 -1
  19. mlrun/db/httpdb.py +61 -56
  20. mlrun/db/nopdb.py +3 -0
  21. mlrun/launcher/__init__.py +1 -1
  22. mlrun/launcher/base.py +1 -1
  23. mlrun/launcher/client.py +1 -1
  24. mlrun/launcher/factory.py +1 -1
  25. mlrun/launcher/local.py +1 -1
  26. mlrun/launcher/remote.py +1 -1
  27. mlrun/model.py +1 -0
  28. mlrun/model_monitoring/__init__.py +1 -1
  29. mlrun/model_monitoring/api.py +104 -301
  30. mlrun/model_monitoring/application.py +21 -21
  31. mlrun/model_monitoring/applications/histogram_data_drift.py +130 -40
  32. mlrun/model_monitoring/controller.py +26 -33
  33. mlrun/model_monitoring/db/__init__.py +16 -0
  34. mlrun/model_monitoring/{stores → db/stores}/__init__.py +43 -34
  35. mlrun/model_monitoring/db/stores/base/__init__.py +15 -0
  36. mlrun/model_monitoring/{stores/model_endpoint_store.py → db/stores/base/store.py} +47 -6
  37. mlrun/model_monitoring/db/stores/sqldb/__init__.py +13 -0
  38. mlrun/model_monitoring/db/stores/sqldb/models/__init__.py +49 -0
  39. mlrun/model_monitoring/{stores → db/stores/sqldb}/models/base.py +76 -3
  40. mlrun/model_monitoring/db/stores/sqldb/models/mysql.py +68 -0
  41. mlrun/model_monitoring/{stores → db/stores/sqldb}/models/sqlite.py +13 -1
  42. mlrun/model_monitoring/db/stores/sqldb/sql_store.py +662 -0
  43. mlrun/model_monitoring/db/stores/v3io_kv/__init__.py +13 -0
  44. mlrun/model_monitoring/{stores/kv_model_endpoint_store.py → db/stores/v3io_kv/kv_store.py} +134 -3
  45. mlrun/model_monitoring/features_drift_table.py +34 -22
  46. mlrun/model_monitoring/helpers.py +45 -6
  47. mlrun/model_monitoring/stream_processing.py +43 -9
  48. mlrun/model_monitoring/tracking_policy.py +7 -1
  49. mlrun/model_monitoring/writer.py +4 -36
  50. mlrun/projects/pipelines.py +13 -1
  51. mlrun/projects/project.py +279 -117
  52. mlrun/run.py +72 -74
  53. mlrun/runtimes/__init__.py +35 -0
  54. mlrun/runtimes/base.py +7 -1
  55. mlrun/runtimes/nuclio/api_gateway.py +188 -61
  56. mlrun/runtimes/nuclio/application/__init__.py +15 -0
  57. mlrun/runtimes/nuclio/application/application.py +283 -0
  58. mlrun/runtimes/nuclio/application/reverse_proxy.go +87 -0
  59. mlrun/runtimes/nuclio/function.py +53 -1
  60. mlrun/runtimes/nuclio/serving.py +28 -32
  61. mlrun/runtimes/pod.py +27 -1
  62. mlrun/serving/server.py +4 -6
  63. mlrun/serving/states.py +41 -33
  64. mlrun/utils/helpers.py +34 -0
  65. mlrun/utils/version/version.json +2 -2
  66. {mlrun-1.7.0rc5.dist-info → mlrun-1.7.0rc7.dist-info}/METADATA +14 -5
  67. {mlrun-1.7.0rc5.dist-info → mlrun-1.7.0rc7.dist-info}/RECORD +71 -64
  68. mlrun/model_monitoring/batch.py +0 -974
  69. mlrun/model_monitoring/stores/models/__init__.py +0 -27
  70. mlrun/model_monitoring/stores/models/mysql.py +0 -34
  71. mlrun/model_monitoring/stores/sql_model_endpoint_store.py +0 -382
  72. {mlrun-1.7.0rc5.dist-info → mlrun-1.7.0rc7.dist-info}/LICENSE +0 -0
  73. {mlrun-1.7.0rc5.dist-info → mlrun-1.7.0rc7.dist-info}/WHEEL +0 -0
  74. {mlrun-1.7.0rc5.dist-info → mlrun-1.7.0rc7.dist-info}/entry_points.txt +0 -0
  75. {mlrun-1.7.0rc5.dist-info → mlrun-1.7.0rc7.dist-info}/top_level.txt +0 -0
@@ -13,13 +13,17 @@
13
13
  # limitations under the License.
14
14
 
15
15
  from dataclasses import dataclass
16
- from typing import Final, Optional, Protocol
16
+ from typing import Final, Optional, Protocol, cast
17
17
 
18
18
  import numpy as np
19
- from pandas import DataFrame, Timestamp
19
+ from pandas import DataFrame, Series, Timestamp
20
20
 
21
+ import mlrun.artifacts
22
+ import mlrun.common.model_monitoring.helpers
23
+ import mlrun.model_monitoring.features_drift_table as mm_drift_table
21
24
  from mlrun.common.schemas.model_monitoring.constants import (
22
25
  MLRUN_HISTOGRAM_DATA_DRIFT_APP_NAME,
26
+ EventFieldType,
23
27
  ResultKindApp,
24
28
  ResultStatusApp,
25
29
  )
@@ -27,7 +31,7 @@ from mlrun.model_monitoring.application import (
27
31
  ModelMonitoringApplicationBase,
28
32
  ModelMonitoringApplicationResult,
29
33
  )
30
- from mlrun.model_monitoring.batch import (
34
+ from mlrun.model_monitoring.metrics.histogram_distance import (
31
35
  HellingerDistance,
32
36
  HistogramDistanceMetric,
33
37
  KullbackLeiblerDivergence,
@@ -115,31 +119,24 @@ class HistogramDataDriftApplication(ModelMonitoringApplicationBase):
115
119
 
116
120
  def _compute_metrics_per_feature(
117
121
  self, sample_df_stats: DataFrame, feature_stats: DataFrame
118
- ) -> dict[type[HistogramDistanceMetric], list[float]]:
122
+ ) -> DataFrame:
119
123
  """Compute the metrics for the different features and labels"""
120
- metrics_per_feature: dict[type[HistogramDistanceMetric], list[float]] = {
121
- metric_class: [] for metric_class in self.metrics
122
- }
124
+ metrics_per_feature = DataFrame(
125
+ columns=[metric_class.NAME for metric_class in self.metrics]
126
+ )
123
127
 
124
- for (sample_feat, sample_hist), (reference_feat, reference_hist) in zip(
125
- sample_df_stats.items(), feature_stats.items()
126
- ):
127
- assert sample_feat == reference_feat, "The features do not match"
128
+ for feature_name in feature_stats:
129
+ sample_hist = np.asarray(sample_df_stats[feature_name])
130
+ reference_hist = np.asarray(feature_stats[feature_name])
128
131
  self.context.logger.info(
129
- "Computing metrics for feature", feature_name=sample_feat
132
+ "Computing metrics for feature", feature_name=feature_name
130
133
  )
131
- sample_arr = np.asarray(sample_hist)
132
- reference_arr = np.asarray(reference_hist)
133
- for metric in self.metrics:
134
- metric_name = metric.NAME
135
- self.context.logger.debug(
136
- "Computing data drift metric",
137
- metric_name=metric_name,
138
- feature_name=sample_feat,
139
- )
140
- metrics_per_feature[metric].append(
141
- metric(distrib_t=sample_arr, distrib_u=reference_arr).compute()
142
- )
134
+ metrics_per_feature.loc[feature_name] = { # pyright: ignore[reportCallIssue,reportArgumentType]
135
+ metric.NAME: metric(
136
+ distrib_t=sample_hist, distrib_u=reference_hist
137
+ ).compute()
138
+ for metric in self.metrics
139
+ }
143
140
  self.context.logger.info("Finished computing the metrics")
144
141
 
145
142
  return metrics_per_feature
@@ -147,37 +144,37 @@ class HistogramDataDriftApplication(ModelMonitoringApplicationBase):
147
144
  def _add_general_drift_result(
148
145
  self, results: list[ModelMonitoringApplicationResult], value: float
149
146
  ) -> None:
147
+ """Add the general drift result to the results list and log it"""
148
+ status = self._value_classifier.value_to_status(value)
150
149
  results.append(
151
150
  ModelMonitoringApplicationResult(
152
151
  name="general_drift",
153
152
  value=value,
154
153
  kind=self.METRIC_KIND,
155
- status=self._value_classifier.value_to_status(value),
154
+ status=status,
156
155
  )
157
156
  )
158
157
 
159
158
  def _get_results(
160
- self, metrics_per_feature: dict[type[HistogramDistanceMetric], list[float]]
159
+ self, metrics_per_feature: DataFrame
161
160
  ) -> list[ModelMonitoringApplicationResult]:
162
161
  """Average the metrics over the features and add the status"""
163
162
  results: list[ModelMonitoringApplicationResult] = []
164
- hellinger_tvd_values: list[float] = []
165
- for metric_class, metric_values in metrics_per_feature.items():
166
- self.context.logger.debug(
167
- "Averaging metric over the features", metric_name=metric_class.NAME
168
- )
169
- value = np.mean(metric_values)
170
- if metric_class == KullbackLeiblerDivergence:
163
+
164
+ self.context.logger.debug("Averaging metrics over the features")
165
+ metrics_mean = metrics_per_feature.mean().to_dict()
166
+
167
+ self.context.logger.debug("Creating the results")
168
+ for name, value in metrics_mean.items():
169
+ if name == KullbackLeiblerDivergence.NAME:
171
170
  # This metric is not bounded from above [0, inf).
172
171
  # No status is currently reported for KL divergence
173
172
  status = ResultStatusApp.irrelevant
174
173
  else:
175
174
  status = self._value_classifier.value_to_status(value)
176
- if metric_class in self._REQUIRED_METRICS:
177
- hellinger_tvd_values.append(value)
178
175
  results.append(
179
176
  ModelMonitoringApplicationResult(
180
- name=f"{metric_class.NAME}_mean",
177
+ name=f"{name}_mean",
181
178
  value=value,
182
179
  kind=self.METRIC_KIND,
183
180
  status=status,
@@ -185,16 +182,102 @@ class HistogramDataDriftApplication(ModelMonitoringApplicationBase):
185
182
  )
186
183
 
187
184
  self._add_general_drift_result(
188
- results=results, value=np.mean(hellinger_tvd_values)
185
+ results=results,
186
+ value=np.mean(
187
+ [
188
+ metrics_mean[HellingerDistance.NAME],
189
+ metrics_mean[TotalVarianceDistance.NAME],
190
+ ]
191
+ ),
189
192
  )
190
193
 
194
+ self.context.logger.info("Finished with the results")
191
195
  return results
192
196
 
197
+ @staticmethod
198
+ def _remove_timestamp_feature(
199
+ sample_set_statistics: mlrun.common.model_monitoring.helpers.FeatureStats,
200
+ ) -> mlrun.common.model_monitoring.helpers.FeatureStats:
201
+ """
202
+ Drop the 'timestamp' feature if it exists, as it is irrelevant
203
+ in the plotly artifact
204
+ """
205
+ sample_set_statistics = mlrun.common.model_monitoring.helpers.FeatureStats(
206
+ sample_set_statistics.copy()
207
+ )
208
+ if EventFieldType.TIMESTAMP in sample_set_statistics:
209
+ del sample_set_statistics[EventFieldType.TIMESTAMP]
210
+ return sample_set_statistics
211
+
212
+ def _log_json_artifact(self, drift_per_feature_values: Series) -> None:
213
+ """Log the drift values as a JSON artifact"""
214
+ self.context.logger.debug("Logging drift value per feature JSON artifact")
215
+ self.context.log_artifact(
216
+ mlrun.artifacts.Artifact(
217
+ body=drift_per_feature_values.to_json(),
218
+ format="json",
219
+ key="features_drift_results",
220
+ )
221
+ )
222
+ self.context.logger.debug("Logged JSON artifact successfully")
223
+
224
+ def _log_plotly_table_artifact(
225
+ self,
226
+ sample_set_statistics: mlrun.common.model_monitoring.helpers.FeatureStats,
227
+ inputs_statistics: mlrun.common.model_monitoring.helpers.FeatureStats,
228
+ metrics_per_feature: DataFrame,
229
+ drift_per_feature_values: Series,
230
+ ) -> None:
231
+ """Log the Plotly drift table artifact"""
232
+ self.context.logger.debug(
233
+ "Feature stats",
234
+ sample_set_statistics=sample_set_statistics,
235
+ inputs_statistics=inputs_statistics,
236
+ )
237
+
238
+ self.context.logger.debug("Computing drift results per feature")
239
+ drift_results = {
240
+ cast(str, key): (self._value_classifier.value_to_status(value), value)
241
+ for key, value in drift_per_feature_values.items()
242
+ }
243
+ self.context.logger.debug("Logging plotly artifact")
244
+ self.context.log_artifact(
245
+ mm_drift_table.FeaturesDriftTablePlot().produce(
246
+ sample_set_statistics=sample_set_statistics,
247
+ inputs_statistics=inputs_statistics,
248
+ metrics=metrics_per_feature.T.to_dict(),
249
+ drift_results=drift_results,
250
+ )
251
+ )
252
+ self.context.logger.debug("Logged plotly artifact successfully")
253
+
254
+ def _log_drift_artifacts(
255
+ self,
256
+ sample_set_statistics: mlrun.common.model_monitoring.helpers.FeatureStats,
257
+ inputs_statistics: mlrun.common.model_monitoring.helpers.FeatureStats,
258
+ metrics_per_feature: DataFrame,
259
+ log_json_artifact: bool = True,
260
+ ) -> None:
261
+ """Log JSON and Plotly drift data per feature artifacts"""
262
+ drift_per_feature_values = metrics_per_feature[
263
+ [HellingerDistance.NAME, TotalVarianceDistance.NAME]
264
+ ].mean(axis=1)
265
+
266
+ if log_json_artifact:
267
+ self._log_json_artifact(drift_per_feature_values)
268
+
269
+ self._log_plotly_table_artifact(
270
+ sample_set_statistics=self._remove_timestamp_feature(sample_set_statistics),
271
+ inputs_statistics=inputs_statistics,
272
+ metrics_per_feature=metrics_per_feature,
273
+ drift_per_feature_values=drift_per_feature_values,
274
+ )
275
+
193
276
  def do_tracking(
194
277
  self,
195
278
  application_name: str,
196
- sample_df_stats: DataFrame,
197
- feature_stats: DataFrame,
279
+ sample_df_stats: mlrun.common.model_monitoring.helpers.FeatureStats,
280
+ feature_stats: mlrun.common.model_monitoring.helpers.FeatureStats,
198
281
  sample_df: DataFrame,
199
282
  start_infer_time: Timestamp,
200
283
  end_infer_time: Timestamp,
@@ -210,7 +293,14 @@ class HistogramDataDriftApplication(ModelMonitoringApplicationBase):
210
293
  """
211
294
  self.context.logger.debug("Starting to run the application")
212
295
  metrics_per_feature = self._compute_metrics_per_feature(
213
- sample_df_stats=sample_df_stats, feature_stats=feature_stats
296
+ sample_df_stats=self.dict_to_histogram(sample_df_stats),
297
+ feature_stats=self.dict_to_histogram(feature_stats),
298
+ )
299
+ self.context.logger.debug("Saving artifacts")
300
+ self._log_drift_artifacts(
301
+ inputs_statistics=feature_stats,
302
+ sample_set_statistics=sample_df_stats,
303
+ metrics_per_feature=metrics_per_feature,
214
304
  )
215
305
  self.context.logger.debug("Computing average per metric")
216
306
  results = self._get_results(metrics_per_feature)
@@ -21,25 +21,24 @@ from collections.abc import Iterator
21
21
  from typing import Any, NamedTuple, Optional, Union, cast
22
22
 
23
23
  import nuclio
24
- from v3io.dataplane.response import HttpResponseError
25
24
 
26
25
  import mlrun
27
26
  import mlrun.common.schemas.model_monitoring.constants as mm_constants
28
27
  import mlrun.data_types.infer
29
28
  import mlrun.feature_store as fstore
29
+ import mlrun.model_monitoring.db.stores
30
30
  from mlrun.common.model_monitoring.helpers import FeatureStats, pad_features_hist
31
31
  from mlrun.datastore import get_stream_pusher
32
32
  from mlrun.datastore.targets import ParquetTarget
33
33
  from mlrun.errors import err_to_str
34
- from mlrun.model_monitoring.batch import calculate_inputs_statistics
35
34
  from mlrun.model_monitoring.helpers import (
36
35
  _BatchDict,
37
36
  batch_dict2timedelta,
37
+ calculate_inputs_statistics,
38
38
  get_monitoring_parquet_path,
39
39
  get_stream_path,
40
40
  )
41
- from mlrun.utils import create_logger, datetime_now, logger
42
- from mlrun.utils.v3io_clients import get_v3io_client
41
+ from mlrun.utils import datetime_now, logger
43
42
 
44
43
 
45
44
  class _Interval(NamedTuple):
@@ -48,8 +47,6 @@ class _Interval(NamedTuple):
48
47
 
49
48
 
50
49
  class _BatchWindow:
51
- V3IO_CONTAINER_FORMAT = "users/pipelines/{project}/monitoring-schedules/functions"
52
-
53
50
  def __init__(
54
51
  self,
55
52
  project: str,
@@ -65,27 +62,22 @@ class _BatchWindow:
65
62
  All the time values are in seconds.
66
63
  The start and stop time are in seconds since the epoch.
67
64
  """
65
+ self.project = project
68
66
  self._endpoint = endpoint
69
67
  self._application = application
70
68
  self._first_request = first_request
71
- self._kv_storage = get_v3io_client(
72
- endpoint=mlrun.mlconf.v3io_api,
73
- # Avoid noisy warning logs before the KV table is created
74
- logger=create_logger(name="v3io_client", level="error"),
75
- ).kv
76
- self._v3io_container = self.V3IO_CONTAINER_FORMAT.format(project=project)
77
69
  self._stop = last_updated
78
70
  self._step = timedelta_seconds
71
+ self._db = mlrun.model_monitoring.get_store_object(project=self.project)
79
72
  self._start = self._get_last_analyzed()
80
73
 
81
74
  def _get_last_analyzed(self) -> Optional[int]:
82
75
  try:
83
- data = self._kv_storage.get(
84
- container=self._v3io_container,
85
- table_path=self._endpoint,
86
- key=self._application,
76
+ last_analyzed = self._db.get_last_analyzed(
77
+ endpoint_id=self._endpoint,
78
+ application_name=self._application,
87
79
  )
88
- except HttpResponseError as err:
80
+ except mlrun.errors.MLRunNotFoundError:
89
81
  logger.info(
90
82
  "No last analyzed time was found for this endpoint and "
91
83
  "application, as this is probably the first time this "
@@ -96,7 +88,7 @@ class _BatchWindow:
96
88
  first_request=self._first_request,
97
89
  last_updated=self._stop,
98
90
  )
99
- logger.debug("Error while getting last analyzed time", err=err)
91
+
100
92
  if self._first_request and self._stop:
101
93
  # TODO : Change the timedelta according to the policy.
102
94
  first_period_in_seconds = max(
@@ -108,7 +100,6 @@ class _BatchWindow:
108
100
  )
109
101
  return self._first_request
110
102
 
111
- last_analyzed = data.output.item[mm_constants.SchedulingKeys.LAST_ANALYZED]
112
103
  logger.info(
113
104
  "Got the last analyzed time for this endpoint and application",
114
105
  endpoint=self._endpoint,
@@ -124,11 +115,11 @@ class _BatchWindow:
124
115
  application=self._application,
125
116
  last_analyzed=last_analyzed,
126
117
  )
127
- self._kv_storage.put(
128
- container=self._v3io_container,
129
- table_path=self._endpoint,
130
- key=self._application,
131
- attributes={mm_constants.SchedulingKeys.LAST_ANALYZED: last_analyzed},
118
+
119
+ self._db.update_last_analyzed(
120
+ endpoint_id=self._endpoint,
121
+ application_name=self._application,
122
+ last_analyzed=last_analyzed,
132
123
  )
133
124
 
134
125
  def get_intervals(
@@ -301,7 +292,7 @@ class MonitoringApplicationController:
301
292
  f"Initializing {self.__class__.__name__}", project=project
302
293
  )
303
294
 
304
- self.db = mlrun.model_monitoring.get_model_endpoint_store(project=project)
295
+ self.db = mlrun.model_monitoring.get_store_object(project=project)
305
296
 
306
297
  self._batch_window_generator = _BatchWindowGenerator(
307
298
  batch_dict=json.loads(
@@ -359,7 +350,12 @@ class MonitoringApplicationController:
359
350
  {
360
351
  app.metadata.name
361
352
  for app in monitoring_functions
362
- if app.status.state == "ready"
353
+ if (
354
+ app.status.state == "ready"
355
+ # workaround for the default app, as its `status.state` is `None`
356
+ or app.metadata.name
357
+ == mm_constants.MLRUN_HISTOGRAM_DATA_DRIFT_APP_NAME
358
+ )
363
359
  }
364
360
  )
365
361
  if not applications_names:
@@ -367,6 +363,10 @@ class MonitoringApplicationController:
367
363
  "No monitoring functions found", project=self.project
368
364
  )
369
365
  return
366
+ self.context.logger.info(
367
+ "Starting to iterate over the applications",
368
+ applications=applications_names,
369
+ )
370
370
 
371
371
  except Exception as e:
372
372
  self.context.logger.error(
@@ -445,13 +445,6 @@ class MonitoringApplicationController:
445
445
  m_fs = fstore.get_feature_set(
446
446
  endpoint[mm_constants.EventFieldType.FEATURE_SET_URI]
447
447
  )
448
- labels = endpoint[mm_constants.EventFieldType.LABEL_NAMES]
449
- if labels:
450
- if isinstance(labels, str):
451
- labels = json.loads(labels)
452
- for label in labels:
453
- if label not in list(m_fs.spec.features.keys()):
454
- m_fs.add_feature(fstore.Feature(name=label, value_type="float"))
455
448
 
456
449
  for application in applications_names:
457
450
  batch_window = batch_window_generator.get_batch_window(
@@ -0,0 +1,16 @@
1
+ # Copyright 2024 Iguazio
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ from .stores import ObjectStoreFactory, get_store_object
16
+ from .stores.base import StoreBase
@@ -1,4 +1,4 @@
1
- # Copyright 2023 Iguazio
1
+ # Copyright 2024 Iguazio
2
2
  #
3
3
  # Licensed under the Apache License, Version 2.0 (the "License");
4
4
  # you may not use this file except in compliance with the License.
@@ -16,60 +16,54 @@
16
16
 
17
17
  import enum
18
18
  import typing
19
+ import warnings
19
20
 
20
21
  import mlrun.common.schemas.secret
21
22
  import mlrun.errors
22
23
 
23
- from .model_endpoint_store import ModelEndpointStore
24
+ from .base import StoreBase
24
25
 
25
26
 
26
- class ModelEndpointStoreType(enum.Enum):
27
- """Enum class to handle the different store type values for saving a model endpoint record."""
27
+ class ObjectStoreFactory(enum.Enum):
28
+ """Enum class to handle the different store type values for saving model monitoring records."""
28
29
 
29
30
  v3io_nosql = "v3io-nosql"
30
31
  SQL = "sql"
31
32
 
32
- def to_endpoint_store(
33
+ def to_object_store(
33
34
  self,
34
35
  project: str,
35
36
  access_key: str = None,
36
- endpoint_store_connection: str = None,
37
37
  secret_provider: typing.Callable = None,
38
- ) -> ModelEndpointStore:
38
+ ) -> StoreBase:
39
39
  """
40
- Return a ModelEndpointStore object based on the provided enum value.
41
-
42
- :param project: The name of the project.
43
- :param access_key: Access key with permission to the DB table. Note that if access key is None
44
- and the endpoint target is from type KV then the access key will be
45
- retrieved from the environment variable.
46
- :param endpoint_store_connection: A valid connection string for model endpoint target. Contains several
47
- key-value pairs that required for the database connection.
48
- e.g. A root user with password 1234, tries to connect a schema called
49
- mlrun within a local MySQL DB instance:
50
- 'mysql+pymysql://root:1234@localhost:3306/mlrun'.
40
+ Return a StoreBase object based on the provided enum value.
41
+
42
+ :param project: The name of the project.
43
+ :param access_key: Access key with permission to the DB table. Note that if access key is None
44
+ and the endpoint target is from type KV then the access key will be
45
+ retrieved from the environment variable.
51
46
  :param secret_provider: An optional secret provider to get the connection string secret.
52
47
 
53
- :return: `ModelEndpointStore` object.
48
+ :return: `StoreBase` object.
54
49
 
55
50
  """
56
51
 
57
- if self.value == ModelEndpointStoreType.v3io_nosql.value:
58
- from .kv_model_endpoint_store import KVModelEndpointStore
52
+ if self == self.v3io_nosql:
53
+ from mlrun.model_monitoring.db.stores.v3io_kv.kv_store import KVStoreBase
59
54
 
60
55
  # Get V3IO access key from env
61
56
  access_key = access_key or mlrun.mlconf.get_v3io_access_key()
62
57
 
63
- return KVModelEndpointStore(project=project, access_key=access_key)
58
+ return KVStoreBase(project=project, access_key=access_key)
64
59
 
65
60
  # Assuming SQL store target if store type is not KV.
66
61
  # Update these lines once there are more than two store target types.
67
62
 
68
- from .sql_model_endpoint_store import SQLModelEndpointStore
63
+ from mlrun.model_monitoring.db.stores.sqldb.sql_store import SQLStoreBase
69
64
 
70
- return SQLModelEndpointStore(
65
+ return SQLStoreBase(
71
66
  project=project,
72
- sql_connection_string=endpoint_store_connection,
73
67
  secret_provider=secret_provider,
74
68
  )
75
69
 
@@ -88,7 +82,24 @@ def get_model_endpoint_store(
88
82
  project: str,
89
83
  access_key: str = None,
90
84
  secret_provider: typing.Callable = None,
91
- ) -> ModelEndpointStore:
85
+ ) -> StoreBase:
86
+ # Leaving here for backwards compatibility
87
+ warnings.warn(
88
+ "The 'get_model_endpoint_store' function is deprecated and will be removed in 1.9.0. "
89
+ "Please use `get_store_object` instead.",
90
+ # TODO: remove in 1.9.0
91
+ FutureWarning,
92
+ )
93
+ return get_store_object(
94
+ project=project, access_key=access_key, secret_provider=secret_provider
95
+ )
96
+
97
+
98
+ def get_store_object(
99
+ project: str,
100
+ access_key: str = None,
101
+ secret_provider: typing.Callable = None,
102
+ ) -> StoreBase:
92
103
  """
93
104
  Getting the DB target type based on mlrun.config.model_endpoint_monitoring.store_type.
94
105
 
@@ -96,16 +107,14 @@ def get_model_endpoint_store(
96
107
  :param access_key: Access key with permission to the DB table.
97
108
  :param secret_provider: An optional secret provider to get the connection string secret.
98
109
 
99
- :return: `ModelEndpointStore` object. Using this object, the user can apply different operations on the
100
- model endpoint record such as write, update, get and delete.
110
+ :return: `StoreBase` object. Using this object, the user can apply different operations on the
111
+ model monitoring record such as write, update, get and delete a model endpoint.
101
112
  """
102
113
 
103
- # Get store type value from ModelEndpointStoreType enum class
104
- model_endpoint_store_type = ModelEndpointStoreType(
105
- mlrun.mlconf.model_endpoint_monitoring.store_type
106
- )
114
+ # Get store type value from ObjectStoreFactory enum class
115
+ store_type = ObjectStoreFactory(mlrun.mlconf.model_endpoint_monitoring.store_type)
107
116
 
108
- # Convert into model endpoint store target object
109
- return model_endpoint_store_type.to_endpoint_store(
117
+ # Convert into store target object
118
+ return store_type.to_object_store(
110
119
  project=project, access_key=access_key, secret_provider=secret_provider
111
120
  )
@@ -0,0 +1,15 @@
1
+ # Copyright 2024 Iguazio
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ from .store import StoreBase
@@ -1,4 +1,4 @@
1
- # Copyright 2023 Iguazio
1
+ # Copyright 2024 Iguazio
2
2
  #
3
3
  # Licensed under the Apache License, Version 2.0 (the "License");
4
4
  # you may not use this file except in compliance with the License.
@@ -11,22 +11,21 @@
11
11
  # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
12
  # See the License for the specific language governing permissions and
13
13
  # limitations under the License.
14
- #
15
14
 
16
15
  import typing
17
16
  from abc import ABC, abstractmethod
18
17
 
19
18
 
20
- class ModelEndpointStore(ABC):
19
+ class StoreBase(ABC):
21
20
  """
22
- An abstract class to handle the model endpoint in the DB target.
21
+ An abstract class to handle the store object in the DB target.
23
22
  """
24
23
 
25
24
  def __init__(self, project: str):
26
25
  """
27
- Initialize a new model endpoint target.
26
+ Initialize a new store target.
28
27
 
29
- :param project: The name of the project.
28
+ :param project: The name of the project.
30
29
  """
31
30
  self.project = project
32
31
 
@@ -143,3 +142,45 @@ class ModelEndpointStore(ABC):
143
142
  """
144
143
 
145
144
  pass
145
+
146
+ @abstractmethod
147
+ def write_application_result(self, event: dict[str, typing.Any]):
148
+ """
149
+ Write a new application result event in the target table.
150
+
151
+ :param event: An event dictionary that represents the application result, should be corresponded to the
152
+ schema defined in the :py:class:`~mlrun.common.schemas.model_monitoring.constants.WriterEvent`
153
+ object.
154
+ """
155
+ pass
156
+
157
+ @abstractmethod
158
+ def get_last_analyzed(self, endpoint_id: str, application_name: str) -> int:
159
+ """
160
+ Get the last analyzed time for the provided model endpoint and application.
161
+
162
+ :param endpoint_id: The unique id of the model endpoint.
163
+ :param application_name: Registered application name.
164
+
165
+ :return: Timestamp as a Unix time.
166
+ :raise: MLRunNotFoundError if last analyzed value is not found.
167
+ """
168
+ pass
169
+
170
+ @abstractmethod
171
+ def update_last_analyzed(
172
+ self,
173
+ endpoint_id: str,
174
+ application_name: str,
175
+ last_analyzed: int,
176
+ ):
177
+ """
178
+ Update the last analyzed time for the provided model endpoint and application.
179
+
180
+ :param endpoint_id: The unique id of the model endpoint.
181
+ :param application_name: Registered application name.
182
+ :param last_analyzed: Timestamp as a Unix time that represents the last analyzed time of a certain
183
+ application and model endpoint.
184
+
185
+ """
186
+ pass
@@ -0,0 +1,13 @@
1
+ # Copyright 2024 Iguazio
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.