mlrun 1.7.0rc5__py3-none-any.whl → 1.7.0rc7__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- mlrun/artifacts/base.py +2 -1
- mlrun/artifacts/plots.py +9 -5
- mlrun/common/constants.py +6 -0
- mlrun/common/schemas/__init__.py +2 -0
- mlrun/common/schemas/model_monitoring/__init__.py +4 -0
- mlrun/common/schemas/model_monitoring/constants.py +35 -18
- mlrun/common/schemas/project.py +1 -0
- mlrun/common/types.py +7 -1
- mlrun/config.py +19 -6
- mlrun/data_types/data_types.py +4 -0
- mlrun/datastore/alibaba_oss.py +130 -0
- mlrun/datastore/azure_blob.py +4 -5
- mlrun/datastore/base.py +22 -16
- mlrun/datastore/datastore.py +4 -0
- mlrun/datastore/google_cloud_storage.py +1 -1
- mlrun/datastore/sources.py +7 -7
- mlrun/db/base.py +14 -6
- mlrun/db/factory.py +1 -1
- mlrun/db/httpdb.py +61 -56
- mlrun/db/nopdb.py +3 -0
- mlrun/launcher/__init__.py +1 -1
- mlrun/launcher/base.py +1 -1
- mlrun/launcher/client.py +1 -1
- mlrun/launcher/factory.py +1 -1
- mlrun/launcher/local.py +1 -1
- mlrun/launcher/remote.py +1 -1
- mlrun/model.py +1 -0
- mlrun/model_monitoring/__init__.py +1 -1
- mlrun/model_monitoring/api.py +104 -301
- mlrun/model_monitoring/application.py +21 -21
- mlrun/model_monitoring/applications/histogram_data_drift.py +130 -40
- mlrun/model_monitoring/controller.py +26 -33
- mlrun/model_monitoring/db/__init__.py +16 -0
- mlrun/model_monitoring/{stores → db/stores}/__init__.py +43 -34
- mlrun/model_monitoring/db/stores/base/__init__.py +15 -0
- mlrun/model_monitoring/{stores/model_endpoint_store.py → db/stores/base/store.py} +47 -6
- mlrun/model_monitoring/db/stores/sqldb/__init__.py +13 -0
- mlrun/model_monitoring/db/stores/sqldb/models/__init__.py +49 -0
- mlrun/model_monitoring/{stores → db/stores/sqldb}/models/base.py +76 -3
- mlrun/model_monitoring/db/stores/sqldb/models/mysql.py +68 -0
- mlrun/model_monitoring/{stores → db/stores/sqldb}/models/sqlite.py +13 -1
- mlrun/model_monitoring/db/stores/sqldb/sql_store.py +662 -0
- mlrun/model_monitoring/db/stores/v3io_kv/__init__.py +13 -0
- mlrun/model_monitoring/{stores/kv_model_endpoint_store.py → db/stores/v3io_kv/kv_store.py} +134 -3
- mlrun/model_monitoring/features_drift_table.py +34 -22
- mlrun/model_monitoring/helpers.py +45 -6
- mlrun/model_monitoring/stream_processing.py +43 -9
- mlrun/model_monitoring/tracking_policy.py +7 -1
- mlrun/model_monitoring/writer.py +4 -36
- mlrun/projects/pipelines.py +13 -1
- mlrun/projects/project.py +279 -117
- mlrun/run.py +72 -74
- mlrun/runtimes/__init__.py +35 -0
- mlrun/runtimes/base.py +7 -1
- mlrun/runtimes/nuclio/api_gateway.py +188 -61
- mlrun/runtimes/nuclio/application/__init__.py +15 -0
- mlrun/runtimes/nuclio/application/application.py +283 -0
- mlrun/runtimes/nuclio/application/reverse_proxy.go +87 -0
- mlrun/runtimes/nuclio/function.py +53 -1
- mlrun/runtimes/nuclio/serving.py +28 -32
- mlrun/runtimes/pod.py +27 -1
- mlrun/serving/server.py +4 -6
- mlrun/serving/states.py +41 -33
- mlrun/utils/helpers.py +34 -0
- mlrun/utils/version/version.json +2 -2
- {mlrun-1.7.0rc5.dist-info → mlrun-1.7.0rc7.dist-info}/METADATA +14 -5
- {mlrun-1.7.0rc5.dist-info → mlrun-1.7.0rc7.dist-info}/RECORD +71 -64
- mlrun/model_monitoring/batch.py +0 -974
- mlrun/model_monitoring/stores/models/__init__.py +0 -27
- mlrun/model_monitoring/stores/models/mysql.py +0 -34
- mlrun/model_monitoring/stores/sql_model_endpoint_store.py +0 -382
- {mlrun-1.7.0rc5.dist-info → mlrun-1.7.0rc7.dist-info}/LICENSE +0 -0
- {mlrun-1.7.0rc5.dist-info → mlrun-1.7.0rc7.dist-info}/WHEEL +0 -0
- {mlrun-1.7.0rc5.dist-info → mlrun-1.7.0rc7.dist-info}/entry_points.txt +0 -0
- {mlrun-1.7.0rc5.dist-info → mlrun-1.7.0rc7.dist-info}/top_level.txt +0 -0
mlrun/model_monitoring/applications/histogram_data_drift.py

```diff
@@ -13,13 +13,17 @@
 # limitations under the License.
 
 from dataclasses import dataclass
-from typing import Final, Optional, Protocol
+from typing import Final, Optional, Protocol, cast
 
 import numpy as np
-from pandas import DataFrame, Timestamp
+from pandas import DataFrame, Series, Timestamp
 
+import mlrun.artifacts
+import mlrun.common.model_monitoring.helpers
+import mlrun.model_monitoring.features_drift_table as mm_drift_table
 from mlrun.common.schemas.model_monitoring.constants import (
     MLRUN_HISTOGRAM_DATA_DRIFT_APP_NAME,
+    EventFieldType,
     ResultKindApp,
     ResultStatusApp,
 )
@@ -27,7 +31,7 @@ from mlrun.model_monitoring.application import (
     ModelMonitoringApplicationBase,
     ModelMonitoringApplicationResult,
 )
-from mlrun.model_monitoring.
+from mlrun.model_monitoring.metrics.histogram_distance import (
     HellingerDistance,
     HistogramDistanceMetric,
     KullbackLeiblerDivergence,
@@ -115,31 +119,24 @@ class HistogramDataDriftApplication(ModelMonitoringApplicationBase):
 
     def _compute_metrics_per_feature(
         self, sample_df_stats: DataFrame, feature_stats: DataFrame
-    ) ->
+    ) -> DataFrame:
         """Compute the metrics for the different features and labels"""
-        metrics_per_feature
-        metric_class
-
+        metrics_per_feature = DataFrame(
+            columns=[metric_class.NAME for metric_class in self.metrics]
+        )
 
-        for
-
-
-            assert sample_feat == reference_feat, "The features do not match"
+        for feature_name in feature_stats:
+            sample_hist = np.asarray(sample_df_stats[feature_name])
+            reference_hist = np.asarray(feature_stats[feature_name])
             self.context.logger.info(
-                "Computing metrics for feature", feature_name=
+                "Computing metrics for feature", feature_name=feature_name
             )
-
-
-
-
-            self.
-
-                metric_name=metric_name,
-                feature_name=sample_feat,
-            )
-            metrics_per_feature[metric].append(
-                metric(distrib_t=sample_arr, distrib_u=reference_arr).compute()
-            )
+            metrics_per_feature.loc[feature_name] = {  # pyright: ignore[reportCallIssue,reportArgumentType]
+                metric.NAME: metric(
+                    distrib_t=sample_hist, distrib_u=reference_hist
+                ).compute()
+                for metric in self.metrics
+            }
         self.context.logger.info("Finished computing the metrics")
 
         return metrics_per_feature
@@ -147,37 +144,37 @@ class HistogramDataDriftApplication(ModelMonitoringApplicationBase):
     def _add_general_drift_result(
         self, results: list[ModelMonitoringApplicationResult], value: float
     ) -> None:
+        """Add the general drift result to the results list and log it"""
+        status = self._value_classifier.value_to_status(value)
         results.append(
             ModelMonitoringApplicationResult(
                 name="general_drift",
                 value=value,
                 kind=self.METRIC_KIND,
-                status=
+                status=status,
            )
        )
 
     def _get_results(
-        self, metrics_per_feature:
+        self, metrics_per_feature: DataFrame
     ) -> list[ModelMonitoringApplicationResult]:
         """Average the metrics over the features and add the status"""
         results: list[ModelMonitoringApplicationResult] = []
-
-
-
-
-
-
-        if
+
+        self.context.logger.debug("Averaging metrics over the features")
+        metrics_mean = metrics_per_feature.mean().to_dict()
+
+        self.context.logger.debug("Creating the results")
+        for name, value in metrics_mean.items():
+            if name == KullbackLeiblerDivergence.NAME:
                 # This metric is not bounded from above [0, inf).
                 # No status is currently reported for KL divergence
                 status = ResultStatusApp.irrelevant
             else:
                 status = self._value_classifier.value_to_status(value)
-            if metric_class in self._REQUIRED_METRICS:
-                hellinger_tvd_values.append(value)
             results.append(
                 ModelMonitoringApplicationResult(
-                    name=f"{
+                    name=f"{name}_mean",
                     value=value,
                     kind=self.METRIC_KIND,
                     status=status,
@@ -185,16 +182,102 @@ class HistogramDataDriftApplication(ModelMonitoringApplicationBase):
            )
 
         self._add_general_drift_result(
-            results=results,
+            results=results,
+            value=np.mean(
+                [
+                    metrics_mean[HellingerDistance.NAME],
+                    metrics_mean[TotalVarianceDistance.NAME],
+                ]
+            ),
        )
 
+        self.context.logger.info("Finished with the results")
         return results
 
+    @staticmethod
+    def _remove_timestamp_feature(
+        sample_set_statistics: mlrun.common.model_monitoring.helpers.FeatureStats,
+    ) -> mlrun.common.model_monitoring.helpers.FeatureStats:
+        """
+        Drop the 'timestamp' feature if it exists, as it is irrelevant
+        in the plotly artifact
+        """
+        sample_set_statistics = mlrun.common.model_monitoring.helpers.FeatureStats(
+            sample_set_statistics.copy()
+        )
+        if EventFieldType.TIMESTAMP in sample_set_statistics:
+            del sample_set_statistics[EventFieldType.TIMESTAMP]
+        return sample_set_statistics
+
+    def _log_json_artifact(self, drift_per_feature_values: Series) -> None:
+        """Log the drift values as a JSON artifact"""
+        self.context.logger.debug("Logging drift value per feature JSON artifact")
+        self.context.log_artifact(
+            mlrun.artifacts.Artifact(
+                body=drift_per_feature_values.to_json(),
+                format="json",
+                key="features_drift_results",
+            )
+        )
+        self.context.logger.debug("Logged JSON artifact successfully")
+
+    def _log_plotly_table_artifact(
+        self,
+        sample_set_statistics: mlrun.common.model_monitoring.helpers.FeatureStats,
+        inputs_statistics: mlrun.common.model_monitoring.helpers.FeatureStats,
+        metrics_per_feature: DataFrame,
+        drift_per_feature_values: Series,
+    ) -> None:
+        """Log the Plotly drift table artifact"""
+        self.context.logger.debug(
+            "Feature stats",
+            sample_set_statistics=sample_set_statistics,
+            inputs_statistics=inputs_statistics,
+        )
+
+        self.context.logger.debug("Computing drift results per feature")
+        drift_results = {
+            cast(str, key): (self._value_classifier.value_to_status(value), value)
+            for key, value in drift_per_feature_values.items()
+        }
+        self.context.logger.debug("Logging plotly artifact")
+        self.context.log_artifact(
+            mm_drift_table.FeaturesDriftTablePlot().produce(
+                sample_set_statistics=sample_set_statistics,
+                inputs_statistics=inputs_statistics,
+                metrics=metrics_per_feature.T.to_dict(),
+                drift_results=drift_results,
+            )
+        )
+        self.context.logger.debug("Logged plotly artifact successfully")
+
+    def _log_drift_artifacts(
+        self,
+        sample_set_statistics: mlrun.common.model_monitoring.helpers.FeatureStats,
+        inputs_statistics: mlrun.common.model_monitoring.helpers.FeatureStats,
+        metrics_per_feature: DataFrame,
+        log_json_artifact: bool = True,
+    ) -> None:
+        """Log JSON and Plotly drift data per feature artifacts"""
+        drift_per_feature_values = metrics_per_feature[
+            [HellingerDistance.NAME, TotalVarianceDistance.NAME]
+        ].mean(axis=1)
+
+        if log_json_artifact:
+            self._log_json_artifact(drift_per_feature_values)
+
+        self._log_plotly_table_artifact(
+            sample_set_statistics=self._remove_timestamp_feature(sample_set_statistics),
+            inputs_statistics=inputs_statistics,
+            metrics_per_feature=metrics_per_feature,
+            drift_per_feature_values=drift_per_feature_values,
+        )
+
     def do_tracking(
         self,
         application_name: str,
-        sample_df_stats:
-        feature_stats:
+        sample_df_stats: mlrun.common.model_monitoring.helpers.FeatureStats,
+        feature_stats: mlrun.common.model_monitoring.helpers.FeatureStats,
         sample_df: DataFrame,
         start_infer_time: Timestamp,
         end_infer_time: Timestamp,
@@ -210,7 +293,14 @@ class HistogramDataDriftApplication(ModelMonitoringApplicationBase):
         """
         self.context.logger.debug("Starting to run the application")
         metrics_per_feature = self._compute_metrics_per_feature(
-            sample_df_stats=sample_df_stats,
+            sample_df_stats=self.dict_to_histogram(sample_df_stats),
+            feature_stats=self.dict_to_histogram(feature_stats),
+        )
+        self.context.logger.debug("Saving artifacts")
+        self._log_drift_artifacts(
+            inputs_statistics=feature_stats,
+            sample_set_statistics=sample_df_stats,
+            metrics_per_feature=metrics_per_feature,
        )
         self.context.logger.debug("Computing average per metric")
         results = self._get_results(metrics_per_feature)
```
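The refactored `_compute_metrics_per_feature` keeps one row per feature and one column per metric, and `_get_results` then averages each column; the general drift value is the mean of the Hellinger and total-variance means. Below is a minimal, standalone sketch of that aggregation pattern. The metric functions are the standard Hellinger / total-variation formulas written here only for illustration (not mlrun's `HistogramDistanceMetric` classes), and the per-feature histograms are made up.

```python
import numpy as np
from pandas import DataFrame


def hellinger(p: np.ndarray, q: np.ndarray) -> float:
    # Standard Hellinger distance between two normalized histograms.
    return float(np.sqrt(0.5 * np.sum((np.sqrt(p) - np.sqrt(q)) ** 2)))


def total_variation(p: np.ndarray, q: np.ndarray) -> float:
    # Standard total variation distance between two normalized histograms.
    return float(0.5 * np.sum(np.abs(p - q)))


metrics = {"hellinger_mean": hellinger, "total_variance_distance_mean": total_variation}

# Hypothetical per-feature histograms: current sample vs. reference (training) set.
sample = {"f1": np.array([0.2, 0.3, 0.5]), "f2": np.array([0.1, 0.6, 0.3])}
reference = {"f1": np.array([0.25, 0.25, 0.5]), "f2": np.array([0.2, 0.5, 0.3])}

# One row per feature, one column per metric -- the shape used by the new code.
metrics_per_feature = DataFrame(columns=list(metrics))
for feature_name in reference:
    metrics_per_feature.loc[feature_name] = {
        name: fn(sample[feature_name], reference[feature_name])
        for name, fn in metrics.items()
    }
metrics_per_feature = metrics_per_feature.astype(float)

# Average each metric over the features, then combine the Hellinger and
# total-variance means into a single "general drift" value, as the app does.
metrics_mean = metrics_per_feature.mean().to_dict()
general_drift = np.mean(
    [metrics_mean["hellinger_mean"], metrics_mean["total_variance_distance_mean"]]
)
print(metrics_per_feature)
print(general_drift)
```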
mlrun/model_monitoring/controller.py

```diff
@@ -21,25 +21,24 @@ from collections.abc import Iterator
 from typing import Any, NamedTuple, Optional, Union, cast
 
 import nuclio
-from v3io.dataplane.response import HttpResponseError
 
 import mlrun
 import mlrun.common.schemas.model_monitoring.constants as mm_constants
 import mlrun.data_types.infer
 import mlrun.feature_store as fstore
+import mlrun.model_monitoring.db.stores
 from mlrun.common.model_monitoring.helpers import FeatureStats, pad_features_hist
 from mlrun.datastore import get_stream_pusher
 from mlrun.datastore.targets import ParquetTarget
 from mlrun.errors import err_to_str
-from mlrun.model_monitoring.batch import calculate_inputs_statistics
 from mlrun.model_monitoring.helpers import (
     _BatchDict,
     batch_dict2timedelta,
+    calculate_inputs_statistics,
     get_monitoring_parquet_path,
     get_stream_path,
 )
-from mlrun.utils import
-from mlrun.utils.v3io_clients import get_v3io_client
+from mlrun.utils import datetime_now, logger
 
 
 class _Interval(NamedTuple):
@@ -48,8 +47,6 @@ class _Interval(NamedTuple):
 
 
 class _BatchWindow:
-    V3IO_CONTAINER_FORMAT = "users/pipelines/{project}/monitoring-schedules/functions"
-
     def __init__(
         self,
         project: str,
@@ -65,27 +62,22 @@ class _BatchWindow:
         All the time values are in seconds.
         The start and stop time are in seconds since the epoch.
         """
+        self.project = project
         self._endpoint = endpoint
         self._application = application
         self._first_request = first_request
-        self._kv_storage = get_v3io_client(
-            endpoint=mlrun.mlconf.v3io_api,
-            # Avoid noisy warning logs before the KV table is created
-            logger=create_logger(name="v3io_client", level="error"),
-        ).kv
-        self._v3io_container = self.V3IO_CONTAINER_FORMAT.format(project=project)
         self._stop = last_updated
         self._step = timedelta_seconds
+        self._db = mlrun.model_monitoring.get_store_object(project=self.project)
         self._start = self._get_last_analyzed()
 
     def _get_last_analyzed(self) -> Optional[int]:
         try:
-
-
-
-                key=self._application,
+            last_analyzed = self._db.get_last_analyzed(
+                endpoint_id=self._endpoint,
+                application_name=self._application,
            )
-        except
+        except mlrun.errors.MLRunNotFoundError:
             logger.info(
                 "No last analyzed time was found for this endpoint and "
                 "application, as this is probably the first time this "
@@ -96,7 +88,7 @@ class _BatchWindow:
                 first_request=self._first_request,
                 last_updated=self._stop,
            )
-
+
             if self._first_request and self._stop:
                 # TODO : Change the timedelta according to the policy.
                 first_period_in_seconds = max(
@@ -108,7 +100,6 @@ class _BatchWindow:
            )
             return self._first_request
 
-        last_analyzed = data.output.item[mm_constants.SchedulingKeys.LAST_ANALYZED]
         logger.info(
             "Got the last analyzed time for this endpoint and application",
             endpoint=self._endpoint,
@@ -124,11 +115,11 @@ class _BatchWindow:
             application=self._application,
             last_analyzed=last_analyzed,
        )
-
-
-
-
-
+
+        self._db.update_last_analyzed(
+            endpoint_id=self._endpoint,
+            application_name=self._application,
+            last_analyzed=last_analyzed,
        )
 
     def get_intervals(
@@ -301,7 +292,7 @@ class MonitoringApplicationController:
             f"Initializing {self.__class__.__name__}", project=project
        )
 
-        self.db = mlrun.model_monitoring.
+        self.db = mlrun.model_monitoring.get_store_object(project=project)
 
         self._batch_window_generator = _BatchWindowGenerator(
             batch_dict=json.loads(
@@ -359,7 +350,12 @@ class MonitoringApplicationController:
                 {
                     app.metadata.name
                     for app in monitoring_functions
-                    if
+                    if (
+                        app.status.state == "ready"
+                        # workaround for the default app, as its `status.state` is `None`
+                        or app.metadata.name
+                        == mm_constants.MLRUN_HISTOGRAM_DATA_DRIFT_APP_NAME
+                    )
                 }
            )
             if not applications_names:
@@ -367,6 +363,10 @@ class MonitoringApplicationController:
                     "No monitoring functions found", project=self.project
                )
                 return
+            self.context.logger.info(
+                "Starting to iterate over the applications",
+                applications=applications_names,
+            )
 
         except Exception as e:
             self.context.logger.error(
@@ -445,13 +445,6 @@ class MonitoringApplicationController:
             m_fs = fstore.get_feature_set(
                 endpoint[mm_constants.EventFieldType.FEATURE_SET_URI]
            )
-            labels = endpoint[mm_constants.EventFieldType.LABEL_NAMES]
-            if labels:
-                if isinstance(labels, str):
-                    labels = json.loads(labels)
-                for label in labels:
-                    if label not in list(m_fs.spec.features.keys()):
-                        m_fs.add_feature(fstore.Feature(name=label, value_type="float"))
 
             for application in applications_names:
                 batch_window = batch_window_generator.get_batch_window(
```
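With the V3IO KV client gone, `_BatchWindow` now tracks its scheduling state through the per-project store object. The sketch below illustrates that call pattern using only the calls visible in this hunk (`get_store_object`, `get_last_analyzed`, `update_last_analyzed`, `MLRunNotFoundError`); the first-run fallback here is a simplification and not the controller's real batch-interval policy.

```python
import mlrun
import mlrun.model_monitoring


def get_or_init_last_analyzed(
    project: str, endpoint_id: str, application: str, first_request: int
) -> int:
    # The store object replaces direct V3IO KV access for scheduling state.
    store = mlrun.model_monitoring.get_store_object(project=project)
    try:
        return store.get_last_analyzed(
            endpoint_id=endpoint_id, application_name=application
        )
    except mlrun.errors.MLRunNotFoundError:
        # First run for this endpoint/application in this sketch: fall back to
        # the first request time and persist it for the next iteration.
        store.update_last_analyzed(
            endpoint_id=endpoint_id,
            application_name=application,
            last_analyzed=first_request,
        )
        return first_request
```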
mlrun/model_monitoring/db/__init__.py (new file)

```diff
@@ -0,0 +1,16 @@
+# Copyright 2024 Iguazio
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from .stores import ObjectStoreFactory, get_store_object
+from .stores.base import StoreBase
```
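The new package makes the store layer importable from `mlrun.model_monitoring.db`. A small sketch of what it re-exports; the project name is hypothetical, and building a concrete store requires a configured mlrun environment:

```python
from mlrun.model_monitoring.db import ObjectStoreFactory, StoreBase, get_store_object

# The supported backends, selected via mlrun.mlconf.model_endpoint_monitoring.store_type:
print([member.value for member in ObjectStoreFactory])  # ['v3io-nosql', 'sql']

# With a configured environment, this returns a concrete StoreBase implementation:
# store: StoreBase = get_store_object(project="my-project")  # hypothetical project name
```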
mlrun/model_monitoring/{stores → db/stores}/__init__.py

```diff
@@ -1,4 +1,4 @@
-# Copyright
+# Copyright 2024 Iguazio
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -16,60 +16,54 @@
 
 import enum
 import typing
+import warnings
 
 import mlrun.common.schemas.secret
 import mlrun.errors
 
-from .
+from .base import StoreBase
 
 
-class
-    """Enum class to handle the different store type values for saving
+class ObjectStoreFactory(enum.Enum):
+    """Enum class to handle the different store type values for saving model monitoring records."""
 
     v3io_nosql = "v3io-nosql"
     SQL = "sql"
 
-    def
+    def to_object_store(
         self,
         project: str,
         access_key: str = None,
-        endpoint_store_connection: str = None,
         secret_provider: typing.Callable = None,
-    ) ->
+    ) -> StoreBase:
         """
-        Return a
-
-        :param project:
-        :param access_key:
-
-
-        :param endpoint_store_connection: A valid connection string for model endpoint target. Contains several
-                                          key-value pairs that required for the database connection.
-                                          e.g. A root user with password 1234, tries to connect a schema called
-                                          mlrun within a local MySQL DB instance:
-                                          'mysql+pymysql://root:1234@localhost:3306/mlrun'.
+        Return a StoreBase object based on the provided enum value.
+
+        :param project:         The name of the project.
+        :param access_key:      Access key with permission to the DB table. Note that if access key is None
+                                and the endpoint target is from type KV then the access key will be
+                                retrieved from the environment variable.
         :param secret_provider: An optional secret provider to get the connection string secret.
 
-        :return: `
+        :return: `StoreBase` object.
 
         """
 
-        if self
-            from .
+        if self == self.v3io_nosql:
+            from mlrun.model_monitoring.db.stores.v3io_kv.kv_store import KVStoreBase
 
             # Get V3IO access key from env
             access_key = access_key or mlrun.mlconf.get_v3io_access_key()
 
-            return
+            return KVStoreBase(project=project, access_key=access_key)
 
         # Assuming SQL store target if store type is not KV.
         # Update these lines once there are more than two store target types.
 
-        from .
+        from mlrun.model_monitoring.db.stores.sqldb.sql_store import SQLStoreBase
 
-        return
+        return SQLStoreBase(
             project=project,
-            sql_connection_string=endpoint_store_connection,
             secret_provider=secret_provider,
        )
@@ -88,7 +82,24 @@ def get_model_endpoint_store(
     project: str,
     access_key: str = None,
     secret_provider: typing.Callable = None,
-) ->
+) -> StoreBase:
+    # Leaving here for backwards compatibility
+    warnings.warn(
+        "The 'get_model_endpoint_store' function is deprecated and will be removed in 1.9.0. "
+        "Please use `get_store_object` instead.",
+        # TODO: remove in 1.9.0
+        FutureWarning,
+    )
+    return get_store_object(
+        project=project, access_key=access_key, secret_provider=secret_provider
+    )
+
+
+def get_store_object(
+    project: str,
+    access_key: str = None,
+    secret_provider: typing.Callable = None,
+) -> StoreBase:
     """
     Getting the DB target type based on mlrun.config.model_endpoint_monitoring.store_type.
 
@@ -96,16 +107,14 @@ def get_model_endpoint_store(
     :param access_key:      Access key with permission to the DB table.
     :param secret_provider: An optional secret provider to get the connection string secret.
 
-    :return: `
-             model
+    :return: `StoreBase` object. Using this object, the user can apply different operations on the
+             model monitoring record such as write, update, get and delete a model endpoint.
     """
 
-    # Get store type value from
-
-        mlrun.mlconf.model_endpoint_monitoring.store_type
-    )
+    # Get store type value from ObjectStoreFactory enum class
+    store_type = ObjectStoreFactory(mlrun.mlconf.model_endpoint_monitoring.store_type)
 
-    # Convert into
-    return
+    # Convert into store target object
+    return store_type.to_object_store(
         project=project, access_key=access_key, secret_provider=secret_provider
    )
```
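Per the hunks above, `get_model_endpoint_store` is kept only as a backwards-compatible shim that emits a `FutureWarning` and forwards to `get_store_object`, which resolves the configured backend through the `ObjectStoreFactory` enum. A small sketch of the enum mapping (new code should call `get_store_object` directly):

```python
from mlrun.model_monitoring.db.stores import ObjectStoreFactory

# The configured store type string maps onto a concrete backend:
print(ObjectStoreFactory("sql"))         # ObjectStoreFactory.SQL
print(ObjectStoreFactory("v3io-nosql"))  # ObjectStoreFactory.v3io_nosql

# ObjectStoreFactory.to_object_store(project=...) then builds either a
# KVStoreBase or an SQLStoreBase, as shown in the hunk above; the deprecated
# get_model_endpoint_store(...) wrapper is slated for removal in 1.9.0.
```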
mlrun/model_monitoring/db/stores/base/__init__.py (new file)

```diff
@@ -0,0 +1,15 @@
+# Copyright 2024 Iguazio
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from .store import StoreBase
```
mlrun/model_monitoring/{stores/model_endpoint_store.py → db/stores/base/store.py}

```diff
@@ -1,4 +1,4 @@
-# Copyright
+# Copyright 2024 Iguazio
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -11,22 +11,21 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-#
 
 import typing
 from abc import ABC, abstractmethod
 
 
-class
+class StoreBase(ABC):
     """
-    An abstract class to handle the
+    An abstract class to handle the store object in the DB target.
     """
 
     def __init__(self, project: str):
         """
-        Initialize a new
+        Initialize a new store target.
 
-        :param project:
+        :param project: The name of the project.
         """
         self.project = project
 
@@ -143,3 +142,45 @@ class ModelEndpointStore(ABC):
         """
 
         pass
+
+    @abstractmethod
+    def write_application_result(self, event: dict[str, typing.Any]):
+        """
+        Write a new application result event in the target table.
+
+        :param event: An event dictionary that represents the application result, should be corresponded to the
+                      schema defined in the :py:class:`~mlrun.common.schemas.model_monitoring.constants.WriterEvent`
+                      object.
+        """
+        pass
+
+    @abstractmethod
+    def get_last_analyzed(self, endpoint_id: str, application_name: str) -> int:
+        """
+        Get the last analyzed time for the provided model endpoint and application.
+
+        :param endpoint_id:      The unique id of the model endpoint.
+        :param application_name: Registered application name.
+
+        :return: Timestamp as a Unix time.
+        :raise: MLRunNotFoundError if last analyzed value is not found.
+        """
+        pass
+
+    @abstractmethod
+    def update_last_analyzed(
+        self,
+        endpoint_id: str,
+        application_name: str,
+        last_analyzed: int,
+    ):
+        """
+        Update the last analyzed time for the provided model endpoint and application.
+
+        :param endpoint_id:      The unique id of the model endpoint.
+        :param application_name: Registered application name.
+        :param last_analyzed:    Timestamp as a Unix time that represents the last analyzed time of a certain
+                                 application and model endpoint.
+
+        """
+        pass
```
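The three new abstract methods define the scheduling and application-result contract a store backend must satisfy. The real `StoreBase` declares additional abstract methods outside this hunk, so the class below is a standalone, in-memory illustration of just these three methods, not an actual subclass:

```python
import typing

import mlrun.errors


class InMemorySchedulingStore:
    """Illustrative only: mirrors the three methods added to StoreBase above."""

    def __init__(self, project: str):
        self.project = project
        self._last_analyzed: dict[tuple[str, str], int] = {}
        self._results: list[dict[str, typing.Any]] = []

    def write_application_result(self, event: dict[str, typing.Any]) -> None:
        # In a real backend this row would follow the WriterEvent schema.
        self._results.append(event)

    def get_last_analyzed(self, endpoint_id: str, application_name: str) -> int:
        key = (endpoint_id, application_name)
        if key not in self._last_analyzed:
            raise mlrun.errors.MLRunNotFoundError(
                f"No last analyzed time for {key}"
            )
        return self._last_analyzed[key]

    def update_last_analyzed(
        self, endpoint_id: str, application_name: str, last_analyzed: int
    ) -> None:
        self._last_analyzed[(endpoint_id, application_name)] = last_analyzed
```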
mlrun/model_monitoring/db/stores/sqldb/__init__.py (new file)

```diff
@@ -0,0 +1,13 @@
+# Copyright 2024 Iguazio
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
```