mlrun 1.7.0rc15__py3-none-any.whl → 1.7.0rc16__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of mlrun might be problematic. Click here for more details.
- mlrun/__init__.py +10 -1
- mlrun/__main__.py +18 -4
- mlrun/alerts/__init__.py +15 -0
- mlrun/alerts/alert.py +141 -0
- mlrun/artifacts/__init__.py +7 -1
- mlrun/artifacts/base.py +28 -3
- mlrun/artifacts/dataset.py +8 -0
- mlrun/artifacts/manager.py +18 -0
- mlrun/artifacts/model.py +7 -0
- mlrun/artifacts/plots.py +13 -0
- mlrun/common/schemas/__init__.py +4 -2
- mlrun/common/schemas/alert.py +46 -4
- mlrun/common/schemas/api_gateway.py +4 -0
- mlrun/common/schemas/artifact.py +15 -0
- mlrun/common/schemas/auth.py +2 -0
- mlrun/common/schemas/model_monitoring/__init__.py +4 -1
- mlrun/common/schemas/model_monitoring/constants.py +16 -1
- mlrun/common/schemas/model_monitoring/model_endpoints.py +60 -1
- mlrun/common/schemas/project.py +2 -0
- mlrun/config.py +4 -1
- mlrun/datastore/datastore_profile.py +10 -7
- mlrun/db/base.py +23 -3
- mlrun/db/httpdb.py +97 -43
- mlrun/db/nopdb.py +20 -2
- mlrun/errors.py +5 -0
- mlrun/launcher/base.py +3 -2
- mlrun/lists.py +2 -0
- mlrun/model.py +7 -2
- mlrun/model_monitoring/__init__.py +1 -1
- mlrun/model_monitoring/applications/_application_steps.py +1 -2
- mlrun/model_monitoring/applications/context.py +1 -1
- mlrun/model_monitoring/applications/histogram_data_drift.py +64 -38
- mlrun/model_monitoring/db/__init__.py +2 -0
- mlrun/model_monitoring/db/stores/base/store.py +9 -36
- mlrun/model_monitoring/db/stores/sqldb/sql_store.py +63 -110
- mlrun/model_monitoring/db/stores/v3io_kv/kv_store.py +56 -202
- mlrun/model_monitoring/db/tsdb/__init__.py +71 -0
- mlrun/model_monitoring/db/tsdb/base.py +135 -0
- mlrun/model_monitoring/db/tsdb/v3io/__init__.py +15 -0
- mlrun/model_monitoring/db/tsdb/v3io/stream_graph_steps.py +117 -0
- mlrun/model_monitoring/db/tsdb/v3io/v3io_connector.py +404 -0
- mlrun/model_monitoring/db/v3io_tsdb_reader.py +134 -0
- mlrun/model_monitoring/stream_processing.py +46 -210
- mlrun/model_monitoring/writer.py +49 -99
- mlrun/platforms/__init__.py +10 -9
- mlrun/platforms/iguazio.py +19 -200
- mlrun/projects/operations.py +11 -7
- mlrun/projects/pipelines.py +13 -76
- mlrun/projects/project.py +55 -14
- mlrun/render.py +9 -3
- mlrun/run.py +5 -38
- mlrun/runtimes/base.py +3 -3
- mlrun/runtimes/kubejob.py +2 -1
- mlrun/runtimes/nuclio/api_gateway.py +75 -9
- mlrun/runtimes/nuclio/function.py +8 -34
- mlrun/runtimes/pod.py +16 -36
- mlrun/runtimes/remotesparkjob.py +1 -1
- mlrun/runtimes/sparkjob/spark3job.py +1 -1
- mlrun/runtimes/utils.py +0 -38
- mlrun/utils/helpers.py +45 -31
- mlrun/utils/notifications/notification/base.py +1 -1
- mlrun/utils/notifications/notification/slack.py +9 -4
- mlrun/utils/notifications/notification/webhook.py +1 -1
- mlrun/utils/notifications/notification_pusher.py +15 -14
- mlrun/utils/version/version.json +2 -2
- {mlrun-1.7.0rc15.dist-info → mlrun-1.7.0rc16.dist-info}/METADATA +3 -2
- {mlrun-1.7.0rc15.dist-info → mlrun-1.7.0rc16.dist-info}/RECORD +71 -65
- mlrun/kfpops.py +0 -860
- mlrun/platforms/other.py +0 -305
- {mlrun-1.7.0rc15.dist-info → mlrun-1.7.0rc16.dist-info}/LICENSE +0 -0
- {mlrun-1.7.0rc15.dist-info → mlrun-1.7.0rc16.dist-info}/WHEEL +0 -0
- {mlrun-1.7.0rc15.dist-info → mlrun-1.7.0rc16.dist-info}/entry_points.txt +0 -0
- {mlrun-1.7.0rc15.dist-info → mlrun-1.7.0rc16.dist-info}/top_level.txt +0 -0
|
@@ -15,7 +15,7 @@
|
|
|
15
15
|
# flake8: noqa - this is until we take care of the F401 violations with respect to __all__ & sphinx
|
|
16
16
|
# for backwards compatibility
|
|
17
17
|
|
|
18
|
-
from .db import get_store_object
|
|
18
|
+
from .db import get_store_object, get_tsdb_connector
|
|
19
19
|
from .helpers import get_stream_path
|
|
20
20
|
from .model_endpoint import ModelEndpoint
|
|
21
21
|
from .tracking_policy import TrackingPolicy
|
|
@@ -11,7 +11,6 @@
|
|
|
11
11
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
12
|
# See the License for the specific language governing permissions and
|
|
13
13
|
# limitations under the License.
|
|
14
|
-
import copy
|
|
15
14
|
import json
|
|
16
15
|
import typing
|
|
17
16
|
from typing import Optional
|
|
@@ -138,7 +137,7 @@ class _PrepareMonitoringEvent(StepToDict):
|
|
|
138
137
|
if not event.get("mlrun_context"):
|
|
139
138
|
application_context = MonitoringApplicationContext().from_dict(
|
|
140
139
|
event,
|
|
141
|
-
context=
|
|
140
|
+
context=self.context,
|
|
142
141
|
model_endpoint_dict=self.model_endpoints,
|
|
143
142
|
)
|
|
144
143
|
else:
|
|
@@ -113,7 +113,7 @@ class MonitoringApplicationContext(MLClientCtx):
|
|
|
113
113
|
attrs.get(mm_constants.ApplicationEvent.FEATURE_STATS, "{}")
|
|
114
114
|
)
|
|
115
115
|
self._sample_df_stats = json.loads(
|
|
116
|
-
attrs.get(mm_constants.ApplicationEvent.
|
|
116
|
+
attrs.get(mm_constants.ApplicationEvent.CURRENT_STATS, "{}")
|
|
117
117
|
)
|
|
118
118
|
|
|
119
119
|
self.endpoint_id = attrs.get(mm_constants.ApplicationEvent.ENDPOINT_ID)
|
|
@@ -11,9 +11,10 @@
|
|
|
11
11
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
12
|
# See the License for the specific language governing permissions and
|
|
13
13
|
# limitations under the License.
|
|
14
|
-
|
|
14
|
+
|
|
15
|
+
import json
|
|
15
16
|
from dataclasses import dataclass
|
|
16
|
-
from typing import Final, Optional, Protocol, cast
|
|
17
|
+
from typing import Final, Optional, Protocol, Union, cast
|
|
17
18
|
|
|
18
19
|
import numpy as np
|
|
19
20
|
from pandas import DataFrame, Series
|
|
@@ -90,9 +91,27 @@ class HistogramDataDriftApplication(ModelMonitoringApplicationBaseV2):
|
|
|
90
91
|
"""
|
|
91
92
|
MLRun's default data drift application for model monitoring.
|
|
92
93
|
|
|
93
|
-
The application calculates
|
|
94
|
-
|
|
95
|
-
|
|
94
|
+
The application expects tabular numerical data, and calculates three metrics over the features' histograms.
|
|
95
|
+
The three metrics are:
|
|
96
|
+
|
|
97
|
+
* Hellinger distance.
|
|
98
|
+
* Total variance distance.
|
|
99
|
+
* Kullback-Leibler divergence.
|
|
100
|
+
|
|
101
|
+
Each metric is calculated over all the features individually and the mean is taken as the metric value.
|
|
102
|
+
The average of Hellinger and total variance distance is taken as the result.
|
|
103
|
+
|
|
104
|
+
The application logs two artifacts:
|
|
105
|
+
|
|
106
|
+
* A JSON with the general drift per feature.
|
|
107
|
+
* A plotly table different metrics per feature.
|
|
108
|
+
|
|
109
|
+
This application is deployed by default when calling:
|
|
110
|
+
|
|
111
|
+
.. code-block:: python
|
|
112
|
+
|
|
113
|
+
project.enable_model_monitoring()
|
|
114
|
+
|
|
96
115
|
"""
|
|
97
116
|
|
|
98
117
|
NAME: Final[str] = HistogramDataDriftApplicationConstants.NAME
|
|
@@ -107,8 +126,6 @@ class HistogramDataDriftApplication(ModelMonitoringApplicationBaseV2):
|
|
|
107
126
|
|
|
108
127
|
def __init__(self, value_classifier: Optional[ValueClassifier] = None) -> None:
|
|
109
128
|
"""
|
|
110
|
-
Initialize the data drift application.
|
|
111
|
-
|
|
112
129
|
:param value_classifier: Classifier object that adheres to the `ValueClassifier` protocol.
|
|
113
130
|
If not provided, the default `DataDriftClassifier()` is used.
|
|
114
131
|
"""
|
|
@@ -146,35 +163,46 @@ class HistogramDataDriftApplication(ModelMonitoringApplicationBaseV2):
|
|
|
146
163
|
|
|
147
164
|
return metrics_per_feature
|
|
148
165
|
|
|
149
|
-
def
|
|
166
|
+
def _get_general_drift_result(
|
|
150
167
|
self,
|
|
151
168
|
metrics: list[mm_results.ModelMonitoringApplicationMetric],
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
|
|
158
|
-
|
|
159
|
-
|
|
160
|
-
|
|
161
|
-
|
|
169
|
+
monitoring_context: mm_context.MonitoringApplicationContext,
|
|
170
|
+
metrics_per_feature: DataFrame,
|
|
171
|
+
) -> mm_results.ModelMonitoringApplicationResult:
|
|
172
|
+
"""Get the general drift result from the metrics list"""
|
|
173
|
+
value = cast(
|
|
174
|
+
float,
|
|
175
|
+
np.mean(
|
|
176
|
+
[
|
|
177
|
+
metric.value
|
|
178
|
+
for metric in metrics
|
|
179
|
+
if metric.name
|
|
180
|
+
in [
|
|
181
|
+
f"{HellingerDistance.NAME}_mean",
|
|
182
|
+
f"{TotalVarianceDistance.NAME}_mean",
|
|
183
|
+
]
|
|
162
184
|
]
|
|
163
|
-
|
|
185
|
+
),
|
|
164
186
|
)
|
|
165
187
|
|
|
166
188
|
status = self._value_classifier.value_to_status(value)
|
|
167
|
-
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
|
|
171
|
-
|
|
172
|
-
|
|
173
|
-
|
|
189
|
+
return mm_results.ModelMonitoringApplicationResult(
|
|
190
|
+
name=HistogramDataDriftApplicationConstants.GENERAL_RESULT_NAME,
|
|
191
|
+
value=value,
|
|
192
|
+
kind=ResultKindApp.data_drift,
|
|
193
|
+
status=status,
|
|
194
|
+
extra_data={
|
|
195
|
+
EventFieldType.CURRENT_STATS: json.dumps(
|
|
196
|
+
monitoring_context.feature_stats
|
|
197
|
+
),
|
|
198
|
+
EventFieldType.DRIFT_MEASURES: metrics_per_feature.T.to_json(),
|
|
199
|
+
EventFieldType.DRIFT_STATUS: status.value,
|
|
200
|
+
},
|
|
174
201
|
)
|
|
175
202
|
|
|
203
|
+
@staticmethod
|
|
176
204
|
def _get_metrics(
|
|
177
|
-
|
|
205
|
+
metrics_per_feature: DataFrame,
|
|
178
206
|
) -> list[mm_results.ModelMonitoringApplicationMetric]:
|
|
179
207
|
"""Average the metrics over the features and add the status"""
|
|
180
208
|
metrics: list[mm_results.ModelMonitoringApplicationMetric] = []
|
|
@@ -206,8 +234,8 @@ class HistogramDataDriftApplication(ModelMonitoringApplicationBaseV2):
|
|
|
206
234
|
del sample_set_statistics[EventFieldType.TIMESTAMP]
|
|
207
235
|
return sample_set_statistics
|
|
208
236
|
|
|
237
|
+
@staticmethod
|
|
209
238
|
def _log_json_artifact(
|
|
210
|
-
self,
|
|
211
239
|
drift_per_feature_values: Series,
|
|
212
240
|
monitoring_context: mm_context.MonitoringApplicationContext,
|
|
213
241
|
) -> None:
|
|
@@ -247,7 +275,7 @@ class HistogramDataDriftApplication(ModelMonitoringApplicationBaseV2):
|
|
|
247
275
|
mm_drift_table.FeaturesDriftTablePlot().produce(
|
|
248
276
|
sample_set_statistics=sample_set_statistics,
|
|
249
277
|
inputs_statistics=inputs_statistics,
|
|
250
|
-
metrics=metrics_per_feature.T.to_dict(),
|
|
278
|
+
metrics=metrics_per_feature.T.to_dict(), # pyright: ignore[reportArgumentType]
|
|
251
279
|
drift_results=drift_results,
|
|
252
280
|
)
|
|
253
281
|
)
|
|
@@ -281,7 +309,7 @@ class HistogramDataDriftApplication(ModelMonitoringApplicationBaseV2):
|
|
|
281
309
|
self,
|
|
282
310
|
monitoring_context: mm_context.MonitoringApplicationContext,
|
|
283
311
|
) -> list[
|
|
284
|
-
|
|
312
|
+
Union[
|
|
285
313
|
mm_results.ModelMonitoringApplicationResult,
|
|
286
314
|
mm_results.ModelMonitoringApplicationMetric,
|
|
287
315
|
]
|
|
@@ -308,15 +336,13 @@ class HistogramDataDriftApplication(ModelMonitoringApplicationBaseV2):
|
|
|
308
336
|
metrics_per_feature=metrics_per_feature,
|
|
309
337
|
)
|
|
310
338
|
monitoring_context.logger.debug("Computing average per metric")
|
|
311
|
-
|
|
312
|
-
|
|
313
|
-
|
|
314
|
-
|
|
315
|
-
|
|
316
|
-
] = self._get_metrics(metrics_per_feature)
|
|
317
|
-
self._add_general_drift_result(
|
|
318
|
-
metrics=metrics_and_result,
|
|
339
|
+
metrics = self._get_metrics(metrics_per_feature)
|
|
340
|
+
result = self._get_general_drift_result(
|
|
341
|
+
metrics=metrics,
|
|
342
|
+
monitoring_context=monitoring_context,
|
|
343
|
+
metrics_per_feature=metrics_per_feature,
|
|
319
344
|
)
|
|
345
|
+
metrics_and_result = metrics + [result]
|
|
320
346
|
monitoring_context.logger.debug(
|
|
321
347
|
"Finished running the application", results=metrics_and_result
|
|
322
348
|
)
|
|
@@ -15,6 +15,8 @@
|
|
|
15
15
|
import typing
|
|
16
16
|
from abc import ABC, abstractmethod
|
|
17
17
|
|
|
18
|
+
import mlrun.common.schemas.model_monitoring.constants as mm_constants
|
|
19
|
+
|
|
18
20
|
|
|
19
21
|
class StoreBase(ABC):
|
|
20
22
|
"""
|
|
@@ -62,12 +64,10 @@ class StoreBase(ABC):
|
|
|
62
64
|
pass
|
|
63
65
|
|
|
64
66
|
@abstractmethod
|
|
65
|
-
def delete_model_endpoints_resources(self
|
|
67
|
+
def delete_model_endpoints_resources(self):
|
|
66
68
|
"""
|
|
67
69
|
Delete all model endpoints resources.
|
|
68
70
|
|
|
69
|
-
:param endpoints: A list of model endpoints flattened dictionaries.
|
|
70
|
-
|
|
71
71
|
"""
|
|
72
72
|
pass
|
|
73
73
|
|
|
@@ -112,45 +112,18 @@ class StoreBase(ABC):
|
|
|
112
112
|
pass
|
|
113
113
|
|
|
114
114
|
@abstractmethod
|
|
115
|
-
def
|
|
115
|
+
def write_application_event(
|
|
116
116
|
self,
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
end: str = "now",
|
|
121
|
-
access_key: str = None,
|
|
122
|
-
) -> dict[str, list[tuple[str, float]]]:
|
|
123
|
-
"""
|
|
124
|
-
Getting metrics from the time series DB. There are pre-defined metrics for model endpoints such as
|
|
125
|
-
`predictions_per_second` and `latency_avg_5m` but also custom metrics defined by the user.
|
|
126
|
-
|
|
127
|
-
:param endpoint_id: The unique id of the model endpoint.
|
|
128
|
-
:param metrics: A list of real-time metrics to return for the model endpoint.
|
|
129
|
-
:param start: The start time of the metrics. Can be represented by a string containing an RFC 3339
|
|
130
|
-
time, a Unix timestamp in milliseconds, a relative time (`'now'` or
|
|
131
|
-
`'now-[0-9]+[mhd]'`, where `m` = minutes, `h` = hours, and `'d'` = days), or 0 for the
|
|
132
|
-
earliest time.
|
|
133
|
-
:param end: The end time of the metrics. Can be represented by a string containing an RFC 3339
|
|
134
|
-
time, a Unix timestamp in milliseconds, a relative time (`'now'` or
|
|
135
|
-
`'now-[0-9]+[mhd]'`, where `m` = minutes, `h` = hours, and `'d'` = days), or 0 for the
|
|
136
|
-
earliest time.
|
|
137
|
-
:param access_key: V3IO access key that will be used for generating Frames client object. If not
|
|
138
|
-
provided, the access key will be retrieved from the environment variables.
|
|
139
|
-
|
|
140
|
-
:return: A dictionary of metrics in which the key is a metric name and the value is a list of tuples that
|
|
141
|
-
includes timestamps and the values.
|
|
142
|
-
"""
|
|
143
|
-
|
|
144
|
-
pass
|
|
145
|
-
|
|
146
|
-
@abstractmethod
|
|
147
|
-
def write_application_result(self, event: dict[str, typing.Any]):
|
|
117
|
+
event: dict[str, typing.Any],
|
|
118
|
+
kind: mm_constants.WriterEventKind = mm_constants.WriterEventKind.RESULT,
|
|
119
|
+
):
|
|
148
120
|
"""
|
|
149
|
-
Write a new
|
|
121
|
+
Write a new event in the target table.
|
|
150
122
|
|
|
151
123
|
:param event: An event dictionary that represents the application result, should be corresponded to the
|
|
152
124
|
schema defined in the :py:class:`~mlrun.common.schemas.model_monitoring.constants.WriterEvent`
|
|
153
125
|
object.
|
|
126
|
+
:param kind: The type of the event, can be either "result" or "metric".
|
|
154
127
|
"""
|
|
155
128
|
pass
|
|
156
129
|
|
|
@@ -21,12 +21,11 @@ import pandas as pd
|
|
|
21
21
|
import sqlalchemy
|
|
22
22
|
|
|
23
23
|
import mlrun.common.model_monitoring.helpers
|
|
24
|
-
import mlrun.common.schemas.model_monitoring
|
|
24
|
+
import mlrun.common.schemas.model_monitoring as mm_constants
|
|
25
25
|
import mlrun.model_monitoring.db
|
|
26
26
|
import mlrun.model_monitoring.db.stores.sqldb.models
|
|
27
27
|
import mlrun.model_monitoring.helpers
|
|
28
28
|
from mlrun.common.db.sql_session import create_session, get_engine
|
|
29
|
-
from mlrun.utils import logger
|
|
30
29
|
|
|
31
30
|
|
|
32
31
|
class SQLStoreBase(mlrun.model_monitoring.db.StoreBase):
|
|
@@ -72,9 +71,9 @@ class SQLStoreBase(mlrun.model_monitoring.db.StoreBase):
|
|
|
72
71
|
connection_string=self._sql_connection_string
|
|
73
72
|
)
|
|
74
73
|
)
|
|
75
|
-
self._tables[
|
|
76
|
-
|
|
77
|
-
|
|
74
|
+
self._tables[mm_constants.EventFieldType.MODEL_ENDPOINTS] = (
|
|
75
|
+
self.ModelEndpointsTable
|
|
76
|
+
)
|
|
78
77
|
|
|
79
78
|
def _init_application_results_table(self):
|
|
80
79
|
self.ApplicationResultsTable = (
|
|
@@ -82,17 +81,17 @@ class SQLStoreBase(mlrun.model_monitoring.db.StoreBase):
|
|
|
82
81
|
connection_string=self._sql_connection_string
|
|
83
82
|
)
|
|
84
83
|
)
|
|
85
|
-
self._tables[
|
|
86
|
-
|
|
87
|
-
|
|
84
|
+
self._tables[mm_constants.FileTargetKind.APP_RESULTS] = (
|
|
85
|
+
self.ApplicationResultsTable
|
|
86
|
+
)
|
|
88
87
|
|
|
89
88
|
def _init_monitoring_schedules_table(self):
|
|
90
89
|
self.MonitoringSchedulesTable = mlrun.model_monitoring.db.stores.sqldb.models._get_monitoring_schedules_table(
|
|
91
90
|
connection_string=self._sql_connection_string
|
|
92
91
|
)
|
|
93
|
-
self._tables[
|
|
94
|
-
|
|
95
|
-
|
|
92
|
+
self._tables[mm_constants.FileTargetKind.MONITORING_SCHEDULES] = (
|
|
93
|
+
self.MonitoringSchedulesTable
|
|
94
|
+
)
|
|
96
95
|
|
|
97
96
|
def _write(self, table: str, event: dict[str, typing.Any]):
|
|
98
97
|
"""
|
|
@@ -183,14 +182,12 @@ class SQLStoreBase(mlrun.model_monitoring.db.StoreBase):
|
|
|
183
182
|
"""
|
|
184
183
|
|
|
185
184
|
# Adjust timestamps fields
|
|
186
|
-
endpoint[
|
|
187
|
-
|
|
188
|
-
)[
|
|
189
|
-
mlrun.common.schemas.model_monitoring.EventFieldType.LAST_REQUEST
|
|
185
|
+
endpoint[mm_constants.EventFieldType.FIRST_REQUEST] = (endpoint)[
|
|
186
|
+
mm_constants.EventFieldType.LAST_REQUEST
|
|
190
187
|
] = mlrun.utils.datetime_now()
|
|
191
188
|
|
|
192
189
|
self._write(
|
|
193
|
-
table=
|
|
190
|
+
table=mm_constants.EventFieldType.MODEL_ENDPOINTS,
|
|
194
191
|
event=endpoint,
|
|
195
192
|
)
|
|
196
193
|
|
|
@@ -207,13 +204,9 @@ class SQLStoreBase(mlrun.model_monitoring.db.StoreBase):
|
|
|
207
204
|
"""
|
|
208
205
|
self._init_model_endpoints_table()
|
|
209
206
|
|
|
210
|
-
attributes.pop(
|
|
211
|
-
mlrun.common.schemas.model_monitoring.EventFieldType.ENDPOINT_ID, None
|
|
212
|
-
)
|
|
207
|
+
attributes.pop(mm_constants.EventFieldType.ENDPOINT_ID, None)
|
|
213
208
|
|
|
214
|
-
filter_endpoint = {
|
|
215
|
-
mlrun.common.schemas.model_monitoring.EventFieldType.UID: endpoint_id
|
|
216
|
-
}
|
|
209
|
+
filter_endpoint = {mm_constants.EventFieldType.UID: endpoint_id}
|
|
217
210
|
|
|
218
211
|
self._update(
|
|
219
212
|
attributes=attributes, table=self.ModelEndpointsTable, **filter_endpoint
|
|
@@ -227,9 +220,7 @@ class SQLStoreBase(mlrun.model_monitoring.db.StoreBase):
|
|
|
227
220
|
"""
|
|
228
221
|
self._init_model_endpoints_table()
|
|
229
222
|
|
|
230
|
-
filter_endpoint = {
|
|
231
|
-
mlrun.common.schemas.model_monitoring.EventFieldType.UID: endpoint_id
|
|
232
|
-
}
|
|
223
|
+
filter_endpoint = {mm_constants.EventFieldType.UID: endpoint_id}
|
|
233
224
|
# Delete the model endpoint record using sqlalchemy ORM
|
|
234
225
|
self._delete(table=self.ModelEndpointsTable, **filter_endpoint)
|
|
235
226
|
|
|
@@ -249,9 +240,7 @@ class SQLStoreBase(mlrun.model_monitoring.db.StoreBase):
|
|
|
249
240
|
self._init_model_endpoints_table()
|
|
250
241
|
|
|
251
242
|
# Get the model endpoint record using sqlalchemy ORM
|
|
252
|
-
filter_endpoint = {
|
|
253
|
-
mlrun.common.schemas.model_monitoring.EventFieldType.UID: endpoint_id
|
|
254
|
-
}
|
|
243
|
+
filter_endpoint = {mm_constants.EventFieldType.UID: endpoint_id}
|
|
255
244
|
endpoint_record = self._get(table=self.ModelEndpointsTable, **filter_endpoint)
|
|
256
245
|
|
|
257
246
|
if not endpoint_record:
|
|
@@ -303,36 +292,32 @@ class SQLStoreBase(mlrun.model_monitoring.db.StoreBase):
|
|
|
303
292
|
query = self._filter_values(
|
|
304
293
|
query=query,
|
|
305
294
|
model_endpoints_table=model_endpoints_table,
|
|
306
|
-
key_filter=
|
|
295
|
+
key_filter=mm_constants.EventFieldType.MODEL,
|
|
307
296
|
filtered_values=[model],
|
|
308
297
|
)
|
|
309
298
|
if function:
|
|
310
299
|
query = self._filter_values(
|
|
311
300
|
query=query,
|
|
312
301
|
model_endpoints_table=model_endpoints_table,
|
|
313
|
-
key_filter=
|
|
302
|
+
key_filter=mm_constants.EventFieldType.FUNCTION,
|
|
314
303
|
filtered_values=[function],
|
|
315
304
|
)
|
|
316
305
|
if uids:
|
|
317
306
|
query = self._filter_values(
|
|
318
307
|
query=query,
|
|
319
308
|
model_endpoints_table=model_endpoints_table,
|
|
320
|
-
key_filter=
|
|
309
|
+
key_filter=mm_constants.EventFieldType.UID,
|
|
321
310
|
filtered_values=uids,
|
|
322
311
|
combined=False,
|
|
323
312
|
)
|
|
324
313
|
if top_level:
|
|
325
|
-
node_ep = str(
|
|
326
|
-
|
|
327
|
-
)
|
|
328
|
-
router_ep = str(
|
|
329
|
-
mlrun.common.schemas.model_monitoring.EndpointType.ROUTER.value
|
|
330
|
-
)
|
|
314
|
+
node_ep = str(mm_constants.EndpointType.NODE_EP.value)
|
|
315
|
+
router_ep = str(mm_constants.EndpointType.ROUTER.value)
|
|
331
316
|
endpoint_types = [node_ep, router_ep]
|
|
332
317
|
query = self._filter_values(
|
|
333
318
|
query=query,
|
|
334
319
|
model_endpoints_table=model_endpoints_table,
|
|
335
|
-
key_filter=
|
|
320
|
+
key_filter=mm_constants.EventFieldType.ENDPOINT_TYPE,
|
|
336
321
|
filtered_values=endpoint_types,
|
|
337
322
|
combined=False,
|
|
338
323
|
)
|
|
@@ -350,18 +335,28 @@ class SQLStoreBase(mlrun.model_monitoring.db.StoreBase):
|
|
|
350
335
|
|
|
351
336
|
return endpoint_list
|
|
352
337
|
|
|
353
|
-
def
|
|
338
|
+
def write_application_event(
|
|
339
|
+
self,
|
|
340
|
+
event: dict[str, typing.Any],
|
|
341
|
+
kind: mm_constants.WriterEventKind = mm_constants.WriterEventKind.RESULT,
|
|
342
|
+
):
|
|
354
343
|
"""
|
|
355
|
-
Write a new application
|
|
344
|
+
Write a new application event in the target table.
|
|
356
345
|
|
|
357
346
|
:param event: An event dictionary that represents the application result, should be corresponded to the
|
|
358
|
-
schema defined in the :py:class:`~
|
|
347
|
+
schema defined in the :py:class:`~mm_constants.constants.WriterEvent`
|
|
359
348
|
object.
|
|
349
|
+
:param kind: The type of the event, can be either "result" or "metric".
|
|
360
350
|
"""
|
|
351
|
+
|
|
352
|
+
if kind == mm_constants.WriterEventKind.METRIC:
|
|
353
|
+
# TODO : Implement the logic for writing metrics to MySQL
|
|
354
|
+
return
|
|
355
|
+
|
|
361
356
|
self._init_application_results_table()
|
|
362
357
|
|
|
363
358
|
application_filter_dict = {
|
|
364
|
-
|
|
359
|
+
mm_constants.EventFieldType.UID: self._generate_application_result_uid(
|
|
365
360
|
event
|
|
366
361
|
)
|
|
367
362
|
}
|
|
@@ -372,11 +367,11 @@ class SQLStoreBase(mlrun.model_monitoring.db.StoreBase):
|
|
|
372
367
|
if application_record:
|
|
373
368
|
self._convert_to_datetime(
|
|
374
369
|
event=event,
|
|
375
|
-
key=
|
|
370
|
+
key=mm_constants.WriterEvent.START_INFER_TIME,
|
|
376
371
|
)
|
|
377
372
|
self._convert_to_datetime(
|
|
378
373
|
event=event,
|
|
379
|
-
key=
|
|
374
|
+
key=mm_constants.WriterEvent.END_INFER_TIME,
|
|
380
375
|
)
|
|
381
376
|
# Update an existing application result
|
|
382
377
|
self._update(
|
|
@@ -386,14 +381,12 @@ class SQLStoreBase(mlrun.model_monitoring.db.StoreBase):
|
|
|
386
381
|
)
|
|
387
382
|
else:
|
|
388
383
|
# Write a new application result
|
|
389
|
-
event[
|
|
390
|
-
|
|
391
|
-
|
|
392
|
-
]
|
|
393
|
-
)
|
|
384
|
+
event[mm_constants.EventFieldType.UID] = application_filter_dict[
|
|
385
|
+
mm_constants.EventFieldType.UID
|
|
386
|
+
]
|
|
394
387
|
|
|
395
388
|
self._write(
|
|
396
|
-
table=
|
|
389
|
+
table=mm_constants.FileTargetKind.APP_RESULTS,
|
|
397
390
|
event=event,
|
|
398
391
|
)
|
|
399
392
|
|
|
@@ -405,11 +398,11 @@ class SQLStoreBase(mlrun.model_monitoring.db.StoreBase):
|
|
|
405
398
|
@staticmethod
|
|
406
399
|
def _generate_application_result_uid(event: dict[str, typing.Any]) -> str:
|
|
407
400
|
return (
|
|
408
|
-
event[
|
|
401
|
+
event[mm_constants.WriterEvent.ENDPOINT_ID]
|
|
409
402
|
+ "_"
|
|
410
|
-
+ event[
|
|
403
|
+
+ event[mm_constants.WriterEvent.APPLICATION_NAME]
|
|
411
404
|
+ "_"
|
|
412
|
-
+ event[
|
|
405
|
+
+ event[mm_constants.ResultData.RESULT_NAME]
|
|
413
406
|
)
|
|
414
407
|
|
|
415
408
|
def get_last_analyzed(self, endpoint_id: str, application_name: str) -> int:
|
|
@@ -459,19 +452,17 @@ class SQLStoreBase(mlrun.model_monitoring.db.StoreBase):
|
|
|
459
452
|
if not monitoring_schedule_record:
|
|
460
453
|
# Add a new record with empty last analyzed value
|
|
461
454
|
self._write(
|
|
462
|
-
table=
|
|
455
|
+
table=mm_constants.FileTargetKind.MONITORING_SCHEDULES,
|
|
463
456
|
event={
|
|
464
|
-
|
|
465
|
-
|
|
466
|
-
|
|
467
|
-
|
|
457
|
+
mm_constants.SchedulingKeys.UID: uuid.uuid4().hex,
|
|
458
|
+
mm_constants.SchedulingKeys.APPLICATION_NAME: application_name,
|
|
459
|
+
mm_constants.SchedulingKeys.ENDPOINT_ID: endpoint_id,
|
|
460
|
+
mm_constants.SchedulingKeys.LAST_ANALYZED: last_analyzed,
|
|
468
461
|
},
|
|
469
462
|
)
|
|
470
463
|
|
|
471
464
|
self._update(
|
|
472
|
-
attributes={
|
|
473
|
-
mlrun.common.schemas.model_monitoring.SchedulingKeys.LAST_ANALYZED: last_analyzed
|
|
474
|
-
},
|
|
465
|
+
attributes={mm_constants.SchedulingKeys.LAST_ANALYZED: last_analyzed},
|
|
475
466
|
table=self.MonitoringSchedulesTable,
|
|
476
467
|
**application_filter_dict,
|
|
477
468
|
)
|
|
@@ -567,9 +558,7 @@ class SQLStoreBase(mlrun.model_monitoring.db.StoreBase):
|
|
|
567
558
|
|
|
568
559
|
# Convert endpoint labels into dictionary
|
|
569
560
|
endpoint_labels = json.loads(
|
|
570
|
-
endpoint_dict.get(
|
|
571
|
-
mlrun.common.schemas.model_monitoring.EventFieldType.LABELS
|
|
572
|
-
)
|
|
561
|
+
endpoint_dict.get(mm_constants.EventFieldType.LABELS)
|
|
573
562
|
)
|
|
574
563
|
|
|
575
564
|
for label in labels:
|
|
@@ -596,26 +585,25 @@ class SQLStoreBase(mlrun.model_monitoring.db.StoreBase):
|
|
|
596
585
|
)
|
|
597
586
|
application_filter_dict = {}
|
|
598
587
|
if endpoint_id:
|
|
599
|
-
application_filter_dict[
|
|
600
|
-
|
|
601
|
-
|
|
588
|
+
application_filter_dict[mm_constants.SchedulingKeys.ENDPOINT_ID] = (
|
|
589
|
+
endpoint_id
|
|
590
|
+
)
|
|
602
591
|
if application_name:
|
|
603
|
-
application_filter_dict[
|
|
604
|
-
|
|
605
|
-
|
|
592
|
+
application_filter_dict[mm_constants.SchedulingKeys.APPLICATION_NAME] = (
|
|
593
|
+
application_name
|
|
594
|
+
)
|
|
606
595
|
return application_filter_dict
|
|
607
596
|
|
|
608
|
-
def delete_model_endpoints_resources(self
|
|
597
|
+
def delete_model_endpoints_resources(self):
|
|
609
598
|
"""
|
|
610
599
|
Delete all model endpoints resources in both SQL and the time series DB.
|
|
611
600
|
|
|
612
|
-
:param endpoints: A list of model endpoints flattened dictionaries.
|
|
613
601
|
"""
|
|
614
602
|
|
|
603
|
+
endpoints = self.list_model_endpoints()
|
|
604
|
+
|
|
615
605
|
for endpoint_dict in endpoints:
|
|
616
|
-
endpoint_id = endpoint_dict[
|
|
617
|
-
mlrun.common.schemas.model_monitoring.EventFieldType.UID
|
|
618
|
-
]
|
|
606
|
+
endpoint_id = endpoint_dict[mm_constants.EventFieldType.UID]
|
|
619
607
|
|
|
620
608
|
# Delete last analyzed records
|
|
621
609
|
self._delete_last_analyzed(endpoint_id=endpoint_id)
|
|
@@ -625,38 +613,3 @@ class SQLStoreBase(mlrun.model_monitoring.db.StoreBase):
|
|
|
625
613
|
|
|
626
614
|
# Delete model endpoint record
|
|
627
615
|
self.delete_model_endpoint(endpoint_id=endpoint_id)
|
|
628
|
-
|
|
629
|
-
def get_endpoint_real_time_metrics(
|
|
630
|
-
self,
|
|
631
|
-
endpoint_id: str,
|
|
632
|
-
metrics: list[str],
|
|
633
|
-
start: str = "now-1h",
|
|
634
|
-
end: str = "now",
|
|
635
|
-
access_key: str = None,
|
|
636
|
-
) -> dict[str, list[tuple[str, float]]]:
|
|
637
|
-
"""
|
|
638
|
-
Getting metrics from the time series DB. There are pre-defined metrics for model endpoints such as
|
|
639
|
-
`predictions_per_second` and `latency_avg_5m` but also custom metrics defined by the user.
|
|
640
|
-
|
|
641
|
-
:param endpoint_id: The unique id of the model endpoint.
|
|
642
|
-
:param metrics: A list of real-time metrics to return for the model endpoint.
|
|
643
|
-
:param start: The start time of the metrics. Can be represented by a string containing an RFC 3339
|
|
644
|
-
time, a Unix timestamp in milliseconds, a relative time (`'now'` or
|
|
645
|
-
`'now-[0-9]+[mhd]'`, where `m` = minutes, `h` = hours, and `'d'` = days), or 0 for the
|
|
646
|
-
earliest time.
|
|
647
|
-
:param end: The end time of the metrics. Can be represented by a string containing an RFC 3339
|
|
648
|
-
time, a Unix timestamp in milliseconds, a relative time (`'now'` or
|
|
649
|
-
`'now-[0-9]+[mhd]'`, where `m` = minutes, `h` = hours, and `'d'` = days), or 0 for the
|
|
650
|
-
earliest time.
|
|
651
|
-
:param access_key: V3IO access key that will be used for generating Frames client object. If not
|
|
652
|
-
provided, the access key will be retrieved from the environment variables.
|
|
653
|
-
|
|
654
|
-
:return: A dictionary of metrics in which the key is a metric name and the value is a list of tuples that
|
|
655
|
-
includes timestamps and the values.
|
|
656
|
-
"""
|
|
657
|
-
# # TODO : Implement this method once Perometheus is supported
|
|
658
|
-
logger.warning(
|
|
659
|
-
"Real time metrics service using Prometheus will be implemented in 1.4.0"
|
|
660
|
-
)
|
|
661
|
-
|
|
662
|
-
return {}
|