mlrun 1.7.0rc5__py3-none-any.whl → 1.7.0rc7__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of mlrun might be problematic. Click here for more details.
- mlrun/artifacts/base.py +2 -1
- mlrun/artifacts/plots.py +9 -5
- mlrun/common/constants.py +6 -0
- mlrun/common/schemas/__init__.py +2 -0
- mlrun/common/schemas/model_monitoring/__init__.py +4 -0
- mlrun/common/schemas/model_monitoring/constants.py +35 -18
- mlrun/common/schemas/project.py +1 -0
- mlrun/common/types.py +7 -1
- mlrun/config.py +19 -6
- mlrun/data_types/data_types.py +4 -0
- mlrun/datastore/alibaba_oss.py +130 -0
- mlrun/datastore/azure_blob.py +4 -5
- mlrun/datastore/base.py +22 -16
- mlrun/datastore/datastore.py +4 -0
- mlrun/datastore/google_cloud_storage.py +1 -1
- mlrun/datastore/sources.py +7 -7
- mlrun/db/base.py +14 -6
- mlrun/db/factory.py +1 -1
- mlrun/db/httpdb.py +61 -56
- mlrun/db/nopdb.py +3 -0
- mlrun/launcher/__init__.py +1 -1
- mlrun/launcher/base.py +1 -1
- mlrun/launcher/client.py +1 -1
- mlrun/launcher/factory.py +1 -1
- mlrun/launcher/local.py +1 -1
- mlrun/launcher/remote.py +1 -1
- mlrun/model.py +1 -0
- mlrun/model_monitoring/__init__.py +1 -1
- mlrun/model_monitoring/api.py +104 -301
- mlrun/model_monitoring/application.py +21 -21
- mlrun/model_monitoring/applications/histogram_data_drift.py +130 -40
- mlrun/model_monitoring/controller.py +26 -33
- mlrun/model_monitoring/db/__init__.py +16 -0
- mlrun/model_monitoring/{stores → db/stores}/__init__.py +43 -34
- mlrun/model_monitoring/db/stores/base/__init__.py +15 -0
- mlrun/model_monitoring/{stores/model_endpoint_store.py → db/stores/base/store.py} +47 -6
- mlrun/model_monitoring/db/stores/sqldb/__init__.py +13 -0
- mlrun/model_monitoring/db/stores/sqldb/models/__init__.py +49 -0
- mlrun/model_monitoring/{stores → db/stores/sqldb}/models/base.py +76 -3
- mlrun/model_monitoring/db/stores/sqldb/models/mysql.py +68 -0
- mlrun/model_monitoring/{stores → db/stores/sqldb}/models/sqlite.py +13 -1
- mlrun/model_monitoring/db/stores/sqldb/sql_store.py +662 -0
- mlrun/model_monitoring/db/stores/v3io_kv/__init__.py +13 -0
- mlrun/model_monitoring/{stores/kv_model_endpoint_store.py → db/stores/v3io_kv/kv_store.py} +134 -3
- mlrun/model_monitoring/features_drift_table.py +34 -22
- mlrun/model_monitoring/helpers.py +45 -6
- mlrun/model_monitoring/stream_processing.py +43 -9
- mlrun/model_monitoring/tracking_policy.py +7 -1
- mlrun/model_monitoring/writer.py +4 -36
- mlrun/projects/pipelines.py +13 -1
- mlrun/projects/project.py +279 -117
- mlrun/run.py +72 -74
- mlrun/runtimes/__init__.py +35 -0
- mlrun/runtimes/base.py +7 -1
- mlrun/runtimes/nuclio/api_gateway.py +188 -61
- mlrun/runtimes/nuclio/application/__init__.py +15 -0
- mlrun/runtimes/nuclio/application/application.py +283 -0
- mlrun/runtimes/nuclio/application/reverse_proxy.go +87 -0
- mlrun/runtimes/nuclio/function.py +53 -1
- mlrun/runtimes/nuclio/serving.py +28 -32
- mlrun/runtimes/pod.py +27 -1
- mlrun/serving/server.py +4 -6
- mlrun/serving/states.py +41 -33
- mlrun/utils/helpers.py +34 -0
- mlrun/utils/version/version.json +2 -2
- {mlrun-1.7.0rc5.dist-info → mlrun-1.7.0rc7.dist-info}/METADATA +14 -5
- {mlrun-1.7.0rc5.dist-info → mlrun-1.7.0rc7.dist-info}/RECORD +71 -64
- mlrun/model_monitoring/batch.py +0 -974
- mlrun/model_monitoring/stores/models/__init__.py +0 -27
- mlrun/model_monitoring/stores/models/mysql.py +0 -34
- mlrun/model_monitoring/stores/sql_model_endpoint_store.py +0 -382
- {mlrun-1.7.0rc5.dist-info → mlrun-1.7.0rc7.dist-info}/LICENSE +0 -0
- {mlrun-1.7.0rc5.dist-info → mlrun-1.7.0rc7.dist-info}/WHEEL +0 -0
- {mlrun-1.7.0rc5.dist-info → mlrun-1.7.0rc7.dist-info}/entry_points.txt +0 -0
- {mlrun-1.7.0rc5.dist-info → mlrun-1.7.0rc7.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,662 @@
|
|
|
1
|
+
# Copyright 2024 Iguazio
|
|
2
|
+
#
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
#
|
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
#
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
14
|
+
|
|
15
|
+
import datetime
|
|
16
|
+
import json
|
|
17
|
+
import typing
|
|
18
|
+
import uuid
|
|
19
|
+
|
|
20
|
+
import pandas as pd
|
|
21
|
+
import sqlalchemy
|
|
22
|
+
|
|
23
|
+
import mlrun.common.model_monitoring.helpers
|
|
24
|
+
import mlrun.common.schemas.model_monitoring
|
|
25
|
+
import mlrun.model_monitoring.db
|
|
26
|
+
import mlrun.model_monitoring.db.stores.sqldb.models
|
|
27
|
+
import mlrun.model_monitoring.helpers
|
|
28
|
+
from mlrun.common.db.sql_session import create_session, get_engine
|
|
29
|
+
from mlrun.utils import logger
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
class SQLStoreBase(mlrun.model_monitoring.db.StoreBase):
    """
    Handles the DB operations when the DB target is from type SQL. For the SQL operations, we use SQLAlchemy, a Python
    SQL toolkit that handles the communication with the database. When using SQL for storing the model monitoring
    data, the user needs to provide a valid connection string for the database.
    """

    # Class-level defaults only. Both are re-assigned per instance in
    # ``__init__`` so that store instances (possibly pointing at different
    # databases) never share mutable state.
    _engine = None
    _tables = {}

    def __init__(
        self,
        project: str,
        secret_provider: typing.Callable = None,
    ):
        """
        Initialize SQL store target object.

        :param project:         The name of the project.
        :param secret_provider: An optional secret provider to get the connection string secret.
        """

        super().__init__(project=project)

        self._sql_connection_string = (
            mlrun.model_monitoring.helpers.get_connection_string(
                secret_provider=secret_provider
            )
        )

        self._engine = get_engine(dsn=self._sql_connection_string)

        # Per-instance registry of table name -> declarative table class.
        # The previous class-level dict was shared by every instance, so
        # tables generated for one connection string leaked into other stores.
        self._tables = {}

    def _init_tables(self):
        """Generate all the declarative table classes used by this store."""
        self._init_model_endpoints_table()
        self._init_application_results_table()
        self._init_monitoring_schedules_table()

    def _init_model_endpoints_table(self):
        """Generate the model endpoints table class and register it."""
        self.ModelEndpointsTable = (
            mlrun.model_monitoring.db.stores.sqldb.models._get_model_endpoints_table(
                connection_string=self._sql_connection_string
            )
        )
        self._tables[
            mlrun.common.schemas.model_monitoring.EventFieldType.MODEL_ENDPOINTS
        ] = self.ModelEndpointsTable

    def _init_application_results_table(self):
        """Generate the application results table class and register it."""
        self.ApplicationResultsTable = (
            mlrun.model_monitoring.db.stores.sqldb.models._get_application_result_table(
                connection_string=self._sql_connection_string
            )
        )
        self._tables[
            mlrun.common.schemas.model_monitoring.FileTargetKind.APP_RESULTS
        ] = self.ApplicationResultsTable

    def _init_monitoring_schedules_table(self):
        """Generate the monitoring schedules table class and register it."""
        self.MonitoringSchedulesTable = mlrun.model_monitoring.db.stores.sqldb.models._get_monitoring_schedules_table(
            connection_string=self._sql_connection_string
        )
        self._tables[
            mlrun.common.schemas.model_monitoring.FileTargetKind.MONITORING_SCHEDULES
        ] = self.MonitoringSchedulesTable

    @staticmethod
    def _build_filter_clause(filtered_values: dict[str, typing.Any]):
        """
        Build a single textual SQL WHERE clause out of the provided key-value
        equality filters.

        Joining the conditions with ``AND`` fixes a bug in the previous
        implementation, which passed each condition as a separate positional
        argument to ``sqlalchemy.sql.text()`` and therefore raised a
        ``TypeError`` whenever more than one filter was supplied (e.g. when
        filtering by both endpoint id and application name).

        NOTE(review): the values are interpolated directly into the SQL text.
        They currently originate from internal identifiers, but switching to
        bound parameters (e.g. ``Query.filter_by``) would be safer — confirm
        with the callers before changing.

        :param filtered_values: Mapping of column name to the required value.

        :return: A SQLAlchemy text clause combining all equality conditions.
        """
        return sqlalchemy.sql.text(
            " AND ".join(
                f"{key} = '{value}'" for key, value in filtered_values.items()
            )
        )

    def _write(self, table: str, event: dict[str, typing.Any]):
        """
        Create a new record in the SQL table.

        :param table: Target table name.
        :param event: Event dictionary that will be written into the DB.
        """

        with self._engine.connect() as connection:
            # Convert the result into a pandas Dataframe and write it into the database
            event_df = pd.DataFrame([event])

            event_df.to_sql(table, con=connection, index=False, if_exists="append")

    def _update(
        self,
        attributes: dict[str, typing.Any],
        table: sqlalchemy.orm.decl_api.DeclarativeMeta,
        **filtered_values,
    ):
        """
        Update a record in the SQL table.

        :param attributes:      Dictionary of attributes that will be used for update the record. Note that the keys
                                of the attributes dictionary should exist in the SQL table.
        :param table:           SQLAlchemy declarative table.
        :param filtered_values: Key-value equality filters that select the record(s) to update.
        """
        with create_session(dsn=self._sql_connection_string) as session:
            # Generate and commit the update session query
            session.query(table).filter(
                self._build_filter_clause(filtered_values)
            ).update(attributes, synchronize_session=False)
            session.commit()

    def _get(self, table: sqlalchemy.orm.decl_api.DeclarativeMeta, **filtered_values):
        """
        Get a single record from the SQL table, or ``None`` when it is missing.

        :param table:           SQLAlchemy declarative table.
        :param filtered_values: Key-value equality filters that select the record.
        """

        with create_session(dsn=self._sql_connection_string) as session:
            try:
                # Generate the get query
                return (
                    session.query(table)
                    .filter(self._build_filter_clause(filtered_values))
                    .one_or_none()
                )
            except sqlalchemy.exc.ProgrammingError:
                # Probably table doesn't exist, try to create tables
                self._create_tables_if_not_exist()
                return

    def _delete(
        self, table: sqlalchemy.orm.decl_api.DeclarativeMeta, **filtered_values
    ):
        """
        Delete records from the SQL table.

        :param table:           SQLAlchemy declarative table.
        :param filtered_values: Key-value equality filters that select the records to delete.
        """
        with create_session(dsn=self._sql_connection_string) as session:
            # Generate and commit the delete query
            session.query(table).filter(
                self._build_filter_clause(filtered_values)
            ).delete(synchronize_session=False)
            session.commit()

    def write_model_endpoint(self, endpoint: dict[str, typing.Any]):
        """
        Create a new endpoint record in the SQL table. This method also creates the model endpoints table within the
        SQL database if not exist.

        :param endpoint: model endpoint dictionary that will be written into the DB.
        """

        # Adjust timestamps fields - both are initialized to the same moment
        now = mlrun.utils.datetime_now()
        endpoint[
            mlrun.common.schemas.model_monitoring.EventFieldType.FIRST_REQUEST
        ] = now
        endpoint[
            mlrun.common.schemas.model_monitoring.EventFieldType.LAST_REQUEST
        ] = now

        self._write(
            table=mlrun.common.schemas.model_monitoring.EventFieldType.MODEL_ENDPOINTS,
            event=endpoint,
        )

    def update_model_endpoint(
        self, endpoint_id: str, attributes: dict[str, typing.Any]
    ):
        """
        Update a model endpoint record with a given attributes.

        :param endpoint_id: The unique id of the model endpoint.
        :param attributes:  Dictionary of attributes that will be used for update the model endpoint. Note that the keys
                            of the attributes dictionary should exist in the SQL table.
        """
        self._init_model_endpoints_table()

        # The endpoint id is the record key and must not be overwritten
        attributes.pop(
            mlrun.common.schemas.model_monitoring.EventFieldType.ENDPOINT_ID, None
        )

        filter_endpoint = {
            mlrun.common.schemas.model_monitoring.EventFieldType.UID: endpoint_id
        }

        self._update(
            attributes=attributes, table=self.ModelEndpointsTable, **filter_endpoint
        )

    def delete_model_endpoint(self, endpoint_id: str):
        """
        Deletes the SQL record of a given model endpoint id.

        :param endpoint_id: The unique id of the model endpoint.
        """
        self._init_model_endpoints_table()

        filter_endpoint = {
            mlrun.common.schemas.model_monitoring.EventFieldType.UID: endpoint_id
        }
        # Delete the model endpoint record using sqlalchemy ORM
        self._delete(table=self.ModelEndpointsTable, **filter_endpoint)

    def get_model_endpoint(
        self,
        endpoint_id: str,
    ) -> dict[str, typing.Any]:
        """
        Get a single model endpoint record.

        :param endpoint_id: The unique id of the model endpoint.

        :return: A model endpoint record as a dictionary.

        :raise MLRunNotFoundError: If the model endpoints table was not found or the model endpoint id was not found.
        """
        self._init_model_endpoints_table()

        # Get the model endpoint record using sqlalchemy ORM
        filter_endpoint = {
            mlrun.common.schemas.model_monitoring.EventFieldType.UID: endpoint_id
        }
        endpoint_record = self._get(table=self.ModelEndpointsTable, **filter_endpoint)

        if not endpoint_record:
            raise mlrun.errors.MLRunNotFoundError(f"Endpoint {endpoint_id} not found")

        # Convert the database values and the table columns into a python dictionary
        return endpoint_record.to_dict()

    def list_model_endpoints(
        self,
        model: str = None,
        function: str = None,
        labels: list[str] = None,
        top_level: bool = None,
        uids: list = None,
    ) -> list[dict[str, typing.Any]]:
        """
        Returns a list of model endpoint dictionaries, supports filtering by model, function, labels or top level.
        By default, when no filters are applied, all available model endpoints for the given project will
        be listed.

        :param model:     The name of the model to filter by.
        :param function:  The name of the function to filter by.
        :param labels:    A list of labels to filter by. Label filters work by either filtering a specific value
                          of a label (i.e. list("key=value")) or by looking for the existence of a given
                          key (i.e. "key").
        :param top_level: If True will return only routers and endpoint that are NOT children of any router.
        :param uids:      List of model endpoint unique ids to include in the result.

        :return: A list of model endpoint dictionaries.
        """
        self._init_model_endpoints_table()
        # Generate an empty model endpoints that will be filled afterwards with model endpoint dictionaries
        endpoint_list = []

        model_endpoints_table = (
            self.ModelEndpointsTable.__table__  # pyright: ignore[reportGeneralTypeIssues]
        )

        # Get the model endpoints records using sqlalchemy ORM
        with create_session(dsn=self._sql_connection_string) as session:
            # Generate the list query
            query = session.query(self.ModelEndpointsTable).filter_by(
                project=self.project
            )

            # Apply filters
            if model:
                query = self._filter_values(
                    query=query,
                    model_endpoints_table=model_endpoints_table,
                    key_filter=mlrun.common.schemas.model_monitoring.EventFieldType.MODEL,
                    filtered_values=[model],
                )
            if function:
                query = self._filter_values(
                    query=query,
                    model_endpoints_table=model_endpoints_table,
                    key_filter=mlrun.common.schemas.model_monitoring.EventFieldType.FUNCTION,
                    filtered_values=[function],
                )
            if uids:
                query = self._filter_values(
                    query=query,
                    model_endpoints_table=model_endpoints_table,
                    key_filter=mlrun.common.schemas.model_monitoring.EventFieldType.UID,
                    filtered_values=uids,
                    combined=False,
                )
            if top_level:
                node_ep = str(
                    mlrun.common.schemas.model_monitoring.EndpointType.NODE_EP.value
                )
                router_ep = str(
                    mlrun.common.schemas.model_monitoring.EndpointType.ROUTER.value
                )
                endpoint_types = [node_ep, router_ep]
                query = self._filter_values(
                    query=query,
                    model_endpoints_table=model_endpoints_table,
                    key_filter=mlrun.common.schemas.model_monitoring.EventFieldType.ENDPOINT_TYPE,
                    filtered_values=endpoint_types,
                    combined=False,
                )
            # Convert the results from the DB into a ModelEndpoint object and append it to the model endpoints list
            for endpoint_record in query.all():
                endpoint_dict = endpoint_record.to_dict()

                # Filter labels
                if labels and not self._validate_labels(
                    endpoint_dict=endpoint_dict, labels=labels
                ):
                    continue

                endpoint_list.append(endpoint_dict)

        return endpoint_list

    def write_application_result(self, event: dict[str, typing.Any]):
        """
        Write a new application result event in the target table.

        :param event: An event dictionary that represents the application result, should be corresponded to the
                      schema defined in the :py:class:`~mlrun.common.schemas.model_monitoring.constants.WriterEvent`
                      object.
        """
        self._init_application_results_table()

        application_filter_dict = {
            mlrun.common.schemas.model_monitoring.EventFieldType.UID: self._generate_application_result_uid(
                event
            )
        }

        application_record = self._get(
            table=self.ApplicationResultsTable, **application_filter_dict
        )
        if application_record:
            # Normalize ISO-formatted string timestamps into datetime objects
            self._convert_to_datetime(
                event=event,
                key=mlrun.common.schemas.model_monitoring.WriterEvent.START_INFER_TIME,
            )
            self._convert_to_datetime(
                event=event,
                key=mlrun.common.schemas.model_monitoring.WriterEvent.END_INFER_TIME,
            )
            # Update an existing application result
            self._update(
                attributes=event,
                table=self.ApplicationResultsTable,
                **application_filter_dict,
            )
        else:
            # Write a new application result
            event[mlrun.common.schemas.model_monitoring.EventFieldType.UID] = (
                application_filter_dict[
                    mlrun.common.schemas.model_monitoring.EventFieldType.UID
                ]
            )

            self._write(
                table=mlrun.common.schemas.model_monitoring.FileTargetKind.APP_RESULTS,
                event=event,
            )

    @staticmethod
    def _convert_to_datetime(event: dict[str, typing.Any], key: str):
        """Replace an ISO-formatted string value under ``key`` with a datetime object, in place."""
        if isinstance(event[key], str):
            event[key] = datetime.datetime.fromisoformat(event[key])

    @staticmethod
    def _generate_application_result_uid(event: dict[str, typing.Any]) -> str:
        """Build a deterministic uid of the form ``<endpoint_id>_<application_name>_<result_name>``."""
        return (
            event[mlrun.common.schemas.model_monitoring.WriterEvent.ENDPOINT_ID]
            + "_"
            + event[mlrun.common.schemas.model_monitoring.WriterEvent.APPLICATION_NAME]
            + "_"
            + event[mlrun.common.schemas.model_monitoring.WriterEvent.RESULT_NAME]
        )

    def get_last_analyzed(self, endpoint_id: str, application_name: str) -> int:
        """
        Get the last analyzed time for the provided model endpoint and application.

        :param endpoint_id:      The unique id of the model endpoint.
        :param application_name: Registered application name.

        :return: Timestamp as a Unix time.
        :raise:  MLRunNotFoundError if last analyzed value is not found.
        """
        self._init_monitoring_schedules_table()
        application_filter_dict = self.filter_endpoint_and_application_name(
            endpoint_id=endpoint_id, application_name=application_name
        )
        monitoring_schedule_record = self._get(
            table=self.MonitoringSchedulesTable, **application_filter_dict
        )
        if not monitoring_schedule_record:
            raise mlrun.errors.MLRunNotFoundError(
                f"No last analyzed value has been found for {application_name} "
                f"that processes model endpoint {endpoint_id}"
            )
        return monitoring_schedule_record.last_analyzed

    def update_last_analyzed(
        self, endpoint_id: str, application_name: str, last_analyzed: int
    ):
        """
        Update the last analyzed time for the provided model endpoint and application.

        :param endpoint_id:      The unique id of the model endpoint.
        :param application_name: Registered application name.
        :param last_analyzed:    Timestamp as a Unix time that represents the last analyzed time of a certain
                                 application and model endpoint.
        """
        self._init_monitoring_schedules_table()

        application_filter_dict = self.filter_endpoint_and_application_name(
            endpoint_id=endpoint_id, application_name=application_name
        )
        monitoring_schedule_record = self._get(
            table=self.MonitoringSchedulesTable, **application_filter_dict
        )
        if not monitoring_schedule_record:
            # Add a new record with empty last analyzed value
            self._write(
                table=mlrun.common.schemas.model_monitoring.FileTargetKind.MONITORING_SCHEDULES,
                event={
                    mlrun.common.schemas.model_monitoring.SchedulingKeys.UID: uuid.uuid4().hex,
                    mlrun.common.schemas.model_monitoring.SchedulingKeys.APPLICATION_NAME: application_name,
                    mlrun.common.schemas.model_monitoring.SchedulingKeys.ENDPOINT_ID: endpoint_id,
                    mlrun.common.schemas.model_monitoring.SchedulingKeys.LAST_ANALYZED: last_analyzed,
                },
            )

        self._update(
            attributes={
                mlrun.common.schemas.model_monitoring.SchedulingKeys.LAST_ANALYZED: last_analyzed
            },
            table=self.MonitoringSchedulesTable,
            **application_filter_dict,
        )

    def _delete_last_analyzed(self, endpoint_id: str = "", application_name: str = ""):
        """Delete the last analyzed records for the given endpoint and/or application."""
        self._init_monitoring_schedules_table()

        application_filter_dict = self.filter_endpoint_and_application_name(
            endpoint_id=endpoint_id, application_name=application_name
        )

        # Delete the model endpoint record using sqlalchemy ORM
        self._delete(table=self.MonitoringSchedulesTable, **application_filter_dict)

    def _delete_application_result(
        self, endpoint_id: str = "", application_name: str = ""
    ):
        """Delete the application result records for the given endpoint and/or application."""
        self._init_application_results_table()

        application_filter_dict = self.filter_endpoint_and_application_name(
            endpoint_id=endpoint_id, application_name=application_name
        )

        # Delete the model endpoint record using sqlalchemy ORM
        self._delete(table=self.ApplicationResultsTable, **application_filter_dict)

    def _create_tables_if_not_exist(self):
        """Generate the declarative tables and create any that are missing in the database."""
        self._init_tables()

        for table in self._tables:
            # Create table if not exist. The `metadata` contains the `ModelEndpointsTable`
            # NOTE(review): `Engine.has_table` is deprecated in SQLAlchemy 1.4
            # (use `sqlalchemy.inspect(engine).has_table`) - confirm the
            # pinned SQLAlchemy version before upgrading.
            if not self._engine.has_table(table):
                self._tables[table].metadata.create_all(  # pyright: ignore[reportGeneralTypeIssues]
                    bind=self._engine
                )

    @staticmethod
    def _filter_values(
        query: sqlalchemy.orm.query.Query,
        model_endpoints_table: sqlalchemy.Table,
        key_filter: str,
        filtered_values: list,
        combined=True,
    ) -> sqlalchemy.orm.query.Query:
        """Filtering the SQL query object according to the provided filters.

        :param query:                 SQLAlchemy ORM query object. Includes the SELECT statements generated by the ORM
                                      for getting the model endpoint data from the SQL table.
        :param model_endpoints_table: SQLAlchemy table object that represents the model endpoints table.
        :param key_filter:            Key column to filter by.
        :param filtered_values:       List of values to filter the query the result.
        :param combined:              If true, then apply AND operator on the filtered values list. Otherwise, apply OR
                                      operator.

        return: SQLAlchemy ORM query object that represents the updated query with the provided
                filters.
        """

        if combined and len(filtered_values) > 1:
            raise mlrun.errors.MLRunInvalidArgumentError(
                "Can't apply combined policy with multiple values"
            )

        if not combined:
            # OR semantics: the column value may match any of the candidates
            return query.filter(
                model_endpoints_table.c[key_filter].in_(filtered_values)
            )

        # Generating a tuple with the relevant filters
        filter_query = []
        for _filter in filtered_values:
            filter_query.append(model_endpoints_table.c[key_filter] == _filter)

        # Apply AND operator on the SQL query object with the filters tuple
        return query.filter(sqlalchemy.and_(*filter_query))

    @staticmethod
    def _validate_labels(
        endpoint_dict: dict,
        labels: list,
    ) -> bool:
        """Validate that the model endpoint dictionary has the provided labels. There are 2 possible cases:
        1 - Labels were provided as a list of key-values pairs (e.g. ['label_1=value_1', 'label_2=value_2']): Validate
        that each pair exist in the endpoint dictionary.
        2 - Labels were provided as a list of key labels (e.g. ['label_1', 'label_2']): Validate that each key exist in
        the endpoint labels dictionary.

        :param endpoint_dict: Dictionary of the model endpoint records.
        :param labels:        List of dictionary of required labels.

        :return: True if the labels exist in the endpoint labels dictionary, otherwise False.
        """

        # Convert endpoint labels into dictionary
        endpoint_labels = json.loads(
            endpoint_dict.get(
                mlrun.common.schemas.model_monitoring.EventFieldType.LABELS
            )
        )

        for label in labels:
            # Case 1 - label is a key=value pair
            if "=" in label:
                lbl, value = list(map(lambda x: x.strip(), label.split("=")))
                if lbl not in endpoint_labels or str(endpoint_labels[lbl]) != value:
                    return False
            # Case 2 - label is just a key
            else:
                if label not in endpoint_labels:
                    return False

        return True

    @staticmethod
    def filter_endpoint_and_application_name(
        endpoint_id: str, application_name: str
    ) -> dict[str, str]:
        """Generate a dictionary filter for endpoint id and application name"""
        if not endpoint_id and not application_name:
            raise mlrun.errors.MLRunBadRequestError(
                "Please provide a valid endpoint_id and/or application_name"
            )
        application_filter_dict = {}
        if endpoint_id:
            application_filter_dict[
                mlrun.common.schemas.model_monitoring.SchedulingKeys.ENDPOINT_ID
            ] = endpoint_id
        if application_name:
            application_filter_dict[
                mlrun.common.schemas.model_monitoring.SchedulingKeys.APPLICATION_NAME
            ] = application_name
        return application_filter_dict

    def delete_model_endpoints_resources(self, endpoints: list[dict[str, typing.Any]]):
        """
        Delete all model endpoints resources in both SQL and the time series DB.

        :param endpoints: A list of model endpoints flattened dictionaries.
        """

        for endpoint_dict in endpoints:
            endpoint_id = endpoint_dict[
                mlrun.common.schemas.model_monitoring.EventFieldType.UID
            ]

            # Delete last analyzed records
            self._delete_last_analyzed(endpoint_id=endpoint_id)

            # Delete application results records
            self._delete_application_result(endpoint_id=endpoint_id)

            # Delete model endpoint record
            self.delete_model_endpoint(endpoint_id=endpoint_id)

    def get_endpoint_real_time_metrics(
        self,
        endpoint_id: str,
        metrics: list[str],
        start: str = "now-1h",
        end: str = "now",
        access_key: str = None,
    ) -> dict[str, list[tuple[str, float]]]:
        """
        Getting metrics from the time series DB. There are pre-defined metrics for model endpoints such as
        `predictions_per_second` and `latency_avg_5m` but also custom metrics defined by the user.

        :param endpoint_id: The unique id of the model endpoint.
        :param metrics:     A list of real-time metrics to return for the model endpoint.
        :param start:       The start time of the metrics. Can be represented by a string containing an RFC 3339
                            time, a Unix timestamp in milliseconds, a relative time (`'now'` or
                            `'now-[0-9]+[mhd]'`, where `m` = minutes, `h` = hours, and `'d'` = days), or 0 for the
                            earliest time.
        :param end:         The end time of the metrics. Can be represented by a string containing an RFC 3339
                            time, a Unix timestamp in milliseconds, a relative time (`'now'` or
                            `'now-[0-9]+[mhd]'`, where `m` = minutes, `h` = hours, and `'d'` = days), or 0 for the
                            earliest time.
        :param access_key:  V3IO access key that will be used for generating Frames client object. If not
                            provided, the access key will be retrieved from the environment variables.

        :return: A dictionary of metrics in which the key is a metric name and the value is a list of tuples that
                 includes timestamps and the values.
        """
        # TODO : Implement this method once Prometheus is supported
        logger.warning(
            "Real time metrics service using Prometheus will be implemented in 1.4.0"
        )

        return {}
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
# Copyright 2024 Iguazio
|
|
2
|
+
#
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
#
|
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
#
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|