mlrun 1.3.2rc1__py3-none-any.whl → 1.3.2rc2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of mlrun might be problematic.

Files changed (93)
  1. mlrun/api/api/deps.py +14 -1
  2. mlrun/api/api/endpoints/frontend_spec.py +0 -2
  3. mlrun/api/api/endpoints/functions.py +15 -27
  4. mlrun/api/api/endpoints/grafana_proxy.py +435 -74
  5. mlrun/api/api/endpoints/healthz.py +5 -18
  6. mlrun/api/api/endpoints/model_endpoints.py +33 -37
  7. mlrun/api/api/utils.py +6 -13
  8. mlrun/api/crud/__init__.py +14 -16
  9. mlrun/api/crud/logs.py +5 -7
  10. mlrun/api/crud/model_monitoring/__init__.py +2 -2
  11. mlrun/api/crud/model_monitoring/model_endpoint_store.py +847 -0
  12. mlrun/api/crud/model_monitoring/model_endpoints.py +105 -328
  13. mlrun/api/crud/pipelines.py +2 -3
  14. mlrun/api/db/sqldb/models/models_mysql.py +52 -19
  15. mlrun/api/db/sqldb/models/models_sqlite.py +52 -19
  16. mlrun/api/db/sqldb/session.py +19 -26
  17. mlrun/api/schemas/__init__.py +2 -0
  18. mlrun/api/schemas/constants.py +0 -13
  19. mlrun/api/schemas/frontend_spec.py +0 -1
  20. mlrun/api/schemas/model_endpoints.py +38 -195
  21. mlrun/api/schemas/schedule.py +2 -2
  22. mlrun/api/utils/clients/log_collector.py +5 -0
  23. mlrun/builder.py +9 -41
  24. mlrun/config.py +1 -76
  25. mlrun/data_types/__init__.py +1 -6
  26. mlrun/data_types/data_types.py +1 -3
  27. mlrun/datastore/__init__.py +2 -9
  28. mlrun/datastore/sources.py +20 -25
  29. mlrun/datastore/store_resources.py +1 -1
  30. mlrun/datastore/targets.py +34 -67
  31. mlrun/datastore/utils.py +4 -26
  32. mlrun/db/base.py +2 -4
  33. mlrun/db/filedb.py +5 -13
  34. mlrun/db/httpdb.py +32 -64
  35. mlrun/db/sqldb.py +2 -4
  36. mlrun/errors.py +0 -5
  37. mlrun/execution.py +0 -2
  38. mlrun/feature_store/api.py +8 -24
  39. mlrun/feature_store/feature_set.py +6 -28
  40. mlrun/feature_store/feature_vector.py +0 -2
  41. mlrun/feature_store/ingestion.py +11 -8
  42. mlrun/feature_store/retrieval/base.py +43 -271
  43. mlrun/feature_store/retrieval/dask_merger.py +153 -55
  44. mlrun/feature_store/retrieval/job.py +3 -12
  45. mlrun/feature_store/retrieval/local_merger.py +130 -48
  46. mlrun/feature_store/retrieval/spark_merger.py +125 -126
  47. mlrun/features.py +2 -7
  48. mlrun/model_monitoring/constants.py +6 -48
  49. mlrun/model_monitoring/helpers.py +35 -118
  50. mlrun/model_monitoring/model_monitoring_batch.py +260 -293
  51. mlrun/model_monitoring/stream_processing_fs.py +253 -220
  52. mlrun/platforms/iguazio.py +0 -33
  53. mlrun/projects/project.py +72 -34
  54. mlrun/runtimes/base.py +0 -5
  55. mlrun/runtimes/daskjob.py +0 -2
  56. mlrun/runtimes/function.py +3 -29
  57. mlrun/runtimes/kubejob.py +15 -39
  58. mlrun/runtimes/local.py +45 -7
  59. mlrun/runtimes/mpijob/abstract.py +0 -2
  60. mlrun/runtimes/mpijob/v1.py +0 -2
  61. mlrun/runtimes/pod.py +0 -2
  62. mlrun/runtimes/remotesparkjob.py +0 -2
  63. mlrun/runtimes/serving.py +0 -6
  64. mlrun/runtimes/sparkjob/abstract.py +2 -39
  65. mlrun/runtimes/sparkjob/spark3job.py +0 -2
  66. mlrun/serving/__init__.py +1 -2
  67. mlrun/serving/routers.py +35 -35
  68. mlrun/serving/server.py +12 -22
  69. mlrun/serving/states.py +30 -162
  70. mlrun/serving/v2_serving.py +10 -13
  71. mlrun/utils/clones.py +1 -1
  72. mlrun/utils/model_monitoring.py +96 -122
  73. mlrun/utils/version/version.json +2 -2
  74. {mlrun-1.3.2rc1.dist-info → mlrun-1.3.2rc2.dist-info}/METADATA +27 -23
  75. {mlrun-1.3.2rc1.dist-info → mlrun-1.3.2rc2.dist-info}/RECORD +79 -92
  76. mlrun/api/crud/model_monitoring/grafana.py +0 -427
  77. mlrun/datastore/spark_udf.py +0 -40
  78. mlrun/model_monitoring/__init__.py +0 -44
  79. mlrun/model_monitoring/common.py +0 -112
  80. mlrun/model_monitoring/model_endpoint.py +0 -141
  81. mlrun/model_monitoring/stores/__init__.py +0 -106
  82. mlrun/model_monitoring/stores/kv_model_endpoint_store.py +0 -448
  83. mlrun/model_monitoring/stores/model_endpoint_store.py +0 -147
  84. mlrun/model_monitoring/stores/models/__init__.py +0 -23
  85. mlrun/model_monitoring/stores/models/base.py +0 -18
  86. mlrun/model_monitoring/stores/models/mysql.py +0 -100
  87. mlrun/model_monitoring/stores/models/sqlite.py +0 -98
  88. mlrun/model_monitoring/stores/sql_model_endpoint_store.py +0 -375
  89. mlrun/utils/db.py +0 -52
  90. {mlrun-1.3.2rc1.dist-info → mlrun-1.3.2rc2.dist-info}/LICENSE +0 -0
  91. {mlrun-1.3.2rc1.dist-info → mlrun-1.3.2rc2.dist-info}/WHEEL +0 -0
  92. {mlrun-1.3.2rc1.dist-info → mlrun-1.3.2rc2.dist-info}/entry_points.txt +0 -0
  93. {mlrun-1.3.2rc1.dist-info → mlrun-1.3.2rc2.dist-info}/top_level.txt +0 -0

mlrun/model_monitoring/stores/sql_model_endpoint_store.py DELETED
@@ -1,375 +0,0 @@
- # Copyright 2018 Iguazio
- #
- # Licensed under the Apache License, Version 2.0 (the "License");
- # you may not use this file except in compliance with the License.
- # You may obtain a copy of the License at
- #
- #   http://www.apache.org/licenses/LICENSE-2.0
- #
- # Unless required by applicable law or agreed to in writing, software
- # distributed under the License is distributed on an "AS IS" BASIS,
- # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- # See the License for the specific language governing permissions and
- # limitations under the License.
- #
-
- import json
- import typing
- from datetime import datetime, timezone
-
- import pandas as pd
- import sqlalchemy as db
-
- import mlrun
- import mlrun.model_monitoring.constants as model_monitoring_constants
- import mlrun.model_monitoring.model_endpoint
- import mlrun.utils.model_monitoring
- import mlrun.utils.v3io_clients
- from mlrun.api.db.sqldb.session import create_session, get_engine
- from mlrun.utils import logger
-
- from .model_endpoint_store import ModelEndpointStore
- from .models import get_ModelEndpointsTable
- from .models.base import Base
-
-
- class SQLModelEndpointStore(ModelEndpointStore):
-
-     """
-     Handles the DB operations when the DB target is from type SQL. For the SQL operations, we use SQLAlchemy, a Python
-     SQL toolkit that handles the communication with the database. When using SQL for storing the model endpoints
-     record, the user needs to provide a valid connection string for the database.
-     """
-
-     _engine = None
-
-     def __init__(
-         self,
-         project: str,
-         sql_connection_string: str = None,
-     ):
-         """
-         Initialize SQL store target object.
-
-         :param project:               The name of the project.
-         :param sql_connection_string: Valid connection string or a path to SQL database with model endpoints table.
-         """
-
-         super().__init__(project=project)
-
-         self.sql_connection_string = (
-             sql_connection_string
-             or mlrun.utils.model_monitoring.get_connection_string(project=self.project)
-         )
-
-         self.table_name = model_monitoring_constants.EventFieldType.MODEL_ENDPOINTS
-
-         self._engine = get_engine(dsn=self.sql_connection_string)
-         self.ModelEndpointsTable = get_ModelEndpointsTable(
-             connection_string=self.sql_connection_string
-         )
-         # Create table if not exist. The `metadata` contains the `ModelEndpointsTable`
-         if not self._engine.has_table(self.table_name):
-             Base.metadata.create_all(bind=self._engine)
-         self.model_endpoints_table = self.ModelEndpointsTable.__table__
-
-     def write_model_endpoint(self, endpoint: typing.Dict[str, typing.Any]):
-         """
-         Create a new endpoint record in the SQL table. This method also creates the model endpoints table within the
-         SQL database if not exist.
-
-         :param endpoint: model endpoint dictionary that will be written into the DB.
-         """
-
-         with self._engine.connect() as connection:
-
-             # Adjust timestamps fields
-             endpoint[
-                 model_monitoring_constants.EventFieldType.FIRST_REQUEST
-             ] = datetime.now(timezone.utc)
-             endpoint[
-                 model_monitoring_constants.EventFieldType.LAST_REQUEST
-             ] = datetime.now(timezone.utc)
-
-             # Convert the result into a pandas Dataframe and write it into the database
-             endpoint_df = pd.DataFrame([endpoint])
-
-             endpoint_df.to_sql(
-                 self.table_name, con=connection, index=False, if_exists="append"
-             )
-
-     def update_model_endpoint(
-         self, endpoint_id: str, attributes: typing.Dict[str, typing.Any]
-     ):
-         """
-         Update a model endpoint record with a given attributes.
-
-         :param endpoint_id: The unique id of the model endpoint.
-         :param attributes:  Dictionary of attributes that will be used for update the model endpoint. Note that the keys
-                             of the attributes dictionary should exist in the SQL table.
-
-         """
-
-         # Update the model endpoint record using sqlalchemy ORM
-         with create_session(dsn=self.sql_connection_string) as session:
-
-             # Remove endpoint id (foreign key) from the update query
-             attributes.pop(model_monitoring_constants.EventFieldType.ENDPOINT_ID, None)
-
-             # Generate and commit the update session query
-             session.query(self.ModelEndpointsTable).filter(
-                 self.ModelEndpointsTable.uid == endpoint_id
-             ).update(attributes)
-             session.commit()
-
-     def delete_model_endpoint(self, endpoint_id: str):
-         """
-         Deletes the SQL record of a given model endpoint id.
-
-         :param endpoint_id: The unique id of the model endpoint.
-         """
-
-         # Delete the model endpoint record using sqlalchemy ORM
-         with create_session(dsn=self.sql_connection_string) as session:
-
-             # Generate and commit the delete query
-             session.query(self.ModelEndpointsTable).filter_by(uid=endpoint_id).delete()
-             session.commit()
-
-     def get_model_endpoint(
-         self,
-         endpoint_id: str,
-     ) -> typing.Dict[str, typing.Any]:
-         """
-         Get a single model endpoint record.
-
-         :param endpoint_id: The unique id of the model endpoint.
-
-         :return: A model endpoint record as a dictionary.
-
-         :raise MLRunNotFoundError: If the model endpoints table was not found or the model endpoint id was not found.
-         """
-
-         # Get the model endpoint record using sqlalchemy ORM
-         with create_session(dsn=self.sql_connection_string) as session:
-
-             # Generate the get query
-             endpoint_record = (
-                 session.query(self.ModelEndpointsTable)
-                 .filter_by(uid=endpoint_id)
-                 .one_or_none()
-             )
-
-             if not endpoint_record:
-                 raise mlrun.errors.MLRunNotFoundError(f"Endpoint {endpoint_id} not found")
-
-             # Convert the database values and the table columns into a python dictionary
-             return endpoint_record.to_dict()
-
-     def list_model_endpoints(
-         self,
-         model: str = None,
-         function: str = None,
-         labels: typing.List[str] = None,
-         top_level: bool = None,
-         uids: typing.List = None,
-     ) -> typing.List[typing.Dict[str, typing.Any]]:
-         """
-         Returns a list of model endpoint dictionaries, supports filtering by model, function, labels or top level.
-         By default, when no filters are applied, all available model endpoints for the given project will
-         be listed.
-
-         :param model:     The name of the model to filter by.
-         :param function:  The name of the function to filter by.
-         :param labels:    A list of labels to filter by. Label filters work by either filtering a specific value
-                           of a label (i.e. list("key=value")) or by looking for the existence of a given
-                           key (i.e. "key").
-         :param top_level: If True will return only routers and endpoint that are NOT children of any router.
-         :param uids:      List of model endpoint unique ids to include in the result.
-
-         :return: A list of model endpoint dictionaries.
-         """
-
-         # Generate an empty model endpoints list that will be filled afterwards with model endpoint dictionaries
-         endpoint_list = []
-
-         # Get the model endpoints records using sqlalchemy ORM
-         with create_session(dsn=self.sql_connection_string) as session:
-
-             # Generate the list query
-             query = session.query(self.ModelEndpointsTable).filter_by(
-                 project=self.project
-             )
-
-             # Apply filters
-             if model:
-                 query = self._filter_values(
-                     query=query,
-                     model_endpoints_table=self.model_endpoints_table,
-                     key_filter=model_monitoring_constants.EventFieldType.MODEL,
-                     filtered_values=[model],
-                 )
-             if function:
-                 query = self._filter_values(
-                     query=query,
-                     model_endpoints_table=self.model_endpoints_table,
-                     key_filter=model_monitoring_constants.EventFieldType.FUNCTION,
-                     filtered_values=[function],
-                 )
-             if uids:
-                 query = self._filter_values(
-                     query=query,
-                     model_endpoints_table=self.model_endpoints_table,
-                     key_filter=model_monitoring_constants.EventFieldType.UID,
-                     filtered_values=uids,
-                     combined=False,
-                 )
-             if top_level:
-                 node_ep = str(mlrun.model_monitoring.EndpointType.NODE_EP.value)
-                 router_ep = str(mlrun.model_monitoring.EndpointType.ROUTER.value)
-                 endpoint_types = [node_ep, router_ep]
-                 query = self._filter_values(
-                     query=query,
-                     model_endpoints_table=self.model_endpoints_table,
-                     key_filter=model_monitoring_constants.EventFieldType.ENDPOINT_TYPE,
-                     filtered_values=endpoint_types,
-                     combined=False,
-                 )
-             # Convert the results from the DB into a ModelEndpoint object and append it to the model endpoints list
-             for endpoint_record in query.all():
-                 endpoint_dict = endpoint_record.to_dict()
-
-                 # Filter labels
-                 if labels and not self._validate_labels(
-                     endpoint_dict=endpoint_dict, labels=labels
-                 ):
-                     continue
-
-                 endpoint_list.append(endpoint_dict)
-
-         return endpoint_list
-
-     @staticmethod
-     def _filter_values(
-         query: db.orm.query.Query,
-         model_endpoints_table: db.Table,
-         key_filter: str,
-         filtered_values: typing.List,
-         combined=True,
-     ) -> db.orm.query.Query:
-         """Filtering the SQL query object according to the provided filters.
-
-         :param query:                 SQLAlchemy ORM query object. Includes the SELECT statements generated by the ORM
-                                       for getting the model endpoint data from the SQL table.
-         :param model_endpoints_table: SQLAlchemy table object that represents the model endpoints table.
-         :param key_filter:            Key column to filter by.
-         :param filtered_values:       List of values to filter the query the result.
-         :param combined:              If true, then apply AND operator on the filtered values list. Otherwise, apply OR
-                                       operator.
-
-         :return: SQLAlchemy ORM query object that represents the updated query with the provided
-                  filters.
-         """
-
-         if combined and len(filtered_values) > 1:
-             raise mlrun.errors.MLRunInvalidArgumentError(
-                 "Can't apply combined policy with multiple values"
-             )
-
-         if not combined:
-             return query.filter(
-                 model_endpoints_table.c[key_filter].in_(filtered_values)
-             )
-
-         # Generating a tuple with the relevant filters
-         filter_query = []
-         for _filter in filtered_values:
-             filter_query.append(model_endpoints_table.c[key_filter] == _filter)
-
-         # Apply AND operator on the SQL query object with the filters tuple
-         return query.filter(db.and_(*filter_query))
-
-     @staticmethod
-     def _validate_labels(
-         endpoint_dict: dict,
-         labels: typing.List,
-     ) -> bool:
-         """Validate that the model endpoint dictionary has the provided labels. There are 2 possible cases:
-         1 - Labels were provided as a list of key-values pairs (e.g. ['label_1=value_1', 'label_2=value_2']): Validate
-         that each pair exist in the endpoint dictionary.
-         2 - Labels were provided as a list of key labels (e.g. ['label_1', 'label_2']): Validate that each key exist in
-         the endpoint labels dictionary.
-
-         :param endpoint_dict: Dictionary of the model endpoint records.
-         :param labels:        List of dictionary of required labels.
-
-         :return: True if the labels exist in the endpoint labels dictionary, otherwise False.
-         """
-
-         # Convert endpoint labels into dictionary
-         endpoint_labels = json.loads(
-             endpoint_dict.get(model_monitoring_constants.EventFieldType.LABELS)
-         )
-
-         for label in labels:
-             # Case 1 - label is a key=value pair
-             if "=" in label:
-                 lbl, value = list(map(lambda x: x.strip(), label.split("=")))
-                 if lbl not in endpoint_labels or str(endpoint_labels[lbl]) != value:
-                     return False
-             # Case 2 - label is just a key
-             else:
-                 if label not in endpoint_labels:
-                     return False
-
-         return True
-
-     def delete_model_endpoints_resources(
-         self, endpoints: typing.List[typing.Dict[str, typing.Any]]
-     ):
-         """
-         Delete all model endpoints resources in both SQL and the time series DB.
-
-         :param endpoints: A list of model endpoints flattened dictionaries.
-         """
-
-         for endpoint_dict in endpoints:
-             # Delete model endpoint record from SQL table
-             self.delete_model_endpoint(
-                 endpoint_dict[model_monitoring_constants.EventFieldType.UID],
-             )
-
-     def get_endpoint_real_time_metrics(
-         self,
-         endpoint_id: str,
-         metrics: typing.List[str],
-         start: str = "now-1h",
-         end: str = "now",
-         access_key: str = None,
-     ) -> typing.Dict[str, typing.List[typing.Tuple[str, float]]]:
-         """
-         Getting metrics from the time series DB. There are pre-defined metrics for model endpoints such as
-         `predictions_per_second` and `latency_avg_5m` but also custom metrics defined by the user.
-
-         :param endpoint_id: The unique id of the model endpoint.
-         :param metrics:     A list of real-time metrics to return for the model endpoint.
-         :param start:       The start time of the metrics. Can be represented by a string containing an RFC 3339
-                             time, a Unix timestamp in milliseconds, a relative time (`'now'` or
-                             `'now-[0-9]+[mhd]'`, where `m` = minutes, `h` = hours, and `'d'` = days), or 0 for the
-                             earliest time.
-         :param end:         The end time of the metrics. Can be represented by a string containing an RFC 3339
-                             time, a Unix timestamp in milliseconds, a relative time (`'now'` or
-                             `'now-[0-9]+[mhd]'`, where `m` = minutes, `h` = hours, and `'d'` = days), or 0 for the
-                             earliest time.
-         :param access_key:  V3IO access key that will be used for generating Frames client object. If not
-                             provided, the access key will be retrieved from the environment variables.
-
-         :return: A dictionary of metrics in which the key is a metric name and the value is a list of tuples that
-                  includes timestamps and the values.
-         """
-         # TODO: Implement this method once Prometheus is supported
-         logger.warning(
-             "Real time metrics service using Prometheus will be implemented in 1.4.0"
-         )
-
-         return {}
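
For context, the SQLModelEndpointStore removed above exposes a small CRUD-style API (write, get, list, update, delete) keyed by the model endpoint uid, with label filtering applied in memory by _validate_labels. A minimal usage sketch, not an official mlrun example: it assumes mlrun 1.3.2rc1 is installed (the module no longer exists in 1.3.2rc2), and the connection string and endpoint fields below are illustrative stand-ins rather than the real model-endpoints table schema.

# Sketch only: runs against the removed 1.3.2rc1 module, not against 1.3.2rc2.
from mlrun.model_monitoring.stores.sql_model_endpoint_store import SQLModelEndpointStore

# The DSN and record fields are illustrative; a real record must match the
# columns of the project's model-endpoints table.
store = SQLModelEndpointStore(
    project="demo-project",
    sql_connection_string="sqlite:///model_endpoints.db",
)

store.write_model_endpoint(
    endpoint={
        "uid": "ep-1234",
        "project": "demo-project",
        "model": "churn-model",
        "function": "serving",
        "labels": '{"env": "dev"}',  # labels are stored as a JSON-encoded string
    }
)

# Column filters (model/function/uids) run in SQL; label filters run in memory
endpoints = store.list_model_endpoints(model="churn-model", labels=["env=dev"])

store.update_model_endpoint("ep-1234", attributes={"function": "serving-v2"})
store.delete_model_endpoint("ep-1234")
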
mlrun/utils/db.py DELETED
@@ -1,52 +0,0 @@
- # Copyright 2018 Iguazio
- #
- # Licensed under the Apache License, Version 2.0 (the "License");
- # you may not use this file except in compliance with the License.
- # You may obtain a copy of the License at
- #
- #   http://www.apache.org/licenses/LICENSE-2.0
- #
- # Unless required by applicable law or agreed to in writing, software
- # distributed under the License is distributed on an "AS IS" BASIS,
- # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- # See the License for the specific language governing permissions and
- # limitations under the License.
- #
- import pickle
- from datetime import datetime
-
- from sqlalchemy.orm import class_mapper
-
-
- class BaseModel:
-     def to_dict(self, exclude=None):
-         """
-         NOTE - this function (currently) does not handle serializing relationships
-         """
-         exclude = exclude or []
-         mapper = class_mapper(self.__class__)
-         columns = [column.key for column in mapper.columns if column.key not in exclude]
-         get_key_value = (
-             lambda c: (c, getattr(self, c).isoformat())
-             if isinstance(getattr(self, c), datetime)
-             else (c, getattr(self, c))
-         )
-         return dict(map(get_key_value, columns))
-
-
- class HasStruct(BaseModel):
-     @property
-     def struct(self):
-         return pickle.loads(self.body)
-
-     @struct.setter
-     def struct(self, value):
-         self.body = pickle.dumps(value)
-
-     def to_dict(self, exclude=None):
-         """
-         NOTE - this function (currently) does not handle serializing relationships
-         """
-         exclude = exclude or []
-         exclude.append("body")
-         return super().to_dict(exclude)
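
The to_dict helper deleted here walks a model's mapped columns and renders datetime values as ISO-8601 strings, deliberately skipping relationships, while HasStruct layers pickle-backed struct access on top and excludes the raw body column. A minimal standalone sketch of the same to_dict pattern, assuming SQLAlchemy 1.4+; the Run model is an illustrative toy, not an mlrun schema.

# Standalone sketch of the to_dict pattern shown above; the Run model is illustrative.
from datetime import datetime, timezone

from sqlalchemy import Column, DateTime, Integer, String
from sqlalchemy.orm import class_mapper, declarative_base

Base = declarative_base()


class Run(Base):
    __tablename__ = "runs"  # toy table, not part of mlrun

    id = Column(Integer, primary_key=True)
    name = Column(String(64))
    started = Column(DateTime)

    def to_dict(self, exclude=None):
        # Serialize mapped columns only; datetimes become ISO-8601 strings and
        # relationships are not handled, matching the removed helper.
        exclude = exclude or []
        columns = [c.key for c in class_mapper(self.__class__).columns if c.key not in exclude]
        return {
            c: getattr(self, c).isoformat()
            if isinstance(getattr(self, c), datetime)
            else getattr(self, c)
            for c in columns
        }


run = Run(id=1, name="trainer", started=datetime(2023, 5, 1, tzinfo=timezone.utc))
print(run.to_dict())                     # {'id': 1, 'name': 'trainer', 'started': '2023-05-01T00:00:00+00:00'}
print(run.to_dict(exclude=["started"]))  # {'id': 1, 'name': 'trainer'}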