mlrun 1.7.0rc5__py3-none-any.whl → 1.7.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of mlrun might be problematic. Click here for more details.
- mlrun/__init__.py +11 -1
- mlrun/__main__.py +39 -121
- mlrun/{datastore/helpers.py → alerts/__init__.py} +2 -5
- mlrun/alerts/alert.py +248 -0
- mlrun/api/schemas/__init__.py +4 -3
- mlrun/artifacts/__init__.py +8 -3
- mlrun/artifacts/base.py +39 -254
- mlrun/artifacts/dataset.py +9 -190
- mlrun/artifacts/manager.py +73 -46
- mlrun/artifacts/model.py +30 -158
- mlrun/artifacts/plots.py +23 -380
- mlrun/common/constants.py +73 -2
- mlrun/common/db/sql_session.py +3 -2
- mlrun/common/formatters/__init__.py +21 -0
- mlrun/common/formatters/artifact.py +46 -0
- mlrun/common/formatters/base.py +113 -0
- mlrun/common/formatters/feature_set.py +44 -0
- mlrun/common/formatters/function.py +46 -0
- mlrun/common/formatters/pipeline.py +53 -0
- mlrun/common/formatters/project.py +51 -0
- mlrun/common/formatters/run.py +29 -0
- mlrun/common/helpers.py +11 -1
- mlrun/{runtimes → common/runtimes}/constants.py +32 -4
- mlrun/common/schemas/__init__.py +21 -4
- mlrun/common/schemas/alert.py +202 -0
- mlrun/common/schemas/api_gateway.py +113 -2
- mlrun/common/schemas/artifact.py +28 -1
- mlrun/common/schemas/auth.py +11 -0
- mlrun/common/schemas/client_spec.py +2 -1
- mlrun/common/schemas/common.py +7 -4
- mlrun/common/schemas/constants.py +3 -0
- mlrun/common/schemas/feature_store.py +58 -28
- mlrun/common/schemas/frontend_spec.py +8 -0
- mlrun/common/schemas/function.py +11 -0
- mlrun/common/schemas/hub.py +7 -9
- mlrun/common/schemas/model_monitoring/__init__.py +21 -4
- mlrun/common/schemas/model_monitoring/constants.py +136 -42
- mlrun/common/schemas/model_monitoring/grafana.py +9 -5
- mlrun/common/schemas/model_monitoring/model_endpoints.py +89 -41
- mlrun/common/schemas/notification.py +69 -12
- mlrun/{runtimes/mpijob/v1alpha1.py → common/schemas/pagination.py} +10 -13
- mlrun/common/schemas/pipeline.py +7 -0
- mlrun/common/schemas/project.py +67 -16
- mlrun/common/schemas/runs.py +17 -0
- mlrun/common/schemas/schedule.py +1 -1
- mlrun/common/schemas/workflow.py +10 -2
- mlrun/common/types.py +14 -1
- mlrun/config.py +224 -58
- mlrun/data_types/data_types.py +11 -1
- mlrun/data_types/spark.py +5 -4
- mlrun/data_types/to_pandas.py +75 -34
- mlrun/datastore/__init__.py +8 -10
- mlrun/datastore/alibaba_oss.py +131 -0
- mlrun/datastore/azure_blob.py +131 -43
- mlrun/datastore/base.py +107 -47
- mlrun/datastore/datastore.py +17 -7
- mlrun/datastore/datastore_profile.py +91 -7
- mlrun/datastore/dbfs_store.py +3 -7
- mlrun/datastore/filestore.py +1 -3
- mlrun/datastore/google_cloud_storage.py +92 -32
- mlrun/datastore/hdfs.py +5 -0
- mlrun/datastore/inmem.py +6 -3
- mlrun/datastore/redis.py +3 -2
- mlrun/datastore/s3.py +30 -12
- mlrun/datastore/snowflake_utils.py +45 -0
- mlrun/datastore/sources.py +274 -59
- mlrun/datastore/spark_utils.py +30 -0
- mlrun/datastore/store_resources.py +9 -7
- mlrun/datastore/storeytargets.py +151 -0
- mlrun/datastore/targets.py +374 -102
- mlrun/datastore/utils.py +68 -5
- mlrun/datastore/v3io.py +28 -50
- mlrun/db/auth_utils.py +152 -0
- mlrun/db/base.py +231 -22
- mlrun/db/factory.py +1 -4
- mlrun/db/httpdb.py +864 -228
- mlrun/db/nopdb.py +268 -16
- mlrun/errors.py +35 -5
- mlrun/execution.py +111 -38
- mlrun/feature_store/__init__.py +0 -2
- mlrun/feature_store/api.py +46 -53
- mlrun/feature_store/common.py +6 -11
- mlrun/feature_store/feature_set.py +48 -23
- mlrun/feature_store/feature_vector.py +13 -2
- mlrun/feature_store/ingestion.py +7 -6
- mlrun/feature_store/retrieval/base.py +9 -4
- mlrun/feature_store/retrieval/dask_merger.py +2 -0
- mlrun/feature_store/retrieval/job.py +13 -4
- mlrun/feature_store/retrieval/local_merger.py +2 -0
- mlrun/feature_store/retrieval/spark_merger.py +24 -32
- mlrun/feature_store/steps.py +38 -19
- mlrun/features.py +6 -14
- mlrun/frameworks/_common/plan.py +3 -3
- mlrun/frameworks/_dl_common/loggers/tensorboard_logger.py +7 -12
- mlrun/frameworks/_ml_common/plan.py +1 -1
- mlrun/frameworks/auto_mlrun/auto_mlrun.py +2 -2
- mlrun/frameworks/lgbm/__init__.py +1 -1
- mlrun/frameworks/lgbm/callbacks/callback.py +2 -4
- mlrun/frameworks/lgbm/model_handler.py +1 -1
- mlrun/frameworks/parallel_coordinates.py +4 -4
- mlrun/frameworks/pytorch/__init__.py +2 -2
- mlrun/frameworks/sklearn/__init__.py +1 -1
- mlrun/frameworks/sklearn/mlrun_interface.py +13 -3
- mlrun/frameworks/tf_keras/__init__.py +5 -2
- mlrun/frameworks/tf_keras/callbacks/logging_callback.py +1 -1
- mlrun/frameworks/tf_keras/mlrun_interface.py +2 -2
- mlrun/frameworks/xgboost/__init__.py +1 -1
- mlrun/k8s_utils.py +57 -12
- mlrun/launcher/__init__.py +1 -1
- mlrun/launcher/base.py +6 -5
- mlrun/launcher/client.py +13 -11
- mlrun/launcher/factory.py +1 -1
- mlrun/launcher/local.py +15 -5
- mlrun/launcher/remote.py +10 -3
- mlrun/lists.py +6 -2
- mlrun/model.py +297 -48
- mlrun/model_monitoring/__init__.py +1 -1
- mlrun/model_monitoring/api.py +152 -357
- mlrun/model_monitoring/applications/__init__.py +10 -0
- mlrun/model_monitoring/applications/_application_steps.py +190 -0
- mlrun/model_monitoring/applications/base.py +108 -0
- mlrun/model_monitoring/applications/context.py +341 -0
- mlrun/model_monitoring/{evidently_application.py → applications/evidently_base.py} +27 -22
- mlrun/model_monitoring/applications/histogram_data_drift.py +227 -91
- mlrun/model_monitoring/applications/results.py +99 -0
- mlrun/model_monitoring/controller.py +130 -303
- mlrun/model_monitoring/{stores/models/sqlite.py → db/__init__.py} +5 -10
- mlrun/model_monitoring/db/stores/__init__.py +136 -0
- mlrun/model_monitoring/db/stores/base/__init__.py +15 -0
- mlrun/model_monitoring/db/stores/base/store.py +213 -0
- mlrun/model_monitoring/db/stores/sqldb/__init__.py +13 -0
- mlrun/model_monitoring/db/stores/sqldb/models/__init__.py +71 -0
- mlrun/model_monitoring/db/stores/sqldb/models/base.py +190 -0
- mlrun/model_monitoring/db/stores/sqldb/models/mysql.py +103 -0
- mlrun/model_monitoring/{stores/models/mysql.py → db/stores/sqldb/models/sqlite.py} +19 -13
- mlrun/model_monitoring/db/stores/sqldb/sql_store.py +659 -0
- mlrun/model_monitoring/db/stores/v3io_kv/__init__.py +13 -0
- mlrun/model_monitoring/db/stores/v3io_kv/kv_store.py +726 -0
- mlrun/model_monitoring/db/tsdb/__init__.py +105 -0
- mlrun/model_monitoring/db/tsdb/base.py +448 -0
- mlrun/model_monitoring/db/tsdb/helpers.py +30 -0
- mlrun/model_monitoring/db/tsdb/tdengine/__init__.py +15 -0
- mlrun/model_monitoring/db/tsdb/tdengine/schemas.py +298 -0
- mlrun/model_monitoring/db/tsdb/tdengine/stream_graph_steps.py +42 -0
- mlrun/model_monitoring/db/tsdb/tdengine/tdengine_connector.py +522 -0
- mlrun/model_monitoring/db/tsdb/v3io/__init__.py +15 -0
- mlrun/model_monitoring/db/tsdb/v3io/stream_graph_steps.py +158 -0
- mlrun/model_monitoring/db/tsdb/v3io/v3io_connector.py +849 -0
- mlrun/model_monitoring/features_drift_table.py +34 -22
- mlrun/model_monitoring/helpers.py +177 -39
- mlrun/model_monitoring/model_endpoint.py +3 -2
- mlrun/model_monitoring/stream_processing.py +165 -398
- mlrun/model_monitoring/tracking_policy.py +7 -1
- mlrun/model_monitoring/writer.py +161 -125
- mlrun/package/packagers/default_packager.py +2 -2
- mlrun/package/packagers_manager.py +1 -0
- mlrun/package/utils/_formatter.py +2 -2
- mlrun/platforms/__init__.py +11 -10
- mlrun/platforms/iguazio.py +67 -228
- mlrun/projects/__init__.py +6 -1
- mlrun/projects/operations.py +47 -20
- mlrun/projects/pipelines.py +396 -249
- mlrun/projects/project.py +1125 -414
- mlrun/render.py +28 -22
- mlrun/run.py +207 -180
- mlrun/runtimes/__init__.py +76 -11
- mlrun/runtimes/base.py +40 -14
- mlrun/runtimes/daskjob.py +9 -2
- mlrun/runtimes/databricks_job/databricks_runtime.py +1 -0
- mlrun/runtimes/databricks_job/databricks_wrapper.py +1 -1
- mlrun/runtimes/funcdoc.py +1 -29
- mlrun/runtimes/kubejob.py +34 -128
- mlrun/runtimes/local.py +39 -10
- mlrun/runtimes/mpijob/__init__.py +0 -20
- mlrun/runtimes/mpijob/abstract.py +8 -8
- mlrun/runtimes/mpijob/v1.py +1 -1
- mlrun/runtimes/nuclio/api_gateway.py +646 -177
- mlrun/runtimes/nuclio/application/__init__.py +15 -0
- mlrun/runtimes/nuclio/application/application.py +758 -0
- mlrun/runtimes/nuclio/application/reverse_proxy.go +95 -0
- mlrun/runtimes/nuclio/function.py +188 -68
- mlrun/runtimes/nuclio/serving.py +57 -60
- mlrun/runtimes/pod.py +191 -58
- mlrun/runtimes/remotesparkjob.py +11 -8
- mlrun/runtimes/sparkjob/spark3job.py +17 -18
- mlrun/runtimes/utils.py +40 -73
- mlrun/secrets.py +6 -2
- mlrun/serving/__init__.py +8 -1
- mlrun/serving/remote.py +2 -3
- mlrun/serving/routers.py +89 -64
- mlrun/serving/server.py +54 -26
- mlrun/serving/states.py +187 -56
- mlrun/serving/utils.py +19 -11
- mlrun/serving/v2_serving.py +136 -63
- mlrun/track/tracker.py +2 -1
- mlrun/track/trackers/mlflow_tracker.py +5 -0
- mlrun/utils/async_http.py +26 -6
- mlrun/utils/db.py +18 -0
- mlrun/utils/helpers.py +375 -105
- mlrun/utils/http.py +2 -2
- mlrun/utils/logger.py +75 -9
- mlrun/utils/notifications/notification/__init__.py +14 -10
- mlrun/utils/notifications/notification/base.py +48 -0
- mlrun/utils/notifications/notification/console.py +2 -0
- mlrun/utils/notifications/notification/git.py +24 -1
- mlrun/utils/notifications/notification/ipython.py +2 -0
- mlrun/utils/notifications/notification/slack.py +96 -21
- mlrun/utils/notifications/notification/webhook.py +63 -2
- mlrun/utils/notifications/notification_pusher.py +146 -16
- mlrun/utils/regex.py +9 -0
- mlrun/utils/retryer.py +3 -2
- mlrun/utils/v3io_clients.py +2 -3
- mlrun/utils/version/version.json +2 -2
- mlrun-1.7.2.dist-info/METADATA +390 -0
- mlrun-1.7.2.dist-info/RECORD +351 -0
- {mlrun-1.7.0rc5.dist-info → mlrun-1.7.2.dist-info}/WHEEL +1 -1
- mlrun/feature_store/retrieval/conversion.py +0 -271
- mlrun/kfpops.py +0 -868
- mlrun/model_monitoring/application.py +0 -310
- mlrun/model_monitoring/batch.py +0 -974
- mlrun/model_monitoring/controller_handler.py +0 -37
- mlrun/model_monitoring/prometheus.py +0 -216
- mlrun/model_monitoring/stores/__init__.py +0 -111
- mlrun/model_monitoring/stores/kv_model_endpoint_store.py +0 -574
- mlrun/model_monitoring/stores/model_endpoint_store.py +0 -145
- mlrun/model_monitoring/stores/models/__init__.py +0 -27
- mlrun/model_monitoring/stores/models/base.py +0 -84
- mlrun/model_monitoring/stores/sql_model_endpoint_store.py +0 -382
- mlrun/platforms/other.py +0 -305
- mlrun-1.7.0rc5.dist-info/METADATA +0 -269
- mlrun-1.7.0rc5.dist-info/RECORD +0 -323
- {mlrun-1.7.0rc5.dist-info → mlrun-1.7.2.dist-info}/LICENSE +0 -0
- {mlrun-1.7.0rc5.dist-info → mlrun-1.7.2.dist-info}/entry_points.txt +0 -0
- {mlrun-1.7.0rc5.dist-info → mlrun-1.7.2.dist-info}/top_level.txt +0 -0
mlrun/serving/v2_serving.py
CHANGED
|
@@ -15,12 +15,13 @@
|
|
|
15
15
|
import threading
|
|
16
16
|
import time
|
|
17
17
|
import traceback
|
|
18
|
-
from typing import Union
|
|
18
|
+
from typing import Optional, Union
|
|
19
19
|
|
|
20
|
-
import mlrun.
|
|
20
|
+
import mlrun.artifacts
|
|
21
|
+
import mlrun.common.model_monitoring.helpers
|
|
21
22
|
import mlrun.common.schemas.model_monitoring
|
|
22
|
-
|
|
23
|
-
from mlrun.
|
|
23
|
+
import mlrun.model_monitoring
|
|
24
|
+
from mlrun.errors import err_to_str
|
|
24
25
|
from mlrun.utils import logger, now_date
|
|
25
26
|
|
|
26
27
|
from ..common.helpers import parse_versioned_object_uri
|
|
@@ -38,6 +39,7 @@ class V2ModelServer(StepToDict):
|
|
|
38
39
|
protocol=None,
|
|
39
40
|
input_path: str = None,
|
|
40
41
|
result_path: str = None,
|
|
42
|
+
shard_by_endpoint: Optional[bool] = None,
|
|
41
43
|
**kwargs,
|
|
42
44
|
):
|
|
43
45
|
"""base model serving class (v2), using similar API to KFServing v2 and Triton
|
|
@@ -62,11 +64,11 @@ class V2ModelServer(StepToDict):
|
|
|
62
64
|
class MyClass(V2ModelServer):
|
|
63
65
|
def load(self):
|
|
64
66
|
# load and initialize the model and/or other elements
|
|
65
|
-
model_file, extra_data = self.get_model(suffix=
|
|
67
|
+
model_file, extra_data = self.get_model(suffix=".pkl")
|
|
66
68
|
self.model = load(open(model_file, "rb"))
|
|
67
69
|
|
|
68
70
|
def predict(self, request):
|
|
69
|
-
events = np.array(request[
|
|
71
|
+
events = np.array(request["inputs"])
|
|
70
72
|
dmatrix = xgb.DMatrix(events)
|
|
71
73
|
result: xgb.DMatrix = self.model.predict(dmatrix)
|
|
72
74
|
return {"outputs": result.tolist()}
|
|
@@ -90,6 +92,8 @@ class V2ModelServer(StepToDict):
|
|
|
90
92
|
this require that the event body will behave like a dict, example:
|
|
91
93
|
event: {"x": 5} , result_path="resp" means the returned response will be written
|
|
92
94
|
to event["y"] resulting in {"x": 5, "resp": <result>}
|
|
95
|
+
:param shard_by_endpoint: whether to use the endpoint as the partition/sharding key when writing to model
|
|
96
|
+
monitoring stream. Defaults to True.
|
|
93
97
|
:param kwargs: extra arguments (can be accessed using self.get_param(key))
|
|
94
98
|
"""
|
|
95
99
|
self.name = name
|
|
@@ -101,7 +105,7 @@ class V2ModelServer(StepToDict):
|
|
|
101
105
|
self.error = ""
|
|
102
106
|
self.protocol = protocol or "v2"
|
|
103
107
|
self.model_path = model_path
|
|
104
|
-
self.model_spec: mlrun.artifacts.ModelArtifact = None
|
|
108
|
+
self.model_spec: Optional[mlrun.artifacts.ModelArtifact] = None
|
|
105
109
|
self._input_path = input_path
|
|
106
110
|
self._result_path = result_path
|
|
107
111
|
self._kwargs = kwargs # for to_dict()
|
|
@@ -118,7 +122,9 @@ class V2ModelServer(StepToDict):
|
|
|
118
122
|
if model:
|
|
119
123
|
self.model = model
|
|
120
124
|
self.ready = True
|
|
125
|
+
self._versioned_model_name = None
|
|
121
126
|
self.model_endpoint_uid = None
|
|
127
|
+
self.shard_by_endpoint = shard_by_endpoint
|
|
122
128
|
|
|
123
129
|
def _load_and_update_state(self):
|
|
124
130
|
try:
|
|
@@ -147,7 +153,7 @@ class V2ModelServer(StepToDict):
|
|
|
147
153
|
logger.warn("GraphServer not initialized for VotingEnsemble instance")
|
|
148
154
|
return
|
|
149
155
|
|
|
150
|
-
if not self.context.is_mock or self.context.
|
|
156
|
+
if not self.context.is_mock or self.context.monitoring_mock:
|
|
151
157
|
self.model_endpoint_uid = _init_endpoint_record(
|
|
152
158
|
graph_server=server, model=self
|
|
153
159
|
)
|
|
@@ -175,9 +181,9 @@ class V2ModelServer(StepToDict):
|
|
|
175
181
|
::
|
|
176
182
|
|
|
177
183
|
def load(self):
|
|
178
|
-
model_file, extra_data = self.get_model(suffix=
|
|
184
|
+
model_file, extra_data = self.get_model(suffix=".pkl")
|
|
179
185
|
self.model = load(open(model_file, "rb"))
|
|
180
|
-
categories = extra_data[
|
|
186
|
+
categories = extra_data["categories"].as_df()
|
|
181
187
|
|
|
182
188
|
Parameters
|
|
183
189
|
----------
|
|
@@ -224,6 +230,23 @@ class V2ModelServer(StepToDict):
|
|
|
224
230
|
request = self.preprocess(event_body, op)
|
|
225
231
|
return self.validate(request, op)
|
|
226
232
|
|
|
233
|
+
@property
|
|
234
|
+
def versioned_model_name(self):
|
|
235
|
+
if self._versioned_model_name:
|
|
236
|
+
return self._versioned_model_name
|
|
237
|
+
|
|
238
|
+
# Generating version model value based on the model name and model version
|
|
239
|
+
if self.model_path and self.model_path.startswith("store://"):
|
|
240
|
+
# Enrich the model server with the model artifact metadata
|
|
241
|
+
self.get_model()
|
|
242
|
+
if not self.version:
|
|
243
|
+
# Enrich the model version with the model artifact tag
|
|
244
|
+
self.version = self.model_spec.tag
|
|
245
|
+
self.labels = self.model_spec.labels
|
|
246
|
+
version = self.version or "latest"
|
|
247
|
+
self._versioned_model_name = f"{self.name}:{version}"
|
|
248
|
+
return self._versioned_model_name
|
|
249
|
+
|
|
227
250
|
def do_event(self, event, *args, **kwargs):
|
|
228
251
|
"""main model event handler method"""
|
|
229
252
|
start = now_date()
|
|
@@ -231,6 +254,11 @@ class V2ModelServer(StepToDict):
|
|
|
231
254
|
event_body = _extract_input_data(self._input_path, event.body)
|
|
232
255
|
event_id = event.id
|
|
233
256
|
op = event.path.strip("/")
|
|
257
|
+
|
|
258
|
+
partition_key = (
|
|
259
|
+
self.model_endpoint_uid if self.shard_by_endpoint is not False else None
|
|
260
|
+
)
|
|
261
|
+
|
|
234
262
|
if event_body and isinstance(event_body, dict):
|
|
235
263
|
op = op or event_body.get("operation")
|
|
236
264
|
event_id = event_body.get("id", event_id)
|
|
@@ -250,13 +278,20 @@ class V2ModelServer(StepToDict):
|
|
|
250
278
|
except Exception as exc:
|
|
251
279
|
request["id"] = event_id
|
|
252
280
|
if self._model_logger:
|
|
253
|
-
self._model_logger.push(
|
|
281
|
+
self._model_logger.push(
|
|
282
|
+
start,
|
|
283
|
+
request,
|
|
284
|
+
op=op,
|
|
285
|
+
error=exc,
|
|
286
|
+
partition_key=partition_key,
|
|
287
|
+
)
|
|
254
288
|
raise exc
|
|
255
289
|
|
|
256
290
|
response = {
|
|
257
291
|
"id": event_id,
|
|
258
292
|
"model_name": self.name,
|
|
259
293
|
"outputs": outputs,
|
|
294
|
+
"timestamp": start.isoformat(sep=" ", timespec="microseconds"),
|
|
260
295
|
}
|
|
261
296
|
if self.version:
|
|
262
297
|
response["model_version"] = self.version
|
|
@@ -286,7 +321,7 @@ class V2ModelServer(StepToDict):
|
|
|
286
321
|
setattr(event, "terminated", True)
|
|
287
322
|
event_body = {
|
|
288
323
|
"name": self.name,
|
|
289
|
-
"version": self.version,
|
|
324
|
+
"version": self.version or "",
|
|
290
325
|
"inputs": [],
|
|
291
326
|
"outputs": [],
|
|
292
327
|
}
|
|
@@ -306,7 +341,13 @@ class V2ModelServer(StepToDict):
|
|
|
306
341
|
except Exception as exc:
|
|
307
342
|
request["id"] = event_id
|
|
308
343
|
if self._model_logger:
|
|
309
|
-
self._model_logger.push(
|
|
344
|
+
self._model_logger.push(
|
|
345
|
+
start,
|
|
346
|
+
request,
|
|
347
|
+
op=op,
|
|
348
|
+
error=exc,
|
|
349
|
+
partition_key=partition_key,
|
|
350
|
+
)
|
|
310
351
|
raise exc
|
|
311
352
|
|
|
312
353
|
response = {
|
|
@@ -330,11 +371,20 @@ class V2ModelServer(StepToDict):
|
|
|
330
371
|
if self._model_logger:
|
|
331
372
|
inputs, outputs = self.logged_results(request, response, op)
|
|
332
373
|
if inputs is None and outputs is None:
|
|
333
|
-
self._model_logger.push(
|
|
374
|
+
self._model_logger.push(
|
|
375
|
+
start, request, response, op, partition_key=partition_key
|
|
376
|
+
)
|
|
334
377
|
else:
|
|
335
378
|
track_request = {"id": event_id, "inputs": inputs or []}
|
|
336
379
|
track_response = {"outputs": outputs or []}
|
|
337
|
-
|
|
380
|
+
# TODO : check dict/list
|
|
381
|
+
self._model_logger.push(
|
|
382
|
+
start,
|
|
383
|
+
track_request,
|
|
384
|
+
track_response,
|
|
385
|
+
op,
|
|
386
|
+
partition_key=partition_key,
|
|
387
|
+
)
|
|
338
388
|
event.body = _update_result_body(self._result_path, original_body, response)
|
|
339
389
|
return event
|
|
340
390
|
|
|
@@ -375,8 +425,10 @@ class V2ModelServer(StepToDict):
|
|
|
375
425
|
"""postprocess, before returning response"""
|
|
376
426
|
return request
|
|
377
427
|
|
|
378
|
-
def predict(self, request: dict) ->
|
|
379
|
-
"""model prediction operation
|
|
428
|
+
def predict(self, request: dict) -> list:
|
|
429
|
+
"""model prediction operation
|
|
430
|
+
:return: list with the model prediction results (can be multi-port) or list of lists for multiple predictions
|
|
431
|
+
"""
|
|
380
432
|
raise NotImplementedError()
|
|
381
433
|
|
|
382
434
|
def explain(self, request: dict) -> dict:
|
|
@@ -449,7 +501,7 @@ class _ModelLogPusher:
|
|
|
449
501
|
base_data["labels"] = self.model.labels
|
|
450
502
|
return base_data
|
|
451
503
|
|
|
452
|
-
def push(self, start, request, resp=None, op=None, error=None):
|
|
504
|
+
def push(self, start, request, resp=None, op=None, error=None, partition_key=None):
|
|
453
505
|
start_str = start.isoformat(sep=" ", timespec="microseconds")
|
|
454
506
|
if error:
|
|
455
507
|
data = self.base_data()
|
|
@@ -460,7 +512,7 @@ class _ModelLogPusher:
|
|
|
460
512
|
if self.verbose:
|
|
461
513
|
message = f"{message}\n{traceback.format_exc()}"
|
|
462
514
|
data["error"] = message
|
|
463
|
-
self.output_stream.push([data])
|
|
515
|
+
self.output_stream.push([data], partition_key=partition_key)
|
|
464
516
|
return
|
|
465
517
|
|
|
466
518
|
self._sample_iter = (self._sample_iter + 1) % self.stream_sample
|
|
@@ -486,7 +538,7 @@ class _ModelLogPusher:
|
|
|
486
538
|
"metrics",
|
|
487
539
|
]
|
|
488
540
|
data["values"] = self._batch
|
|
489
|
-
self.output_stream.push([data])
|
|
541
|
+
self.output_stream.push([data], partition_key=partition_key)
|
|
490
542
|
else:
|
|
491
543
|
data = self.base_data()
|
|
492
544
|
data["request"] = request
|
|
@@ -496,7 +548,7 @@ class _ModelLogPusher:
|
|
|
496
548
|
data["microsec"] = microsec
|
|
497
549
|
if getattr(self.model, "metrics", None):
|
|
498
550
|
data["metrics"] = self.model.metrics
|
|
499
|
-
self.output_stream.push([data])
|
|
551
|
+
self.output_stream.push([data], partition_key=partition_key)
|
|
500
552
|
|
|
501
553
|
|
|
502
554
|
def _init_endpoint_record(
|
|
@@ -523,62 +575,83 @@ def _init_endpoint_record(
|
|
|
523
575
|
graph_server.function_uri
|
|
524
576
|
)
|
|
525
577
|
except Exception as e:
|
|
526
|
-
logger.error("Failed to parse function URI", exc=e)
|
|
578
|
+
logger.error("Failed to parse function URI", exc=err_to_str(e))
|
|
527
579
|
return None
|
|
528
580
|
|
|
529
|
-
# Generating version model value based on the model name and model version
|
|
530
|
-
if model.version:
|
|
531
|
-
versioned_model_name = f"{model.name}:{model.version}"
|
|
532
|
-
else:
|
|
533
|
-
versioned_model_name = f"{model.name}:latest"
|
|
534
|
-
|
|
535
581
|
# Generating model endpoint ID based on function uri and model version
|
|
536
582
|
uid = mlrun.common.model_monitoring.create_model_endpoint_uid(
|
|
537
|
-
function_uri=graph_server.function_uri,
|
|
583
|
+
function_uri=graph_server.function_uri,
|
|
584
|
+
versioned_model=model.versioned_model_name,
|
|
538
585
|
).uid
|
|
539
586
|
|
|
540
|
-
# If model endpoint object was found in DB, skip the creation process.
|
|
541
587
|
try:
|
|
542
|
-
mlrun.get_run_db().get_model_endpoint(
|
|
543
|
-
|
|
588
|
+
model_ep = mlrun.get_run_db().get_model_endpoint(
|
|
589
|
+
project=project, endpoint_id=uid
|
|
590
|
+
)
|
|
544
591
|
except mlrun.errors.MLRunNotFoundError:
|
|
592
|
+
model_ep = None
|
|
593
|
+
except mlrun.errors.MLRunBadRequestError as err:
|
|
594
|
+
logger.info(
|
|
595
|
+
"Cannot get the model endpoints store", err=mlrun.errors.err_to_str(err)
|
|
596
|
+
)
|
|
597
|
+
return
|
|
598
|
+
|
|
599
|
+
if model.context.server.track_models and not model_ep:
|
|
545
600
|
logger.info("Creating a new model endpoint record", endpoint_id=uid)
|
|
601
|
+
model_endpoint = mlrun.common.schemas.ModelEndpoint(
|
|
602
|
+
metadata=mlrun.common.schemas.ModelEndpointMetadata(
|
|
603
|
+
project=project, labels=model.labels, uid=uid
|
|
604
|
+
),
|
|
605
|
+
spec=mlrun.common.schemas.ModelEndpointSpec(
|
|
606
|
+
function_uri=graph_server.function_uri,
|
|
607
|
+
model=model.versioned_model_name,
|
|
608
|
+
model_class=model.__class__.__name__,
|
|
609
|
+
model_uri=model.model_path,
|
|
610
|
+
stream_path=model.context.stream.stream_uri,
|
|
611
|
+
active=True,
|
|
612
|
+
monitoring_mode=mlrun.common.schemas.model_monitoring.ModelMonitoringMode.enabled,
|
|
613
|
+
),
|
|
614
|
+
status=mlrun.common.schemas.ModelEndpointStatus(
|
|
615
|
+
endpoint_type=mlrun.common.schemas.model_monitoring.EndpointType.NODE_EP
|
|
616
|
+
),
|
|
617
|
+
)
|
|
546
618
|
|
|
547
|
-
|
|
548
|
-
|
|
549
|
-
|
|
550
|
-
|
|
551
|
-
|
|
552
|
-
|
|
553
|
-
function_uri=graph_server.function_uri,
|
|
554
|
-
model=versioned_model_name,
|
|
555
|
-
model_class=model.__class__.__name__,
|
|
556
|
-
model_uri=model.model_path,
|
|
557
|
-
stream_path=config.model_endpoint_monitoring.store_prefixes.default.format(
|
|
558
|
-
project=project, kind="stream"
|
|
559
|
-
),
|
|
560
|
-
active=True,
|
|
561
|
-
monitoring_mode=mlrun.common.schemas.model_monitoring.ModelMonitoringMode.enabled
|
|
562
|
-
if model.context.server.track_models
|
|
563
|
-
else mlrun.common.schemas.model_monitoring.ModelMonitoringMode.disabled,
|
|
564
|
-
),
|
|
565
|
-
status=mlrun.common.schemas.ModelEndpointStatus(
|
|
566
|
-
endpoint_type=mlrun.common.schemas.model_monitoring.EndpointType.NODE_EP
|
|
567
|
-
),
|
|
568
|
-
)
|
|
619
|
+
db = mlrun.get_run_db()
|
|
620
|
+
db.create_model_endpoint(
|
|
621
|
+
project=project,
|
|
622
|
+
endpoint_id=uid,
|
|
623
|
+
model_endpoint=model_endpoint.dict(),
|
|
624
|
+
)
|
|
569
625
|
|
|
626
|
+
elif model_ep:
|
|
627
|
+
attributes = {}
|
|
628
|
+
old_model_uri = model_ep.spec.model_uri
|
|
629
|
+
mlrun.model_monitoring.helpers.enrich_model_endpoint_with_model_uri(
|
|
630
|
+
model_endpoint=model_ep,
|
|
631
|
+
model_obj=model.model_spec,
|
|
632
|
+
)
|
|
633
|
+
if model_ep.spec.model_uri != old_model_uri:
|
|
634
|
+
attributes["model_uri"] = model_ep.spec.model_uri
|
|
635
|
+
if (
|
|
636
|
+
model_ep.spec.monitoring_mode
|
|
637
|
+
== mlrun.common.schemas.model_monitoring.ModelMonitoringMode.enabled
|
|
638
|
+
) != model.context.server.track_models:
|
|
639
|
+
attributes["monitoring_mode"] = (
|
|
640
|
+
mlrun.common.schemas.model_monitoring.ModelMonitoringMode.enabled
|
|
641
|
+
if model.context.server.track_models
|
|
642
|
+
else mlrun.common.schemas.model_monitoring.ModelMonitoringMode.disabled
|
|
643
|
+
)
|
|
644
|
+
if attributes:
|
|
570
645
|
db = mlrun.get_run_db()
|
|
571
|
-
|
|
572
|
-
db.create_model_endpoint(
|
|
646
|
+
db.patch_model_endpoint(
|
|
573
647
|
project=project,
|
|
574
648
|
endpoint_id=uid,
|
|
575
|
-
|
|
649
|
+
attributes=attributes,
|
|
650
|
+
)
|
|
651
|
+
logger.info(
|
|
652
|
+
"Updating model endpoint attributes",
|
|
653
|
+
attributes=attributes,
|
|
654
|
+
endpoint_id=uid,
|
|
576
655
|
)
|
|
577
|
-
|
|
578
|
-
except Exception as e:
|
|
579
|
-
logger.error("Failed to create endpoint record", exc=e)
|
|
580
|
-
|
|
581
|
-
except Exception as e:
|
|
582
|
-
logger.error("Failed to retrieve model endpoint object", exc=e)
|
|
583
656
|
|
|
584
657
|
return uid
|
mlrun/track/tracker.py
CHANGED
|
@@ -31,8 +31,9 @@ class Tracker(ABC):
|
|
|
31
31
|
* Offline: Manually importing models and artifacts into an MLRun project using the `import_x` methods.
|
|
32
32
|
"""
|
|
33
33
|
|
|
34
|
+
@staticmethod
|
|
34
35
|
@abstractmethod
|
|
35
|
-
def is_enabled(
|
|
36
|
+
def is_enabled() -> bool:
|
|
36
37
|
"""
|
|
37
38
|
Checks if tracker is enabled.
|
|
38
39
|
|
|
@@ -442,6 +442,11 @@ class MLFlowTracker(Tracker):
|
|
|
442
442
|
# Prepare the archive path:
|
|
443
443
|
model_uri = pathlib.Path(model_uri)
|
|
444
444
|
archive_path = pathlib.Path(tmp_path) / f"{model_uri.stem}.zip"
|
|
445
|
+
if not os.path.exists(model_uri):
|
|
446
|
+
local_path = mlflow.artifacts.download_artifacts(
|
|
447
|
+
artifact_uri=str(model_uri)
|
|
448
|
+
)
|
|
449
|
+
model_uri = pathlib.Path(local_path)
|
|
445
450
|
|
|
446
451
|
# TODO add progress bar for the case of large files
|
|
447
452
|
# Zip the artifact:
|
mlrun/utils/async_http.py
CHANGED
|
@@ -24,7 +24,7 @@ from aiohttp_retry import ExponentialRetry, RequestParams, RetryClient, RetryOpt
|
|
|
24
24
|
from aiohttp_retry.client import _RequestContext
|
|
25
25
|
|
|
26
26
|
from mlrun.config import config
|
|
27
|
-
from mlrun.errors import err_to_str
|
|
27
|
+
from mlrun.errors import err_to_str, raise_for_status
|
|
28
28
|
|
|
29
29
|
from .helpers import logger as mlrun_logger
|
|
30
30
|
|
|
@@ -46,12 +46,21 @@ class AsyncClientWithRetry(RetryClient):
|
|
|
46
46
|
*args,
|
|
47
47
|
**kwargs,
|
|
48
48
|
):
|
|
49
|
+
# do not retry on PUT / PATCH as they might have side effects (not truly idempotent)
|
|
50
|
+
blacklisted_methods = (
|
|
51
|
+
blacklisted_methods
|
|
52
|
+
if blacklisted_methods is not None
|
|
53
|
+
else [
|
|
54
|
+
"POST",
|
|
55
|
+
"PUT",
|
|
56
|
+
"PATCH",
|
|
57
|
+
]
|
|
58
|
+
)
|
|
49
59
|
super().__init__(
|
|
50
60
|
*args,
|
|
51
61
|
retry_options=ExponentialRetryOverride(
|
|
52
62
|
retry_on_exception=retry_on_exception,
|
|
53
|
-
|
|
54
|
-
blacklisted_methods=blacklisted_methods or ["POST", "PUT", "PATCH"],
|
|
63
|
+
blacklisted_methods=blacklisted_methods,
|
|
55
64
|
attempts=max_retries,
|
|
56
65
|
statuses=retry_on_status_codes,
|
|
57
66
|
factor=retry_backoff_factor,
|
|
@@ -63,6 +72,12 @@ class AsyncClientWithRetry(RetryClient):
|
|
|
63
72
|
**kwargs,
|
|
64
73
|
)
|
|
65
74
|
|
|
75
|
+
def methods_blacklist_update_required(self, new_blacklist: str):
|
|
76
|
+
self._retry_options: ExponentialRetryOverride
|
|
77
|
+
return set(self._retry_options.blacklisted_methods).difference(
|
|
78
|
+
set(new_blacklist)
|
|
79
|
+
)
|
|
80
|
+
|
|
66
81
|
def _make_requests(
|
|
67
82
|
self,
|
|
68
83
|
params_list: list[RequestParams],
|
|
@@ -173,7 +188,7 @@ class _CustomRequestContext(_RequestContext):
|
|
|
173
188
|
last_attempt = current_attempt == self._retry_options.attempts
|
|
174
189
|
if self._is_status_code_ok(response.status) or last_attempt:
|
|
175
190
|
if self._raise_for_status:
|
|
176
|
-
|
|
191
|
+
raise_for_status(response)
|
|
177
192
|
|
|
178
193
|
self._response = response
|
|
179
194
|
return response
|
|
@@ -222,7 +237,7 @@ class _CustomRequestContext(_RequestContext):
|
|
|
222
237
|
retry_wait = self._retry_options.get_timeout(
|
|
223
238
|
attempt=current_attempt, response=None
|
|
224
239
|
)
|
|
225
|
-
self._logger.
|
|
240
|
+
self._logger.warning(
|
|
226
241
|
"Request failed on retryable exception, retrying",
|
|
227
242
|
retry_wait_secs=retry_wait,
|
|
228
243
|
method=params.method,
|
|
@@ -275,6 +290,11 @@ class _CustomRequestContext(_RequestContext):
|
|
|
275
290
|
if isinstance(exc.os_error, exc_type):
|
|
276
291
|
return
|
|
277
292
|
if exc.__cause__:
|
|
278
|
-
return
|
|
293
|
+
# If the cause exception is retriable, return, otherwise, raise the original exception
|
|
294
|
+
try:
|
|
295
|
+
self.verify_exception_type(exc.__cause__)
|
|
296
|
+
except Exception:
|
|
297
|
+
raise exc
|
|
298
|
+
return
|
|
279
299
|
else:
|
|
280
300
|
raise exc
|
mlrun/utils/db.py
CHANGED
|
@@ -12,6 +12,7 @@
|
|
|
12
12
|
# See the License for the specific language governing permissions and
|
|
13
13
|
# limitations under the License.
|
|
14
14
|
#
|
|
15
|
+
import abc
|
|
15
16
|
import pickle
|
|
16
17
|
from datetime import datetime
|
|
17
18
|
|
|
@@ -28,12 +29,22 @@ class BaseModel:
|
|
|
28
29
|
columns = [column.key for column in mapper.columns if column.key not in exclude]
|
|
29
30
|
|
|
30
31
|
def get_key_value(c):
|
|
32
|
+
# all (never say never) DB classes have "object" defined as "full_object"
|
|
33
|
+
if c == "object":
|
|
34
|
+
c = "full_object"
|
|
31
35
|
if isinstance(getattr(self, c), datetime):
|
|
32
36
|
return c, getattr(self, c).isoformat()
|
|
33
37
|
return c, getattr(self, c)
|
|
34
38
|
|
|
35
39
|
return dict(map(get_key_value, columns))
|
|
36
40
|
|
|
41
|
+
@abc.abstractmethod
|
|
42
|
+
def get_identifier_string(self):
|
|
43
|
+
"""
|
|
44
|
+
This method must be implemented by any subclass.
|
|
45
|
+
"""
|
|
46
|
+
pass
|
|
47
|
+
|
|
37
48
|
|
|
38
49
|
class HasStruct(BaseModel):
|
|
39
50
|
@property
|
|
@@ -51,3 +62,10 @@ class HasStruct(BaseModel):
|
|
|
51
62
|
exclude = exclude or []
|
|
52
63
|
exclude.append("body")
|
|
53
64
|
return super().to_dict(exclude, strip=strip)
|
|
65
|
+
|
|
66
|
+
@abc.abstractmethod
|
|
67
|
+
def get_identifier_string(self):
|
|
68
|
+
"""
|
|
69
|
+
This method must be implemented by any subclass.
|
|
70
|
+
"""
|
|
71
|
+
pass
|