mlrun 1.3.2rc1__py3-none-any.whl → 1.3.2rc2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of mlrun might be problematic. Click here for more details.
- mlrun/api/api/deps.py +14 -1
- mlrun/api/api/endpoints/frontend_spec.py +0 -2
- mlrun/api/api/endpoints/functions.py +15 -27
- mlrun/api/api/endpoints/grafana_proxy.py +435 -74
- mlrun/api/api/endpoints/healthz.py +5 -18
- mlrun/api/api/endpoints/model_endpoints.py +33 -37
- mlrun/api/api/utils.py +6 -13
- mlrun/api/crud/__init__.py +14 -16
- mlrun/api/crud/logs.py +5 -7
- mlrun/api/crud/model_monitoring/__init__.py +2 -2
- mlrun/api/crud/model_monitoring/model_endpoint_store.py +847 -0
- mlrun/api/crud/model_monitoring/model_endpoints.py +105 -328
- mlrun/api/crud/pipelines.py +2 -3
- mlrun/api/db/sqldb/models/models_mysql.py +52 -19
- mlrun/api/db/sqldb/models/models_sqlite.py +52 -19
- mlrun/api/db/sqldb/session.py +19 -26
- mlrun/api/schemas/__init__.py +2 -0
- mlrun/api/schemas/constants.py +0 -13
- mlrun/api/schemas/frontend_spec.py +0 -1
- mlrun/api/schemas/model_endpoints.py +38 -195
- mlrun/api/schemas/schedule.py +2 -2
- mlrun/api/utils/clients/log_collector.py +5 -0
- mlrun/builder.py +9 -41
- mlrun/config.py +1 -76
- mlrun/data_types/__init__.py +1 -6
- mlrun/data_types/data_types.py +1 -3
- mlrun/datastore/__init__.py +2 -9
- mlrun/datastore/sources.py +20 -25
- mlrun/datastore/store_resources.py +1 -1
- mlrun/datastore/targets.py +34 -67
- mlrun/datastore/utils.py +4 -26
- mlrun/db/base.py +2 -4
- mlrun/db/filedb.py +5 -13
- mlrun/db/httpdb.py +32 -64
- mlrun/db/sqldb.py +2 -4
- mlrun/errors.py +0 -5
- mlrun/execution.py +0 -2
- mlrun/feature_store/api.py +8 -24
- mlrun/feature_store/feature_set.py +6 -28
- mlrun/feature_store/feature_vector.py +0 -2
- mlrun/feature_store/ingestion.py +11 -8
- mlrun/feature_store/retrieval/base.py +43 -271
- mlrun/feature_store/retrieval/dask_merger.py +153 -55
- mlrun/feature_store/retrieval/job.py +3 -12
- mlrun/feature_store/retrieval/local_merger.py +130 -48
- mlrun/feature_store/retrieval/spark_merger.py +125 -126
- mlrun/features.py +2 -7
- mlrun/model_monitoring/constants.py +6 -48
- mlrun/model_monitoring/helpers.py +35 -118
- mlrun/model_monitoring/model_monitoring_batch.py +260 -293
- mlrun/model_monitoring/stream_processing_fs.py +253 -220
- mlrun/platforms/iguazio.py +0 -33
- mlrun/projects/project.py +72 -34
- mlrun/runtimes/base.py +0 -5
- mlrun/runtimes/daskjob.py +0 -2
- mlrun/runtimes/function.py +3 -29
- mlrun/runtimes/kubejob.py +15 -39
- mlrun/runtimes/local.py +45 -7
- mlrun/runtimes/mpijob/abstract.py +0 -2
- mlrun/runtimes/mpijob/v1.py +0 -2
- mlrun/runtimes/pod.py +0 -2
- mlrun/runtimes/remotesparkjob.py +0 -2
- mlrun/runtimes/serving.py +0 -6
- mlrun/runtimes/sparkjob/abstract.py +2 -39
- mlrun/runtimes/sparkjob/spark3job.py +0 -2
- mlrun/serving/__init__.py +1 -2
- mlrun/serving/routers.py +35 -35
- mlrun/serving/server.py +12 -22
- mlrun/serving/states.py +30 -162
- mlrun/serving/v2_serving.py +10 -13
- mlrun/utils/clones.py +1 -1
- mlrun/utils/model_monitoring.py +96 -122
- mlrun/utils/version/version.json +2 -2
- {mlrun-1.3.2rc1.dist-info → mlrun-1.3.2rc2.dist-info}/METADATA +27 -23
- {mlrun-1.3.2rc1.dist-info → mlrun-1.3.2rc2.dist-info}/RECORD +79 -92
- mlrun/api/crud/model_monitoring/grafana.py +0 -427
- mlrun/datastore/spark_udf.py +0 -40
- mlrun/model_monitoring/__init__.py +0 -44
- mlrun/model_monitoring/common.py +0 -112
- mlrun/model_monitoring/model_endpoint.py +0 -141
- mlrun/model_monitoring/stores/__init__.py +0 -106
- mlrun/model_monitoring/stores/kv_model_endpoint_store.py +0 -448
- mlrun/model_monitoring/stores/model_endpoint_store.py +0 -147
- mlrun/model_monitoring/stores/models/__init__.py +0 -23
- mlrun/model_monitoring/stores/models/base.py +0 -18
- mlrun/model_monitoring/stores/models/mysql.py +0 -100
- mlrun/model_monitoring/stores/models/sqlite.py +0 -98
- mlrun/model_monitoring/stores/sql_model_endpoint_store.py +0 -375
- mlrun/utils/db.py +0 -52
- {mlrun-1.3.2rc1.dist-info → mlrun-1.3.2rc2.dist-info}/LICENSE +0 -0
- {mlrun-1.3.2rc1.dist-info → mlrun-1.3.2rc2.dist-info}/WHEEL +0 -0
- {mlrun-1.3.2rc1.dist-info → mlrun-1.3.2rc2.dist-info}/entry_points.txt +0 -0
- {mlrun-1.3.2rc1.dist-info → mlrun-1.3.2rc2.dist-info}/top_level.txt +0 -0
mlrun/serving/routers.py
CHANGED
|
@@ -24,7 +24,6 @@ import numpy
|
|
|
24
24
|
import numpy as np
|
|
25
25
|
|
|
26
26
|
import mlrun
|
|
27
|
-
import mlrun.model_monitoring
|
|
28
27
|
import mlrun.utils.model_monitoring
|
|
29
28
|
from mlrun.utils import logger, now_date, parse_versioned_object_uri
|
|
30
29
|
|
|
@@ -33,8 +32,10 @@ from ..api.schemas import (
|
|
|
33
32
|
ModelEndpointMetadata,
|
|
34
33
|
ModelEndpointSpec,
|
|
35
34
|
ModelEndpointStatus,
|
|
35
|
+
ModelMonitoringMode,
|
|
36
36
|
)
|
|
37
37
|
from ..config import config
|
|
38
|
+
from ..utils.model_monitoring import EndpointType
|
|
38
39
|
from .server import GraphServer
|
|
39
40
|
from .utils import RouterToDict, _extract_input_data, _update_result_body
|
|
40
41
|
from .v2_serving import _ModelLogPusher
|
|
@@ -401,14 +402,12 @@ class ParallelRun(BaseModelRouter):
|
|
|
401
402
|
step._parent = None
|
|
402
403
|
if step._object:
|
|
403
404
|
step._object.context = None
|
|
404
|
-
if hasattr(step._object, "_kwargs"):
|
|
405
|
-
step._object._kwargs["graph_step"] = None
|
|
406
405
|
routes[key] = step
|
|
407
406
|
executor_class = concurrent.futures.ProcessPoolExecutor
|
|
408
407
|
self._pool = executor_class(
|
|
409
408
|
max_workers=len(self.routes),
|
|
410
409
|
initializer=ParallelRun.init_pool,
|
|
411
|
-
initargs=(server, routes),
|
|
410
|
+
initargs=(server, routes, id(self)),
|
|
412
411
|
)
|
|
413
412
|
elif self.executor_type == ParallelRunnerModes.thread:
|
|
414
413
|
executor_class = concurrent.futures.ThreadPoolExecutor
|
|
@@ -423,7 +422,7 @@ class ParallelRun(BaseModelRouter):
|
|
|
423
422
|
if self._pool is not None:
|
|
424
423
|
if self.executor_type == ParallelRunnerModes.process:
|
|
425
424
|
global local_routes
|
|
426
|
-
|
|
425
|
+
local_routes.pop(id(self))
|
|
427
426
|
self._pool.shutdown()
|
|
428
427
|
self._pool = None
|
|
429
428
|
|
|
@@ -447,7 +446,7 @@ class ParallelRun(BaseModelRouter):
|
|
|
447
446
|
for route in self.routes.keys():
|
|
448
447
|
if self.executor_type == ParallelRunnerModes.process:
|
|
449
448
|
future = executor.submit(
|
|
450
|
-
ParallelRun._wrap_step, route, copy.copy(event)
|
|
449
|
+
ParallelRun._wrap_step, route, id(self), copy.copy(event)
|
|
451
450
|
)
|
|
452
451
|
elif self.executor_type == ParallelRunnerModes.thread:
|
|
453
452
|
step = self.routes[route]
|
|
@@ -471,22 +470,25 @@ class ParallelRun(BaseModelRouter):
|
|
|
471
470
|
return results
|
|
472
471
|
|
|
473
472
|
@staticmethod
|
|
474
|
-
def init_pool(server_spec, routes):
|
|
473
|
+
def init_pool(server_spec, routes, object_id):
|
|
475
474
|
server = mlrun.serving.GraphServer.from_dict(server_spec)
|
|
476
475
|
server.init_states(None, None)
|
|
477
476
|
global local_routes
|
|
477
|
+
if object_id in local_routes:
|
|
478
|
+
return
|
|
478
479
|
for route in routes.values():
|
|
479
480
|
route.context = server.context
|
|
480
481
|
if route._object:
|
|
481
482
|
route._object.context = server.context
|
|
482
|
-
local_routes = routes
|
|
483
|
+
local_routes[object_id] = routes
|
|
483
484
|
|
|
484
485
|
@staticmethod
|
|
485
|
-
def _wrap_step(route, event):
|
|
486
|
+
def _wrap_step(route, object_id, event):
|
|
486
487
|
global local_routes
|
|
487
|
-
|
|
488
|
+
routes = local_routes.get(object_id, None).copy()
|
|
489
|
+
if routes is None:
|
|
488
490
|
return None, None
|
|
489
|
-
return route,
|
|
491
|
+
return route, routes[route].run(event)
|
|
490
492
|
|
|
491
493
|
@staticmethod
|
|
492
494
|
def _wrap_method(route, handler, event):
|
|
@@ -1041,7 +1043,7 @@ def _init_endpoint_record(
|
|
|
1041
1043
|
versioned_model_name = f"{voting_ensemble.name}:latest"
|
|
1042
1044
|
|
|
1043
1045
|
# Generating model endpoint ID based on function uri and model version
|
|
1044
|
-
endpoint_uid = mlrun.model_monitoring.
|
|
1046
|
+
endpoint_uid = mlrun.utils.model_monitoring.create_model_endpoint_id(
|
|
1045
1047
|
function_uri=graph_server.function_uri, versioned_model=versioned_model_name
|
|
1046
1048
|
).uid
|
|
1047
1049
|
|
|
@@ -1059,33 +1061,33 @@ def _init_endpoint_record(
|
|
|
1059
1061
|
if hasattr(c, "endpoint_uid"):
|
|
1060
1062
|
children_uids.append(c.endpoint_uid)
|
|
1061
1063
|
|
|
1062
|
-
|
|
1063
|
-
|
|
1064
|
-
|
|
1065
|
-
|
|
1066
|
-
|
|
1067
|
-
|
|
1068
|
-
|
|
1069
|
-
|
|
1064
|
+
model_endpoint = ModelEndpoint(
|
|
1065
|
+
metadata=ModelEndpointMetadata(project=project, uid=endpoint_uid),
|
|
1066
|
+
spec=ModelEndpointSpec(
|
|
1067
|
+
function_uri=graph_server.function_uri,
|
|
1068
|
+
model=versioned_model_name,
|
|
1069
|
+
model_class=voting_ensemble.__class__.__name__,
|
|
1070
|
+
stream_path=config.model_endpoint_monitoring.store_prefixes.default.format(
|
|
1071
|
+
project=project, kind="stream"
|
|
1072
|
+
),
|
|
1073
|
+
active=True,
|
|
1074
|
+
monitoring_mode=ModelMonitoringMode.enabled
|
|
1075
|
+
if voting_ensemble.context.server.track_models
|
|
1076
|
+
else ModelMonitoringMode.disabled,
|
|
1070
1077
|
),
|
|
1071
|
-
|
|
1072
|
-
|
|
1073
|
-
|
|
1074
|
-
|
|
1075
|
-
|
|
1076
|
-
|
|
1077
|
-
children=list(voting_ensemble.routes.keys()),
|
|
1078
|
-
endpoint_type=mlrun.model_monitoring.EndpointType.ROUTER,
|
|
1079
|
-
children_uids=children_uids,
|
|
1080
|
-
),
|
|
1081
|
-
)
|
|
1078
|
+
status=ModelEndpointStatus(
|
|
1079
|
+
children=list(voting_ensemble.routes.keys()),
|
|
1080
|
+
endpoint_type=EndpointType.ROUTER,
|
|
1081
|
+
children_uids=children_uids,
|
|
1082
|
+
),
|
|
1083
|
+
)
|
|
1082
1084
|
|
|
1083
1085
|
db = mlrun.get_run_db()
|
|
1084
1086
|
|
|
1085
1087
|
db.create_model_endpoint(
|
|
1086
1088
|
project=project,
|
|
1087
1089
|
endpoint_id=model_endpoint.metadata.uid,
|
|
1088
|
-
model_endpoint=model_endpoint
|
|
1090
|
+
model_endpoint=model_endpoint,
|
|
1089
1091
|
)
|
|
1090
1092
|
|
|
1091
1093
|
# Update model endpoint children type
|
|
@@ -1093,9 +1095,7 @@ def _init_endpoint_record(
|
|
|
1093
1095
|
current_endpoint = db.get_model_endpoint(
|
|
1094
1096
|
project=project, endpoint_id=model_endpoint
|
|
1095
1097
|
)
|
|
1096
|
-
current_endpoint.status.endpoint_type =
|
|
1097
|
-
mlrun.model_monitoring.EndpointType.LEAF_EP
|
|
1098
|
-
)
|
|
1098
|
+
current_endpoint.status.endpoint_type = EndpointType.LEAF_EP
|
|
1099
1099
|
db.create_model_endpoint(
|
|
1100
1100
|
project=project,
|
|
1101
1101
|
endpoint_id=model_endpoint,
|
mlrun/serving/server.py
CHANGED
|
@@ -18,22 +18,21 @@ import asyncio
|
|
|
18
18
|
import json
|
|
19
19
|
import os
|
|
20
20
|
import socket
|
|
21
|
+
import sys
|
|
21
22
|
import traceback
|
|
22
23
|
import uuid
|
|
23
24
|
from typing import Optional, Union
|
|
24
25
|
|
|
25
26
|
import mlrun
|
|
26
|
-
import mlrun.utils.model_monitoring
|
|
27
27
|
from mlrun.config import config
|
|
28
28
|
from mlrun.errors import err_to_str
|
|
29
|
-
from mlrun.model_monitoring import FileTargetKind
|
|
30
29
|
from mlrun.secrets import SecretsStore
|
|
31
30
|
|
|
32
31
|
from ..datastore import get_stream_pusher
|
|
33
32
|
from ..datastore.store_resources import ResourceCache
|
|
34
33
|
from ..errors import MLRunInvalidArgumentError
|
|
35
34
|
from ..model import ModelObj
|
|
36
|
-
from ..utils import get_caller_globals, parse_versioned_object_uri
|
|
35
|
+
from ..utils import create_logger, get_caller_globals, parse_versioned_object_uri
|
|
37
36
|
from .states import RootFlowStep, RouterStep, get_function, graph_root_setter
|
|
38
37
|
from .utils import (
|
|
39
38
|
event_id_key,
|
|
@@ -44,41 +43,32 @@ from .utils import (
|
|
|
44
43
|
|
|
45
44
|
|
|
46
45
|
class _StreamContext:
|
|
47
|
-
|
|
48
|
-
that will be used for pushing the events from the nuclio model serving function"""
|
|
49
|
-
|
|
50
|
-
def __init__(self, enabled: bool, parameters: dict, function_uri: str):
|
|
51
|
-
|
|
52
|
-
"""
|
|
53
|
-
Initialize _StreamContext object.
|
|
54
|
-
:param enabled: A boolean indication for applying the stream context
|
|
55
|
-
:param parameters: Dictionary of optional parameters, such as `log_stream` and `stream_args`. Note that these
|
|
56
|
-
parameters might be relevant to the output source such as `kafka_bootstrap_servers` if
|
|
57
|
-
the output source is from type Kafka.
|
|
58
|
-
:param function_uri: Full value of the function uri, usually it's <project-name>/<function-name>
|
|
59
|
-
"""
|
|
60
|
-
|
|
46
|
+
def __init__(self, enabled, parameters, function_uri):
|
|
61
47
|
self.enabled = False
|
|
62
48
|
self.hostname = socket.gethostname()
|
|
63
49
|
self.function_uri = function_uri
|
|
64
50
|
self.output_stream = None
|
|
65
51
|
self.stream_uri = None
|
|
66
|
-
log_stream = parameters.get(FileTargetKind.LOG_STREAM, "")
|
|
67
52
|
|
|
68
|
-
|
|
53
|
+
log_stream = parameters.get("log_stream", "")
|
|
54
|
+
stream_uri = config.model_endpoint_monitoring.store_prefixes.default
|
|
55
|
+
|
|
56
|
+
if ((enabled and stream_uri) or log_stream) and function_uri:
|
|
69
57
|
self.enabled = True
|
|
58
|
+
|
|
70
59
|
project, _, _, _ = parse_versioned_object_uri(
|
|
71
60
|
function_uri, config.default_project
|
|
72
61
|
)
|
|
73
62
|
|
|
74
|
-
stream_uri =
|
|
63
|
+
stream_uri = stream_uri.format(project=project, kind="stream")
|
|
75
64
|
|
|
76
65
|
if log_stream:
|
|
77
|
-
# Update the stream path to the log stream value
|
|
78
66
|
stream_uri = log_stream.format(project=project)
|
|
79
67
|
|
|
80
68
|
stream_args = parameters.get("stream_args", {})
|
|
81
69
|
|
|
70
|
+
self.stream_uri = stream_uri
|
|
71
|
+
|
|
82
72
|
self.output_stream = get_stream_pusher(stream_uri, **stream_args)
|
|
83
73
|
|
|
84
74
|
|
|
@@ -468,7 +458,7 @@ class GraphContext:
|
|
|
468
458
|
self.Response = nuclio_context.Response
|
|
469
459
|
self.worker_id = nuclio_context.worker_id
|
|
470
460
|
elif not logger:
|
|
471
|
-
self.logger =
|
|
461
|
+
self.logger = create_logger(level, "human", "flow", sys.stdout)
|
|
472
462
|
|
|
473
463
|
self._server = server
|
|
474
464
|
self.current_function = None
|
mlrun/serving/states.py
CHANGED
|
@@ -12,7 +12,7 @@
|
|
|
12
12
|
# See the License for the specific language governing permissions and
|
|
13
13
|
# limitations under the License.
|
|
14
14
|
|
|
15
|
-
__all__ = ["TaskStep", "RouterStep", "RootFlowStep"
|
|
15
|
+
__all__ = ["TaskStep", "RouterStep", "RootFlowStep"]
|
|
16
16
|
|
|
17
17
|
import os
|
|
18
18
|
import pathlib
|
|
@@ -49,7 +49,6 @@ class StepKinds:
|
|
|
49
49
|
queue = "queue"
|
|
50
50
|
choice = "choice"
|
|
51
51
|
root = "root"
|
|
52
|
-
error_step = "error_step"
|
|
53
52
|
|
|
54
53
|
|
|
55
54
|
_task_step_fields = [
|
|
@@ -135,82 +134,11 @@ class BaseStep(ModelObj):
|
|
|
135
134
|
self.after.append(name)
|
|
136
135
|
return self
|
|
137
136
|
|
|
138
|
-
def error_handler(
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
before=None,
|
|
144
|
-
function=None,
|
|
145
|
-
full_event: bool = None,
|
|
146
|
-
input_path: str = None,
|
|
147
|
-
result_path: str = None,
|
|
148
|
-
**class_args,
|
|
149
|
-
):
|
|
150
|
-
"""set error handler on a step or the entire graph (to be executed on failure/raise)
|
|
151
|
-
|
|
152
|
-
When setting the error_handler on the graph object, the graph completes after the error handler execution.
|
|
153
|
-
|
|
154
|
-
example:
|
|
155
|
-
in the below example, an 'error_catcher' step is set as the error_handler of the 'raise' step:
|
|
156
|
-
in case of error/raise in 'raise' step, the handle_error will be run. after that,
|
|
157
|
-
the 'echo' step will be run.
|
|
158
|
-
graph = function.set_topology('flow', engine='async')
|
|
159
|
-
graph.to(name='raise', handler='raising_step')\
|
|
160
|
-
.error_handler(name='error_catcher', handler='handle_error', full_event=True, before='echo')
|
|
161
|
-
graph.add_step(name="echo", handler='echo', after="raise").respond()
|
|
162
|
-
|
|
163
|
-
:param name: unique name (and path) for the error handler step, default is class name
|
|
164
|
-
:param class_name: class name or step object to build the step from
|
|
165
|
-
the error handler step is derived from task step (ie no router/queue functionally)
|
|
166
|
-
:param handler: class/function handler to invoke on run/event
|
|
167
|
-
:param before: string or list of next step(s) names that will run after this step.
|
|
168
|
-
the `before` param must not specify upstream steps as it will cause a loop.
|
|
169
|
-
if `before` is not specified, the graph will complete after the error handler execution.
|
|
170
|
-
:param function: function this step should run in
|
|
171
|
-
:param full_event: this step accepts the full event (not just the body)
|
|
172
|
-
:param input_path: selects the key/path in the event to use as input to the step
|
|
173
|
-
this requires that the event body will behave like a dict, for example:
|
|
174
|
-
event: {"data": {"a": 5, "b": 7}}, input_path="data.b" means the step will
|
|
175
|
-
receive 7 as input
|
|
176
|
-
:param result_path: selects the key/path in the event to write the results to
|
|
177
|
-
this requires that the event body will behave like a dict, for example:
|
|
178
|
-
event: {"x": 5} , result_path="y" means the output of the step will be written
|
|
179
|
-
to event["y"] resulting in {"x": 5, "y": <result>}
|
|
180
|
-
:param class_args: class init arguments
|
|
181
|
-
|
|
182
|
-
"""
|
|
183
|
-
if not (class_name or handler):
|
|
184
|
-
raise MLRunInvalidArgumentError("class_name or handler must be provided")
|
|
185
|
-
if isinstance(self, RootFlowStep) and before:
|
|
186
|
-
raise MLRunInvalidArgumentError(
|
|
187
|
-
"`before` arg can't be specified for graph error handler"
|
|
188
|
-
)
|
|
189
|
-
|
|
190
|
-
name = get_name(name, class_name)
|
|
191
|
-
step = ErrorStep(
|
|
192
|
-
class_name,
|
|
193
|
-
class_args,
|
|
194
|
-
handler,
|
|
195
|
-
name=name,
|
|
196
|
-
function=function,
|
|
197
|
-
full_event=full_event,
|
|
198
|
-
input_path=input_path,
|
|
199
|
-
result_path=result_path,
|
|
200
|
-
)
|
|
201
|
-
self.on_error = name
|
|
202
|
-
before = [before] if isinstance(before, str) else before
|
|
203
|
-
step.before = before or []
|
|
204
|
-
step.base_step = self.name
|
|
205
|
-
if hasattr(self, "_parent") and self._parent:
|
|
206
|
-
# when self is a step
|
|
207
|
-
step = self._parent._steps.update(name, step)
|
|
208
|
-
step.set_parent(self._parent)
|
|
209
|
-
else:
|
|
210
|
-
# when self is the graph
|
|
211
|
-
step = self._steps.update(name, step)
|
|
212
|
-
step.set_parent(self)
|
|
213
|
-
|
|
137
|
+
def error_handler(self, step_name: str = None):
|
|
138
|
+
"""set error handler step (on failure/raise of this step)"""
|
|
139
|
+
if not step_name:
|
|
140
|
+
raise MLRunInvalidArgumentError("Must specify step_name")
|
|
141
|
+
self.on_error = step_name
|
|
214
142
|
return self
|
|
215
143
|
|
|
216
144
|
def init_object(self, context, namespace, mode="sync", reset=False, **extra_kwargs):
|
|
@@ -258,11 +186,10 @@ class BaseStep(ModelObj):
|
|
|
258
186
|
|
|
259
187
|
def _call_error_handler(self, event, err, **kwargs):
|
|
260
188
|
"""call the error handler if exist"""
|
|
261
|
-
if
|
|
262
|
-
event.error =
|
|
263
|
-
|
|
264
|
-
|
|
265
|
-
return self._on_error_handler(event)
|
|
189
|
+
if self._on_error_handler:
|
|
190
|
+
event.error = err_to_str(err)
|
|
191
|
+
event.origin_state = self.fullname
|
|
192
|
+
return self._on_error_handler(event)
|
|
266
193
|
|
|
267
194
|
def path_to_step(self, path: str):
|
|
268
195
|
"""return step object from step relative/fullname"""
|
|
@@ -400,7 +327,6 @@ class TaskStep(BaseStep):
|
|
|
400
327
|
args = signature(self._handler).parameters
|
|
401
328
|
if args and "context" in list(args.keys()):
|
|
402
329
|
self._inject_context = True
|
|
403
|
-
self._set_error_handler()
|
|
404
330
|
return
|
|
405
331
|
|
|
406
332
|
self._class_object, self.class_name = self.get_step_class_object(
|
|
@@ -538,23 +464,14 @@ class TaskStep(BaseStep):
|
|
|
538
464
|
)
|
|
539
465
|
event.body = _update_result_body(self.result_path, event.body, result)
|
|
540
466
|
except Exception as exc:
|
|
541
|
-
|
|
542
|
-
|
|
543
|
-
|
|
544
|
-
event.body = _update_result_body(self.result_path, event.body, result)
|
|
545
|
-
else:
|
|
467
|
+
self._log_error(event, exc)
|
|
468
|
+
handled = self._call_error_handler(event, exc)
|
|
469
|
+
if not handled:
|
|
546
470
|
raise exc
|
|
471
|
+
event.terminated = True
|
|
547
472
|
return event
|
|
548
473
|
|
|
549
474
|
|
|
550
|
-
class ErrorStep(TaskStep):
|
|
551
|
-
"""error execution step, runs a class or handler"""
|
|
552
|
-
|
|
553
|
-
kind = "error_step"
|
|
554
|
-
_dict_fields = _task_step_fields + ["before", "base_step"]
|
|
555
|
-
_default_class = ""
|
|
556
|
-
|
|
557
|
-
|
|
558
475
|
class RouterStep(TaskStep):
|
|
559
476
|
"""router step, implement routing logic for running child routes"""
|
|
560
477
|
|
|
@@ -907,7 +824,6 @@ class FlowStep(BaseStep):
|
|
|
907
824
|
def init_object(self, context, namespace, mode="sync", reset=False, **extra_kwargs):
|
|
908
825
|
"""initialize graph objects and classes"""
|
|
909
826
|
self.context = context
|
|
910
|
-
self._insert_all_error_handlers()
|
|
911
827
|
self.check_and_process_graph()
|
|
912
828
|
|
|
913
829
|
for step in self._steps.values():
|
|
@@ -950,11 +866,7 @@ class FlowStep(BaseStep):
|
|
|
950
866
|
|
|
951
867
|
responders = []
|
|
952
868
|
for step in self._steps.values():
|
|
953
|
-
if (
|
|
954
|
-
hasattr(step, "responder")
|
|
955
|
-
and step.responder
|
|
956
|
-
and step.kind != "error_step"
|
|
957
|
-
):
|
|
869
|
+
if hasattr(step, "responder") and step.responder:
|
|
958
870
|
responders.append(step.name)
|
|
959
871
|
if step.on_error and step.on_error in start_steps:
|
|
960
872
|
start_steps.remove(step.on_error)
|
|
@@ -1067,10 +979,6 @@ class FlowStep(BaseStep):
|
|
|
1067
979
|
# never set a step as its own error handler
|
|
1068
980
|
if step != error_step:
|
|
1069
981
|
step.async_object.set_recovery_step(error_step.async_object)
|
|
1070
|
-
for next_step in error_step.next or []:
|
|
1071
|
-
next_state = self[next_step]
|
|
1072
|
-
if next_state.async_object and error_step.async_object:
|
|
1073
|
-
error_step.async_object.to(next_state.async_object)
|
|
1074
982
|
|
|
1075
983
|
self._controller = source.run()
|
|
1076
984
|
|
|
@@ -1151,22 +1059,15 @@ class FlowStep(BaseStep):
|
|
|
1151
1059
|
try:
|
|
1152
1060
|
event = next_obj.run(event, *args, **kwargs)
|
|
1153
1061
|
except Exception as exc:
|
|
1154
|
-
|
|
1155
|
-
|
|
1156
|
-
|
|
1157
|
-
event.terminated = True
|
|
1158
|
-
return event
|
|
1159
|
-
else:
|
|
1062
|
+
self._log_error(event, exc, failed_step=next_obj.name)
|
|
1063
|
+
handled = self._call_error_handler(event, exc)
|
|
1064
|
+
if not handled:
|
|
1160
1065
|
raise exc
|
|
1066
|
+
event.terminated = True
|
|
1067
|
+
return event
|
|
1161
1068
|
|
|
1162
1069
|
if hasattr(event, "terminated") and event.terminated:
|
|
1163
1070
|
return event
|
|
1164
|
-
if (
|
|
1165
|
-
hasattr(event, "error")
|
|
1166
|
-
and isinstance(event.error, dict)
|
|
1167
|
-
and next_obj.name in event.error
|
|
1168
|
-
):
|
|
1169
|
-
next_obj = self._steps[next_obj.on_error]
|
|
1170
1071
|
next = next_obj.next
|
|
1171
1072
|
if next and len(next) > 1:
|
|
1172
1073
|
raise GraphError(
|
|
@@ -1202,33 +1103,6 @@ class FlowStep(BaseStep):
|
|
|
1202
1103
|
**kw,
|
|
1203
1104
|
)
|
|
1204
1105
|
|
|
1205
|
-
def _insert_all_error_handlers(self):
|
|
1206
|
-
"""
|
|
1207
|
-
insert all error steps to the graph
|
|
1208
|
-
run after deployment
|
|
1209
|
-
"""
|
|
1210
|
-
for name, step in self._steps.items():
|
|
1211
|
-
if step.kind == "error_step":
|
|
1212
|
-
self._insert_error_step(name, step)
|
|
1213
|
-
|
|
1214
|
-
def _insert_error_step(self, name, step):
|
|
1215
|
-
"""
|
|
1216
|
-
insert error step to the graph
|
|
1217
|
-
run after deployment
|
|
1218
|
-
"""
|
|
1219
|
-
if not step.before and not any(
|
|
1220
|
-
[step.name in other_step.after for other_step in self._steps.values()]
|
|
1221
|
-
):
|
|
1222
|
-
step.responder = True
|
|
1223
|
-
return
|
|
1224
|
-
|
|
1225
|
-
for step_name in step.before:
|
|
1226
|
-
if step_name not in self._steps.keys():
|
|
1227
|
-
raise MLRunInvalidArgumentError(
|
|
1228
|
-
f"cant set before, there is no step named {step_name}"
|
|
1229
|
-
)
|
|
1230
|
-
self[step_name].after_step(name)
|
|
1231
|
-
|
|
1232
1106
|
|
|
1233
1107
|
class RootFlowStep(FlowStep):
|
|
1234
1108
|
"""root flow step"""
|
|
@@ -1242,7 +1116,6 @@ classes_map = {
|
|
|
1242
1116
|
"router": RouterStep,
|
|
1243
1117
|
"flow": FlowStep,
|
|
1244
1118
|
"queue": QueueStep,
|
|
1245
|
-
"error_step": ErrorStep,
|
|
1246
1119
|
}
|
|
1247
1120
|
|
|
1248
1121
|
|
|
@@ -1282,8 +1155,15 @@ def _add_graphviz_flow(
|
|
|
1282
1155
|
_add_graphviz_router(sg, child)
|
|
1283
1156
|
else:
|
|
1284
1157
|
graph.node(child.fullname, label=child.name, shape=child.get_shape())
|
|
1285
|
-
|
|
1286
|
-
|
|
1158
|
+
after = child.after or []
|
|
1159
|
+
for item in after:
|
|
1160
|
+
previous_object = step[item]
|
|
1161
|
+
kw = (
|
|
1162
|
+
{"ltail": "cluster_" + previous_object.fullname}
|
|
1163
|
+
if previous_object.kind == StepKinds.router
|
|
1164
|
+
else {}
|
|
1165
|
+
)
|
|
1166
|
+
graph.edge(previous_object.fullname, child.fullname, **kw)
|
|
1287
1167
|
if child.on_error:
|
|
1288
1168
|
graph.edge(child.fullname, child.on_error, style="dashed")
|
|
1289
1169
|
|
|
@@ -1303,18 +1183,6 @@ def _add_graphviz_flow(
|
|
|
1303
1183
|
graph.edge(last_step, target.fullname)
|
|
1304
1184
|
|
|
1305
1185
|
|
|
1306
|
-
def _add_edges(items, step, graph, child, after=True):
|
|
1307
|
-
for item in items:
|
|
1308
|
-
next_or_prev_object = step[item]
|
|
1309
|
-
kw = {}
|
|
1310
|
-
if next_or_prev_object.kind == StepKinds.router:
|
|
1311
|
-
kw["ltail"] = f"cluster_{next_or_prev_object.fullname}"
|
|
1312
|
-
if after:
|
|
1313
|
-
graph.edge(next_or_prev_object.fullname, child.fullname, **kw)
|
|
1314
|
-
else:
|
|
1315
|
-
graph.edge(child.fullname, next_or_prev_object.fullname, **kw)
|
|
1316
|
-
|
|
1317
|
-
|
|
1318
1186
|
def _generate_graphviz(
|
|
1319
1187
|
step,
|
|
1320
1188
|
renderer,
|
|
@@ -1487,7 +1355,7 @@ def _init_async_objects(context, steps):
|
|
|
1487
1355
|
endpoint, stream_path = parse_path(step.path)
|
|
1488
1356
|
stream_path = stream_path.strip("/")
|
|
1489
1357
|
step._async_object = storey.StreamTarget(
|
|
1490
|
-
storey.V3ioDriver(endpoint
|
|
1358
|
+
storey.V3ioDriver(endpoint),
|
|
1491
1359
|
stream_path,
|
|
1492
1360
|
context=context,
|
|
1493
1361
|
**options,
|
mlrun/serving/v2_serving.py
CHANGED
|
@@ -17,16 +17,17 @@ import traceback
|
|
|
17
17
|
from typing import Dict, Union
|
|
18
18
|
|
|
19
19
|
import mlrun
|
|
20
|
-
import mlrun.model_monitoring
|
|
21
20
|
from mlrun.api.schemas import (
|
|
22
21
|
ModelEndpoint,
|
|
23
22
|
ModelEndpointMetadata,
|
|
24
23
|
ModelEndpointSpec,
|
|
25
24
|
ModelEndpointStatus,
|
|
25
|
+
ModelMonitoringMode,
|
|
26
26
|
)
|
|
27
27
|
from mlrun.artifacts import ModelArtifact # noqa: F401
|
|
28
28
|
from mlrun.config import config
|
|
29
29
|
from mlrun.utils import logger, now_date, parse_versioned_object_uri
|
|
30
|
+
from mlrun.utils.model_monitoring import EndpointType
|
|
30
31
|
|
|
31
32
|
from .server import GraphServer
|
|
32
33
|
from .utils import StepToDict, _extract_input_data, _update_result_body
|
|
@@ -403,12 +404,11 @@ class _ModelLogPusher:
|
|
|
403
404
|
return base_data
|
|
404
405
|
|
|
405
406
|
def push(self, start, request, resp=None, op=None, error=None):
|
|
406
|
-
start_str = start.isoformat(sep=" ", timespec="microseconds")
|
|
407
407
|
if error:
|
|
408
408
|
data = self.base_data()
|
|
409
409
|
data["request"] = request
|
|
410
410
|
data["op"] = op
|
|
411
|
-
data["when"] =
|
|
411
|
+
data["when"] = str(start)
|
|
412
412
|
message = str(error)
|
|
413
413
|
if self.verbose:
|
|
414
414
|
message = f"{message}\n{traceback.format_exc()}"
|
|
@@ -445,7 +445,7 @@ class _ModelLogPusher:
|
|
|
445
445
|
data["request"] = request
|
|
446
446
|
data["op"] = op
|
|
447
447
|
data["resp"] = resp
|
|
448
|
-
data["when"] =
|
|
448
|
+
data["when"] = str(start)
|
|
449
449
|
data["microsec"] = microsec
|
|
450
450
|
if getattr(self.model, "metrics", None):
|
|
451
451
|
data["metrics"] = self.model.metrics
|
|
@@ -486,7 +486,7 @@ def _init_endpoint_record(
|
|
|
486
486
|
versioned_model_name = f"{model.name}:latest"
|
|
487
487
|
|
|
488
488
|
# Generating model endpoint ID based on function uri and model version
|
|
489
|
-
uid = mlrun.model_monitoring.
|
|
489
|
+
uid = mlrun.utils.model_monitoring.create_model_endpoint_id(
|
|
490
490
|
function_uri=graph_server.function_uri, versioned_model=versioned_model_name
|
|
491
491
|
).uid
|
|
492
492
|
|
|
@@ -511,21 +511,18 @@ def _init_endpoint_record(
|
|
|
511
511
|
project=project, kind="stream"
|
|
512
512
|
),
|
|
513
513
|
active=True,
|
|
514
|
-
monitoring_mode=
|
|
514
|
+
monitoring_mode=ModelMonitoringMode.enabled
|
|
515
515
|
if model.context.server.track_models
|
|
516
|
-
else
|
|
517
|
-
),
|
|
518
|
-
status=ModelEndpointStatus(
|
|
519
|
-
endpoint_type=mlrun.model_monitoring.EndpointType.NODE_EP
|
|
516
|
+
else ModelMonitoringMode.disabled,
|
|
520
517
|
),
|
|
518
|
+
status=ModelEndpointStatus(endpoint_type=EndpointType.NODE_EP),
|
|
521
519
|
)
|
|
522
520
|
|
|
523
521
|
db = mlrun.get_run_db()
|
|
524
|
-
|
|
525
522
|
db.create_model_endpoint(
|
|
526
523
|
project=project,
|
|
527
|
-
endpoint_id=uid,
|
|
528
|
-
model_endpoint=model_endpoint
|
|
524
|
+
endpoint_id=model_endpoint.metadata.uid,
|
|
525
|
+
model_endpoint=model_endpoint,
|
|
529
526
|
)
|
|
530
527
|
|
|
531
528
|
except Exception as e:
|
mlrun/utils/clones.py
CHANGED
|
@@ -51,7 +51,7 @@ def get_git_username_password_from_token(token):
|
|
|
51
51
|
# Github's access tokens have a known prefix according to their type. See
|
|
52
52
|
# https://docs.github.com/en/authentication/keeping-your-account-and-data-secure/about-authentication-to-github#githubs-token-formats
|
|
53
53
|
# We distinguish new fine-grained access tokens (which begin with "github_pat_") from classic tokens.
|
|
54
|
-
if token.startswith("github_pat_")
|
|
54
|
+
if token.startswith("github_pat_"):
|
|
55
55
|
username = "oauth2"
|
|
56
56
|
password = token
|
|
57
57
|
else:
|