mlrun-1.10.0rc11-py3-none-any.whl → mlrun-1.10.0rc13-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of mlrun might be problematic. Click here for more details.
- mlrun/__init__.py +2 -1
- mlrun/__main__.py +7 -1
- mlrun/artifacts/base.py +9 -3
- mlrun/artifacts/dataset.py +2 -1
- mlrun/artifacts/llm_prompt.py +6 -2
- mlrun/artifacts/model.py +2 -2
- mlrun/common/constants.py +1 -0
- mlrun/common/runtimes/constants.py +10 -1
- mlrun/common/schemas/__init__.py +1 -1
- mlrun/common/schemas/model_monitoring/model_endpoints.py +1 -1
- mlrun/common/schemas/serving.py +7 -0
- mlrun/config.py +21 -2
- mlrun/datastore/__init__.py +3 -1
- mlrun/datastore/alibaba_oss.py +1 -1
- mlrun/datastore/azure_blob.py +1 -1
- mlrun/datastore/base.py +6 -31
- mlrun/datastore/datastore.py +109 -33
- mlrun/datastore/datastore_profile.py +31 -0
- mlrun/datastore/dbfs_store.py +1 -1
- mlrun/datastore/google_cloud_storage.py +2 -2
- mlrun/datastore/model_provider/__init__.py +13 -0
- mlrun/datastore/model_provider/model_provider.py +160 -0
- mlrun/datastore/model_provider/openai_provider.py +144 -0
- mlrun/datastore/remote_client.py +65 -0
- mlrun/datastore/s3.py +1 -1
- mlrun/datastore/storeytargets.py +1 -1
- mlrun/datastore/utils.py +22 -0
- mlrun/datastore/v3io.py +1 -1
- mlrun/db/base.py +1 -1
- mlrun/db/httpdb.py +9 -4
- mlrun/db/nopdb.py +1 -1
- mlrun/execution.py +28 -7
- mlrun/launcher/base.py +23 -13
- mlrun/launcher/local.py +3 -1
- mlrun/launcher/remote.py +4 -2
- mlrun/model.py +65 -0
- mlrun/model_monitoring/db/tsdb/v3io/v3io_connector.py +175 -8
- mlrun/package/packagers_manager.py +2 -0
- mlrun/projects/operations.py +8 -1
- mlrun/projects/pipelines.py +40 -18
- mlrun/projects/project.py +28 -5
- mlrun/run.py +42 -2
- mlrun/runtimes/__init__.py +6 -0
- mlrun/runtimes/base.py +24 -6
- mlrun/runtimes/daskjob.py +1 -0
- mlrun/runtimes/databricks_job/databricks_runtime.py +1 -0
- mlrun/runtimes/local.py +1 -6
- mlrun/serving/server.py +1 -2
- mlrun/serving/states.py +438 -23
- mlrun/serving/system_steps.py +27 -29
- mlrun/utils/helpers.py +13 -2
- mlrun/utils/notifications/notification_pusher.py +15 -0
- mlrun/utils/version/version.json +2 -2
- {mlrun-1.10.0rc11.dist-info → mlrun-1.10.0rc13.dist-info}/METADATA +2 -2
- {mlrun-1.10.0rc11.dist-info → mlrun-1.10.0rc13.dist-info}/RECORD +59 -55
- {mlrun-1.10.0rc11.dist-info → mlrun-1.10.0rc13.dist-info}/WHEEL +0 -0
- {mlrun-1.10.0rc11.dist-info → mlrun-1.10.0rc13.dist-info}/entry_points.txt +0 -0
- {mlrun-1.10.0rc11.dist-info → mlrun-1.10.0rc13.dist-info}/licenses/LICENSE +0 -0
- {mlrun-1.10.0rc11.dist-info → mlrun-1.10.0rc13.dist-info}/top_level.txt +0 -0
mlrun/projects/pipelines.py
CHANGED
|
@@ -1081,34 +1081,56 @@ def rerun_workflow(
|
|
|
1081
1081
|
:param run_uid: The run UID of the original workflow to retry.
|
|
1082
1082
|
:param project_name: The project name.
|
|
1083
1083
|
"""
|
|
1084
|
+
db = mlrun.get_run_db()
|
|
1084
1085
|
|
|
1085
1086
|
try:
|
|
1086
|
-
#
|
|
1087
|
-
|
|
1088
|
-
# Retry the pipeline - TODO: add submit-direct flag when created
|
|
1089
|
-
db = mlrun.get_run_db()
|
|
1087
|
+
# Invoke the KFP retry endpoint (direct-submit mode)
|
|
1090
1088
|
new_pipeline_id = db.retry_pipeline(
|
|
1091
|
-
run_uid,
|
|
1089
|
+
run_id=run_uid,
|
|
1090
|
+
project=project_name,
|
|
1091
|
+
submit_mode=mlrun_constants.WorkflowSubmitMode.direct,
|
|
1092
|
+
)
|
|
1093
|
+
logger.info(
|
|
1094
|
+
"KFP retry submitted",
|
|
1095
|
+
new_pipeline_id=new_pipeline_id,
|
|
1096
|
+
rerun_of_workflow=run_uid,
|
|
1092
1097
|
)
|
|
1093
1098
|
|
|
1094
|
-
|
|
1095
|
-
|
|
1096
|
-
|
|
1099
|
+
except mlrun.errors.MLRunHTTPError as http_exc:
|
|
1100
|
+
logger.error(
|
|
1101
|
+
"Failed calling KFP retry API",
|
|
1102
|
+
run_id=run_uid,
|
|
1103
|
+
error=err_to_str(http_exc),
|
|
1097
1104
|
)
|
|
1098
|
-
|
|
1105
|
+
raise
|
|
1099
1106
|
|
|
1100
|
-
|
|
1107
|
+
# Enqueue "running" notifications server-side for this RerunRunner run
|
|
1108
|
+
db.push_run_notifications(context.uid, project_name)
|
|
1101
1109
|
|
|
1102
|
-
|
|
1103
|
-
|
|
1110
|
+
context.set_label(mlrun_constants.MLRunInternalLabels.workflow_id, new_pipeline_id)
|
|
1111
|
+
context.update_run()
|
|
1112
|
+
|
|
1113
|
+
context.log_result("workflow_id", new_pipeline_id)
|
|
1114
|
+
|
|
1115
|
+
try:
|
|
1116
|
+
pipeline = wait_for_pipeline_completion(
|
|
1104
1117
|
new_pipeline_id,
|
|
1105
1118
|
project=project_name,
|
|
1106
1119
|
)
|
|
1107
|
-
|
|
1108
|
-
# Temporary exception
|
|
1109
1120
|
except Exception as exc:
|
|
1110
|
-
|
|
1111
|
-
|
|
1121
|
+
mlrun.utils.logger.error(
|
|
1122
|
+
"Failed waiting for workflow completion",
|
|
1123
|
+
rerun_pipeline_id=new_pipeline_id,
|
|
1124
|
+
exc=err_to_str(exc),
|
|
1125
|
+
)
|
|
1126
|
+
else:
|
|
1127
|
+
final_state = pipeline["run"]["status"]
|
|
1128
|
+
context.log_result("workflow_state", final_state, commit=True)
|
|
1129
|
+
|
|
1130
|
+
if final_state != mlrun_pipelines.common.models.RunStatuses.succeeded:
|
|
1131
|
+
raise mlrun.errors.MLRunRuntimeError(
|
|
1132
|
+
f"Pipeline retry of {run_uid} finished in state={final_state}"
|
|
1133
|
+
)
|
|
1112
1134
|
|
|
1113
1135
|
|
|
1114
1136
|
def load_and_run(context, *args, **kwargs):
|
|
@@ -1201,13 +1223,13 @@ def load_and_run_workflow(
|
|
|
1201
1223
|
start_notifications = [
|
|
1202
1224
|
notification
|
|
1203
1225
|
for notification in context.get_notifications(unmask_secret_params=True)
|
|
1204
|
-
if
|
|
1226
|
+
if mlrun.common.runtimes.constants.RunStates.running in notification.when
|
|
1205
1227
|
]
|
|
1206
1228
|
|
|
1207
1229
|
# Prevent redundant notifications for run completion by ensuring that notifications are only triggered when the run
|
|
1208
1230
|
# reaches the "running" state, as the server already handles the completion notifications.
|
|
1209
1231
|
for notification in start_notifications:
|
|
1210
|
-
notification.when = [
|
|
1232
|
+
notification.when = [mlrun.common.runtimes.constants.RunStates.running]
|
|
1211
1233
|
|
|
1212
1234
|
workflow_log_message = workflow_name or workflow_path
|
|
1213
1235
|
context.logger.info(
|
mlrun/projects/project.py
CHANGED
|
@@ -159,7 +159,8 @@ def new_project(
|
|
|
159
159
|
parameters: Optional[dict] = None,
|
|
160
160
|
default_function_node_selector: Optional[dict] = None,
|
|
161
161
|
) -> "MlrunProject":
|
|
162
|
-
"""Create a new MLRun project, optionally load it from a yaml/zip/git template
|
|
162
|
+
"""Create a new MLRun project, optionally load it from a yaml/zip/git template.
|
|
163
|
+
The project will become the active project for the current session.
|
|
163
164
|
|
|
164
165
|
A new project is created and returned, you can customize the project by placing a project_setup.py file
|
|
165
166
|
in the project root dir, it will be executed upon project creation or loading.
|
|
@@ -326,7 +327,8 @@ def load_project(
|
|
|
326
327
|
parameters: Optional[dict] = None,
|
|
327
328
|
allow_cross_project: Optional[bool] = None,
|
|
328
329
|
) -> "MlrunProject":
|
|
329
|
-
"""Load an MLRun project from git or tar or dir
|
|
330
|
+
"""Load an MLRun project from git or tar or dir. The project will become the active project for
|
|
331
|
+
the current session.
|
|
330
332
|
|
|
331
333
|
MLRun looks for a project.yaml file with project definition and objects in the project root path
|
|
332
334
|
and use it to initialize the project, in addition it runs the project_setup.py file (if it exists)
|
|
@@ -1940,6 +1942,11 @@ class MlrunProject(ModelObj):
|
|
|
1940
1942
|
:returns: The logged `LLMPromptArtifact` object.
|
|
1941
1943
|
"""
|
|
1942
1944
|
|
|
1945
|
+
if not prompt_string and not prompt_path:
|
|
1946
|
+
raise mlrun.errors.MLRunInvalidArgumentError(
|
|
1947
|
+
"Either 'prompt_string' or 'prompt_path' must be provided"
|
|
1948
|
+
)
|
|
1949
|
+
|
|
1943
1950
|
llm_prompt = LLMPromptArtifact(
|
|
1944
1951
|
key=key,
|
|
1945
1952
|
project=self.name,
|
|
@@ -2688,8 +2695,8 @@ class MlrunProject(ModelObj):
|
|
|
2688
2695
|
requirements_file: str = "",
|
|
2689
2696
|
) -> mlrun.runtimes.BaseRuntime:
|
|
2690
2697
|
"""
|
|
2691
|
-
|
|
2692
|
-
|
|
2698
|
+
Update or add a function object to the project.
|
|
2699
|
+
Function can be provided as an object (func) or a .py/.ipynb/.yaml URL.
|
|
2693
2700
|
|
|
2694
2701
|
| Creating a function from a single file is done by specifying ``func`` and disabling ``with_repo``.
|
|
2695
2702
|
| Creating a function with project source (specify ``with_repo=True``):
|
|
@@ -2734,6 +2741,20 @@ class MlrunProject(ModelObj):
|
|
|
2734
2741
|
# By providing a path to a pip requirements file
|
|
2735
2742
|
proj.set_function("my.py", requirements="requirements.txt")
|
|
2736
2743
|
|
|
2744
|
+
One of the most important parameters is 'kind', used to specify the chosen runtime. The options are:
|
|
2745
|
+
- local: execute a local python or shell script
|
|
2746
|
+
- job: insert the code into a Kubernetes pod and execute it
|
|
2747
|
+
- nuclio: insert the code into a real-time serverless nuclio function
|
|
2748
|
+
- serving: insert code into orchestrated nuclio function(s) forming a DAG
|
|
2749
|
+
- dask: run the specified python code / script as Dask Distributed job
|
|
2750
|
+
- mpijob: run distributed Horovod jobs over the MPI job operator
|
|
2751
|
+
- spark: run distributed Spark job using Spark Kubernetes Operator
|
|
2752
|
+
- remote-spark: run distributed Spark job on remote Spark service
|
|
2753
|
+
- databricks: run code on Databricks cluster (python scripts, Spark etc.)
|
|
2754
|
+
- application: run a long living application (e.g. a web server, UI, etc.)
|
|
2755
|
+
|
|
2756
|
+
Learn more about :doc:`../../concepts/functions-overview`.
|
|
2757
|
+
|
|
2737
2758
|
:param func: Function object or spec/code url, None refers to current Notebook
|
|
2738
2759
|
:param name: Name of the function (under the project), can be specified with a tag to support
|
|
2739
2760
|
Versions (e.g. myfunc:v1). If the `tag` parameter is provided, the tag in the name
|
|
@@ -3967,6 +3988,7 @@ class MlrunProject(ModelObj):
|
|
|
3967
3988
|
builder_env: Optional[dict] = None,
|
|
3968
3989
|
reset_on_run: Optional[bool] = None,
|
|
3969
3990
|
output_path: Optional[str] = None,
|
|
3991
|
+
retry: Optional[Union[mlrun.model.Retry, dict]] = None,
|
|
3970
3992
|
) -> typing.Union[mlrun.model.RunObject, PipelineNodeWrapper]:
|
|
3971
3993
|
"""Run a local or remote task as part of a local/kubeflow pipeline
|
|
3972
3994
|
|
|
@@ -4029,7 +4051,7 @@ class MlrunProject(ModelObj):
|
|
|
4029
4051
|
This ensures latest code changes are executed. This argument must be used in
|
|
4030
4052
|
conjunction with the local=True argument.
|
|
4031
4053
|
:param output_path: path to store artifacts, when running in a workflow this will be set automatically
|
|
4032
|
-
|
|
4054
|
+
:param retry: Retry configuration for the run, can be a dict or an instance of mlrun.model.Retry.
|
|
4033
4055
|
:return: MLRun RunObject or PipelineNodeWrapper
|
|
4034
4056
|
"""
|
|
4035
4057
|
if artifact_path:
|
|
@@ -4068,6 +4090,7 @@ class MlrunProject(ModelObj):
|
|
|
4068
4090
|
returns=returns,
|
|
4069
4091
|
builder_env=builder_env,
|
|
4070
4092
|
reset_on_run=reset_on_run,
|
|
4093
|
+
retry=retry,
|
|
4071
4094
|
)
|
|
4072
4095
|
|
|
4073
4096
|
def build_function(
|
mlrun/run.py
CHANGED
|
@@ -36,6 +36,7 @@ import mlrun.common.schemas
|
|
|
36
36
|
import mlrun.errors
|
|
37
37
|
import mlrun.utils.helpers
|
|
38
38
|
import mlrun_pipelines.utils
|
|
39
|
+
from mlrun.datastore.model_provider.model_provider import ModelProvider
|
|
39
40
|
from mlrun_pipelines.common.models import RunStatuses
|
|
40
41
|
from mlrun_pipelines.common.ops import format_summary_from_kfp_run, show_kfp_run
|
|
41
42
|
|
|
@@ -894,7 +895,7 @@ def _run_pipeline(
|
|
|
894
895
|
def retry_pipeline(
|
|
895
896
|
run_id: str,
|
|
896
897
|
project: str,
|
|
897
|
-
) -> str:
|
|
898
|
+
) -> typing.Union[str, dict[str, str]]:
|
|
898
899
|
"""Retry a pipeline run.
|
|
899
900
|
|
|
900
901
|
This function retries a previously executed pipeline run using the specified run ID. If the run is not in a
|
|
@@ -913,10 +914,33 @@ def retry_pipeline(
|
|
|
913
914
|
"Please set the dbpath URL."
|
|
914
915
|
)
|
|
915
916
|
|
|
916
|
-
|
|
917
|
+
# Invoke retry pipeline run. Depending on the context, this call returns either:
|
|
918
|
+
# 1. A simple string of a workflow-id, for direct retries or non-remote workflows, or
|
|
919
|
+
# 2. A dict payload representing a WorkflowResponse when rerunning remote workflows.
|
|
920
|
+
rerun_response = mldb.retry_pipeline(
|
|
917
921
|
run_id=run_id,
|
|
918
922
|
project=project,
|
|
919
923
|
)
|
|
924
|
+
if isinstance(rerun_response, str):
|
|
925
|
+
pipeline_run_id = rerun_response
|
|
926
|
+
else:
|
|
927
|
+
rerun_response = mlrun.common.schemas.WorkflowResponse(**rerun_response)
|
|
928
|
+
|
|
929
|
+
def _fetch_workflow_id():
|
|
930
|
+
rerun = mldb.read_run(rerun_response.run_id, project)
|
|
931
|
+
workflow_id = rerun["metadata"]["labels"].get("workflow-id")
|
|
932
|
+
if not workflow_id:
|
|
933
|
+
raise mlrun.errors.MLRunRuntimeError("workflow-id label not set yet")
|
|
934
|
+
return workflow_id
|
|
935
|
+
|
|
936
|
+
pipeline_run_id = mlrun.utils.helpers.retry_until_successful(
|
|
937
|
+
backoff=3,
|
|
938
|
+
timeout=int(mlrun.mlconf.workflows.timeouts.remote),
|
|
939
|
+
logger=logger,
|
|
940
|
+
verbose=False,
|
|
941
|
+
_function=_fetch_workflow_id,
|
|
942
|
+
)
|
|
943
|
+
|
|
920
944
|
if pipeline_run_id == run_id:
|
|
921
945
|
logger.info(
|
|
922
946
|
f"Retried pipeline run ID={pipeline_run_id}, check UI for progress."
|
|
@@ -1152,6 +1176,22 @@ def get_dataitem(url, secrets=None, db=None) -> "DataItem":
|
|
|
1152
1176
|
return stores.object(url=url)
|
|
1153
1177
|
|
|
1154
1178
|
|
|
1179
|
+
def get_model_provider(
|
|
1180
|
+
url,
|
|
1181
|
+
secrets=None,
|
|
1182
|
+
db=None,
|
|
1183
|
+
default_invoke_kwargs: Optional[dict] = None,
|
|
1184
|
+
raise_missing_schema_exception=True,
|
|
1185
|
+
) -> ModelProvider:
|
|
1186
|
+
"""get mlrun dataitem object (from path/url)"""
|
|
1187
|
+
store_manager.set(secrets, db=db)
|
|
1188
|
+
return store_manager.model_provider_object(
|
|
1189
|
+
url=url,
|
|
1190
|
+
default_invoke_kwargs=default_invoke_kwargs,
|
|
1191
|
+
raise_missing_schema_exception=raise_missing_schema_exception,
|
|
1192
|
+
)
|
|
1193
|
+
|
|
1194
|
+
|
|
1155
1195
|
def download_object(url, target, secrets=None):
|
|
1156
1196
|
"""download mlrun dataitem (from path/url to target path)"""
|
|
1157
1197
|
stores = store_manager.set(secrets)
|
mlrun/runtimes/__init__.py
CHANGED
mlrun/runtimes/base.py
CHANGED
|
@@ -33,6 +33,7 @@ import mlrun.launcher.factory
|
|
|
33
33
|
import mlrun.utils.helpers
|
|
34
34
|
import mlrun.utils.notifications
|
|
35
35
|
import mlrun.utils.regex
|
|
36
|
+
from mlrun.common.runtimes.constants import RunStates
|
|
36
37
|
from mlrun.model import (
|
|
37
38
|
BaseMetadata,
|
|
38
39
|
HyperParamOptions,
|
|
@@ -319,6 +320,7 @@ class BaseRuntime(ModelObj):
|
|
|
319
320
|
state_thresholds: Optional[dict[str, int]] = None,
|
|
320
321
|
reset_on_run: Optional[bool] = None,
|
|
321
322
|
output_path: Optional[str] = "",
|
|
323
|
+
retry: Optional[Union[mlrun.model.Retry, dict]] = None,
|
|
322
324
|
**launcher_kwargs,
|
|
323
325
|
) -> RunObject:
|
|
324
326
|
"""
|
|
@@ -377,6 +379,7 @@ class BaseRuntime(ModelObj):
|
|
|
377
379
|
This ensures latest code changes are executed. This argument must be used in
|
|
378
380
|
conjunction with the local=True argument.
|
|
379
381
|
:param output_path: Default artifact output path.
|
|
382
|
+
:param retry: Retry configuration for the run, can be a dict or an instance of mlrun.model.Retry.
|
|
380
383
|
:return: Run context object (RunObject) with run metadata, results and status
|
|
381
384
|
"""
|
|
382
385
|
if artifact_path or out_path:
|
|
@@ -414,6 +417,7 @@ class BaseRuntime(ModelObj):
|
|
|
414
417
|
returns=returns,
|
|
415
418
|
state_thresholds=state_thresholds,
|
|
416
419
|
reset_on_run=reset_on_run,
|
|
420
|
+
retry=retry,
|
|
417
421
|
)
|
|
418
422
|
|
|
419
423
|
def _get_db_run(
|
|
@@ -570,12 +574,27 @@ class BaseRuntime(ModelObj):
|
|
|
570
574
|
updates = None
|
|
571
575
|
last_state = get_in(resp, "status.state", "")
|
|
572
576
|
kind = get_in(resp, "metadata.labels.kind", "")
|
|
573
|
-
if last_state
|
|
577
|
+
if last_state in RunStates.error_states() or err:
|
|
578
|
+
new_state = RunStates.error
|
|
579
|
+
status_text = None
|
|
580
|
+
max_retries = get_in(resp, "spec.retry.count", 0)
|
|
581
|
+
retry_count = get_in(resp, "status.retry_count", 0) or 0
|
|
582
|
+
attempts = retry_count + 1
|
|
583
|
+
if max_retries:
|
|
584
|
+
if retry_count < max_retries:
|
|
585
|
+
new_state = RunStates.pending_retry
|
|
586
|
+
status_text = f"Run failed attempt {attempts} of {max_retries + 1}"
|
|
587
|
+
elif retry_count >= max_retries:
|
|
588
|
+
status_text = f"Run failed after {attempts} attempts"
|
|
589
|
+
|
|
574
590
|
updates = {
|
|
575
591
|
"status.last_update": now_date().isoformat(),
|
|
576
|
-
"status.state":
|
|
592
|
+
"status.state": new_state,
|
|
577
593
|
}
|
|
578
|
-
update_in(resp, "status.state",
|
|
594
|
+
update_in(resp, "status.state", new_state)
|
|
595
|
+
if status_text:
|
|
596
|
+
updates["status.status_text"] = status_text
|
|
597
|
+
update_in(resp, "status.status_text", status_text)
|
|
579
598
|
if err:
|
|
580
599
|
update_in(resp, "status.error", err_to_str(err))
|
|
581
600
|
err = get_in(resp, "status.error")
|
|
@@ -584,9 +603,8 @@ class BaseRuntime(ModelObj):
|
|
|
584
603
|
|
|
585
604
|
elif (
|
|
586
605
|
not was_none
|
|
587
|
-
and last_state !=
|
|
588
|
-
and last_state
|
|
589
|
-
not in mlrun.common.runtimes.constants.RunStates.error_and_abortion_states()
|
|
606
|
+
and last_state != RunStates.completed
|
|
607
|
+
and last_state not in RunStates.error_and_abortion_states()
|
|
590
608
|
):
|
|
591
609
|
try:
|
|
592
610
|
runtime_cls = mlrun.runtimes.get_runtime_class(kind)
|
mlrun/runtimes/daskjob.py
CHANGED
|
@@ -505,6 +505,7 @@ class DaskCluster(KubejobRuntime):
|
|
|
505
505
|
state_thresholds: Optional[dict[str, int]] = None,
|
|
506
506
|
reset_on_run: Optional[bool] = None,
|
|
507
507
|
output_path: Optional[str] = "",
|
|
508
|
+
retry: Optional[Union[mlrun.model.Retry, dict]] = None,
|
|
508
509
|
**launcher_kwargs,
|
|
509
510
|
) -> RunObject:
|
|
510
511
|
if state_thresholds:
|
mlrun/runtimes/databricks_job/databricks_runtime.py
CHANGED
|
@@ -233,6 +233,7 @@ def run_mlrun_databricks_job(context,task_parameters: dict, **kwargs):
|
|
|
233
233
|
state_thresholds: Optional[dict[str, int]] = None,
|
|
234
234
|
reset_on_run: Optional[bool] = None,
|
|
235
235
|
output_path: Optional[str] = "",
|
|
236
|
+
retry: Optional[Union[mlrun.model.Retry, dict]] = None,
|
|
236
237
|
**launcher_kwargs,
|
|
237
238
|
) -> RunObject:
|
|
238
239
|
if local:
|
mlrun/runtimes/local.py
CHANGED
|
@@ -34,6 +34,7 @@ from nuclio import Event
|
|
|
34
34
|
|
|
35
35
|
import mlrun
|
|
36
36
|
import mlrun.common.constants as mlrun_constants
|
|
37
|
+
import mlrun.common.runtimes.constants
|
|
37
38
|
from mlrun.lists import RunList
|
|
38
39
|
|
|
39
40
|
from ..errors import err_to_str
|
|
@@ -315,15 +316,9 @@ class LocalRuntime(BaseRuntime, ParallelRunner):
|
|
|
315
316
|
return context.to_dict()
|
|
316
317
|
|
|
317
318
|
# if RunError was raised it means that the error was raised as part of running the function
|
|
318
|
-
# ( meaning the state was already updated to error ) therefore we just re-raise the error
|
|
319
319
|
except RunError as err:
|
|
320
320
|
raise err
|
|
321
|
-
# this exception handling is for the case where we fail on pre-loading or post-running the function
|
|
322
|
-
# and the state was not updated to error yet, therefore we update the state to error and raise as RunError
|
|
323
321
|
except Exception as exc:
|
|
324
|
-
# set_state here is mainly for sanity, as we will raise RunError which is expected to be handled
|
|
325
|
-
# by the caller and will set the state to error ( in `update_run_state` )
|
|
326
|
-
context.set_state(error=err_to_str(exc), commit=True)
|
|
327
322
|
logger.error(f"Run error, {traceback.format_exc()}")
|
|
328
323
|
raise RunError(
|
|
329
324
|
"Failed on pre-loading / post-running of the function"
|
mlrun/serving/server.py
CHANGED
|
@@ -395,7 +395,6 @@ def add_monitoring_general_steps(
|
|
|
395
395
|
monitor_flow_step = graph.add_step(
|
|
396
396
|
"mlrun.serving.system_steps.BackgroundTaskStatus",
|
|
397
397
|
"background_task_status_step",
|
|
398
|
-
context=context,
|
|
399
398
|
model_endpoint_creation_strategy=mlrun.common.schemas.ModelEndpointCreationStrategy.SKIP,
|
|
400
399
|
)
|
|
401
400
|
graph.add_step(
|
|
@@ -410,7 +409,6 @@ def add_monitoring_general_steps(
|
|
|
410
409
|
"monitoring_pre_processor_step",
|
|
411
410
|
after="filter_none",
|
|
412
411
|
full_event=True,
|
|
413
|
-
context=context,
|
|
414
412
|
model_endpoint_creation_strategy=mlrun.common.schemas.ModelEndpointCreationStrategy.SKIP,
|
|
415
413
|
)
|
|
416
414
|
# flatten the events
|
|
@@ -790,6 +788,7 @@ class GraphContext:
|
|
|
790
788
|
self.verbose = False
|
|
791
789
|
self.stream = None
|
|
792
790
|
self.root = None
|
|
791
|
+
self.executor: Optional[storey.flow.RunnableExecutor] = None
|
|
793
792
|
|
|
794
793
|
if nuclio_context:
|
|
795
794
|
self.logger: NuclioLogger = nuclio_context.logger
|