mlrun 1.7.0rc14__py3-none-any.whl → 1.7.0rc21__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of mlrun might be problematic. Click here for more details.
- mlrun/__init__.py +10 -1
- mlrun/__main__.py +23 -111
- mlrun/alerts/__init__.py +15 -0
- mlrun/alerts/alert.py +144 -0
- mlrun/api/schemas/__init__.py +4 -3
- mlrun/artifacts/__init__.py +8 -3
- mlrun/artifacts/base.py +36 -253
- mlrun/artifacts/dataset.py +9 -190
- mlrun/artifacts/manager.py +46 -42
- mlrun/artifacts/model.py +9 -141
- mlrun/artifacts/plots.py +14 -375
- mlrun/common/constants.py +65 -3
- mlrun/common/formatters/__init__.py +19 -0
- mlrun/{runtimes/mpijob/v1alpha1.py → common/formatters/artifact.py} +6 -14
- mlrun/common/formatters/base.py +113 -0
- mlrun/common/formatters/function.py +46 -0
- mlrun/common/formatters/pipeline.py +53 -0
- mlrun/common/formatters/project.py +51 -0
- mlrun/{runtimes → common/runtimes}/constants.py +32 -4
- mlrun/common/schemas/__init__.py +10 -5
- mlrun/common/schemas/alert.py +92 -11
- mlrun/common/schemas/api_gateway.py +56 -0
- mlrun/common/schemas/artifact.py +15 -5
- mlrun/common/schemas/auth.py +2 -0
- mlrun/common/schemas/client_spec.py +1 -0
- mlrun/common/schemas/frontend_spec.py +1 -0
- mlrun/common/schemas/function.py +4 -0
- mlrun/common/schemas/model_monitoring/__init__.py +15 -3
- mlrun/common/schemas/model_monitoring/constants.py +58 -7
- mlrun/common/schemas/model_monitoring/grafana.py +9 -5
- mlrun/common/schemas/model_monitoring/model_endpoints.py +86 -2
- mlrun/common/schemas/pipeline.py +0 -9
- mlrun/common/schemas/project.py +5 -11
- mlrun/common/types.py +1 -0
- mlrun/config.py +27 -9
- mlrun/data_types/to_pandas.py +9 -9
- mlrun/datastore/base.py +41 -9
- mlrun/datastore/datastore.py +6 -2
- mlrun/datastore/datastore_profile.py +56 -4
- mlrun/datastore/inmem.py +2 -2
- mlrun/datastore/redis.py +2 -2
- mlrun/datastore/s3.py +5 -0
- mlrun/datastore/sources.py +147 -7
- mlrun/datastore/store_resources.py +7 -7
- mlrun/datastore/targets.py +110 -42
- mlrun/datastore/utils.py +42 -0
- mlrun/db/base.py +54 -10
- mlrun/db/httpdb.py +282 -79
- mlrun/db/nopdb.py +52 -10
- mlrun/errors.py +11 -0
- mlrun/execution.py +24 -9
- mlrun/feature_store/__init__.py +0 -2
- mlrun/feature_store/api.py +12 -47
- mlrun/feature_store/feature_set.py +9 -0
- mlrun/feature_store/feature_vector.py +8 -0
- mlrun/feature_store/ingestion.py +7 -6
- mlrun/feature_store/retrieval/base.py +9 -4
- mlrun/feature_store/retrieval/conversion.py +9 -9
- mlrun/feature_store/retrieval/dask_merger.py +2 -0
- mlrun/feature_store/retrieval/job.py +9 -3
- mlrun/feature_store/retrieval/local_merger.py +2 -0
- mlrun/feature_store/retrieval/spark_merger.py +16 -0
- mlrun/frameworks/_dl_common/loggers/tensorboard_logger.py +7 -12
- mlrun/frameworks/parallel_coordinates.py +2 -1
- mlrun/frameworks/tf_keras/__init__.py +4 -1
- mlrun/k8s_utils.py +10 -11
- mlrun/launcher/base.py +4 -3
- mlrun/launcher/client.py +5 -3
- mlrun/launcher/local.py +8 -2
- mlrun/launcher/remote.py +8 -2
- mlrun/lists.py +6 -2
- mlrun/model.py +45 -21
- mlrun/model_monitoring/__init__.py +1 -1
- mlrun/model_monitoring/api.py +41 -18
- mlrun/model_monitoring/application.py +5 -305
- mlrun/model_monitoring/applications/__init__.py +11 -0
- mlrun/model_monitoring/applications/_application_steps.py +157 -0
- mlrun/model_monitoring/applications/base.py +280 -0
- mlrun/model_monitoring/applications/context.py +214 -0
- mlrun/model_monitoring/applications/evidently_base.py +211 -0
- mlrun/model_monitoring/applications/histogram_data_drift.py +132 -91
- mlrun/model_monitoring/applications/results.py +99 -0
- mlrun/model_monitoring/controller.py +3 -1
- mlrun/model_monitoring/db/__init__.py +2 -0
- mlrun/model_monitoring/db/stores/__init__.py +0 -2
- mlrun/model_monitoring/db/stores/base/store.py +22 -37
- mlrun/model_monitoring/db/stores/sqldb/models/__init__.py +43 -21
- mlrun/model_monitoring/db/stores/sqldb/models/base.py +39 -8
- mlrun/model_monitoring/db/stores/sqldb/models/mysql.py +27 -7
- mlrun/model_monitoring/db/stores/sqldb/models/sqlite.py +5 -0
- mlrun/model_monitoring/db/stores/sqldb/sql_store.py +246 -224
- mlrun/model_monitoring/db/stores/v3io_kv/kv_store.py +232 -216
- mlrun/model_monitoring/db/tsdb/__init__.py +100 -0
- mlrun/model_monitoring/db/tsdb/base.py +329 -0
- mlrun/model_monitoring/db/tsdb/helpers.py +30 -0
- mlrun/model_monitoring/db/tsdb/tdengine/__init__.py +15 -0
- mlrun/model_monitoring/db/tsdb/tdengine/schemas.py +240 -0
- mlrun/model_monitoring/db/tsdb/tdengine/stream_graph_steps.py +45 -0
- mlrun/model_monitoring/db/tsdb/tdengine/tdengine_connector.py +397 -0
- mlrun/model_monitoring/db/tsdb/v3io/__init__.py +15 -0
- mlrun/model_monitoring/db/tsdb/v3io/stream_graph_steps.py +117 -0
- mlrun/model_monitoring/db/tsdb/v3io/v3io_connector.py +636 -0
- mlrun/model_monitoring/evidently_application.py +6 -118
- mlrun/model_monitoring/helpers.py +46 -1
- mlrun/model_monitoring/model_endpoint.py +3 -2
- mlrun/model_monitoring/stream_processing.py +57 -216
- mlrun/model_monitoring/writer.py +134 -124
- mlrun/package/utils/_formatter.py +2 -2
- mlrun/platforms/__init__.py +10 -9
- mlrun/platforms/iguazio.py +21 -202
- mlrun/projects/operations.py +19 -12
- mlrun/projects/pipelines.py +79 -102
- mlrun/projects/project.py +265 -103
- mlrun/render.py +15 -14
- mlrun/run.py +16 -46
- mlrun/runtimes/__init__.py +6 -3
- mlrun/runtimes/base.py +8 -7
- mlrun/runtimes/databricks_job/databricks_wrapper.py +1 -1
- mlrun/runtimes/funcdoc.py +0 -28
- mlrun/runtimes/kubejob.py +2 -1
- mlrun/runtimes/local.py +5 -2
- mlrun/runtimes/mpijob/__init__.py +0 -20
- mlrun/runtimes/mpijob/v1.py +1 -1
- mlrun/runtimes/nuclio/api_gateway.py +194 -84
- mlrun/runtimes/nuclio/application/application.py +170 -8
- mlrun/runtimes/nuclio/function.py +39 -49
- mlrun/runtimes/pod.py +16 -36
- mlrun/runtimes/remotesparkjob.py +9 -3
- mlrun/runtimes/sparkjob/spark3job.py +1 -1
- mlrun/runtimes/utils.py +6 -45
- mlrun/serving/server.py +2 -1
- mlrun/serving/v2_serving.py +5 -1
- mlrun/track/tracker.py +2 -1
- mlrun/utils/async_http.py +25 -5
- mlrun/utils/helpers.py +107 -75
- mlrun/utils/logger.py +39 -7
- mlrun/utils/notifications/notification/__init__.py +14 -9
- mlrun/utils/notifications/notification/base.py +1 -1
- mlrun/utils/notifications/notification/slack.py +34 -7
- mlrun/utils/notifications/notification/webhook.py +1 -1
- mlrun/utils/notifications/notification_pusher.py +147 -16
- mlrun/utils/regex.py +9 -0
- mlrun/utils/v3io_clients.py +0 -1
- mlrun/utils/version/version.json +2 -2
- {mlrun-1.7.0rc14.dist-info → mlrun-1.7.0rc21.dist-info}/METADATA +14 -6
- {mlrun-1.7.0rc14.dist-info → mlrun-1.7.0rc21.dist-info}/RECORD +150 -130
- mlrun/kfpops.py +0 -865
- mlrun/platforms/other.py +0 -305
- {mlrun-1.7.0rc14.dist-info → mlrun-1.7.0rc21.dist-info}/LICENSE +0 -0
- {mlrun-1.7.0rc14.dist-info → mlrun-1.7.0rc21.dist-info}/WHEEL +0 -0
- {mlrun-1.7.0rc14.dist-info → mlrun-1.7.0rc21.dist-info}/entry_points.txt +0 -0
- {mlrun-1.7.0rc14.dist-info → mlrun-1.7.0rc21.dist-info}/top_level.txt +0 -0
mlrun/projects/operations.py
CHANGED
|
@@ -15,9 +15,10 @@
|
|
|
15
15
|
import warnings
|
|
16
16
|
from typing import Optional, Union
|
|
17
17
|
|
|
18
|
-
import
|
|
18
|
+
from mlrun_pipelines.models import PipelineNodeWrapper
|
|
19
19
|
|
|
20
20
|
import mlrun
|
|
21
|
+
import mlrun.common.constants as mlrun_constants
|
|
21
22
|
from mlrun.utils import hub_prefix
|
|
22
23
|
|
|
23
24
|
from .pipelines import enrich_function_object, pipeline_context
|
|
@@ -76,7 +77,7 @@ def run_function(
|
|
|
76
77
|
notifications: list[mlrun.model.Notification] = None,
|
|
77
78
|
returns: Optional[list[Union[str, dict[str, str]]]] = None,
|
|
78
79
|
builder_env: Optional[list] = None,
|
|
79
|
-
) -> Union[mlrun.model.RunObject,
|
|
80
|
+
) -> Union[mlrun.model.RunObject, PipelineNodeWrapper]:
|
|
80
81
|
"""Run a local or remote task as part of a local/kubeflow pipeline
|
|
81
82
|
|
|
82
83
|
run_function() allow you to execute a function locally, on a remote cluster, or as part of an automated workflow
|
|
@@ -86,7 +87,7 @@ def run_function(
|
|
|
86
87
|
when functions run as part of a workflow/pipeline (project.run()) some attributes can be set at the run level,
|
|
87
88
|
e.g. local=True will run all the functions locally, setting artifact_path will direct all outputs to the same path.
|
|
88
89
|
project runs provide additional notifications/reporting and exception handling.
|
|
89
|
-
inside a Kubeflow pipeline (KFP) run_function() generates KFP
|
|
90
|
+
inside a Kubeflow pipeline (KFP) run_function() generates KFP node (see PipelineNodeWrapper) which forms a DAG
|
|
90
91
|
some behavior may differ between regular runs and deferred KFP runs.
|
|
91
92
|
|
|
92
93
|
example (use with function object)::
|
|
@@ -166,7 +167,7 @@ def run_function(
|
|
|
166
167
|
artifact type can be given there. The artifact key must appear in the dictionary as
|
|
167
168
|
"key": "the_key".
|
|
168
169
|
:param builder_env: env vars dict for source archive config/credentials e.g. builder_env={"GIT_TOKEN": token}
|
|
169
|
-
:return: MLRun RunObject or
|
|
170
|
+
:return: MLRun RunObject or PipelineNodeWrapper
|
|
170
171
|
"""
|
|
171
172
|
engine, function = _get_engine_and_function(function, project_object)
|
|
172
173
|
task = mlrun.new_task(
|
|
@@ -190,7 +191,9 @@ def run_function(
|
|
|
190
191
|
local = pipeline_context.is_run_local(local)
|
|
191
192
|
task.metadata.labels = task.metadata.labels or labels or {}
|
|
192
193
|
if pipeline_context.workflow_id:
|
|
193
|
-
task.metadata.labels[
|
|
194
|
+
task.metadata.labels[mlrun_constants.MLRunInternalLabels.workflow] = (
|
|
195
|
+
pipeline_context.workflow_id
|
|
196
|
+
)
|
|
194
197
|
if function.kind == "local":
|
|
195
198
|
command, function = mlrun.run.load_func_code(function)
|
|
196
199
|
function.spec.command = command
|
|
@@ -225,9 +228,9 @@ def run_function(
|
|
|
225
228
|
class BuildStatus:
|
|
226
229
|
"""returned status from build operation"""
|
|
227
230
|
|
|
228
|
-
def __init__(self, ready, outputs=
|
|
231
|
+
def __init__(self, ready, outputs=None, function=None):
|
|
229
232
|
self.ready = ready
|
|
230
|
-
self.outputs = outputs
|
|
233
|
+
self.outputs = outputs or {}
|
|
231
234
|
self.function = function
|
|
232
235
|
|
|
233
236
|
def after(self, step):
|
|
@@ -254,7 +257,7 @@ def build_function(
|
|
|
254
257
|
overwrite_build_params: bool = False,
|
|
255
258
|
extra_args: str = None,
|
|
256
259
|
force_build: bool = False,
|
|
257
|
-
) -> Union[BuildStatus,
|
|
260
|
+
) -> Union[BuildStatus, PipelineNodeWrapper]:
|
|
258
261
|
"""deploy ML function, build container with its dependencies
|
|
259
262
|
|
|
260
263
|
:param function: Name of the function (in the project) or function object
|
|
@@ -294,7 +297,11 @@ def build_function(
|
|
|
294
297
|
if overwrite_build_params:
|
|
295
298
|
function.spec.build.commands = None
|
|
296
299
|
if requirements or requirements_file:
|
|
297
|
-
function.with_requirements(
|
|
300
|
+
function.with_requirements(
|
|
301
|
+
requirements=requirements,
|
|
302
|
+
requirements_file=requirements_file,
|
|
303
|
+
overwrite=True,
|
|
304
|
+
)
|
|
298
305
|
if commands:
|
|
299
306
|
function.with_commands(commands)
|
|
300
307
|
return function.deploy_step(
|
|
@@ -336,9 +343,9 @@ def build_function(
|
|
|
336
343
|
class DeployStatus:
|
|
337
344
|
"""returned status from deploy operation"""
|
|
338
345
|
|
|
339
|
-
def __init__(self, state, outputs=
|
|
346
|
+
def __init__(self, state, outputs=None, function=None):
|
|
340
347
|
self.state = state
|
|
341
|
-
self.outputs = outputs
|
|
348
|
+
self.outputs = outputs or {}
|
|
342
349
|
self.function = function
|
|
343
350
|
|
|
344
351
|
def after(self, step):
|
|
@@ -358,7 +365,7 @@ def deploy_function(
|
|
|
358
365
|
builder_env: dict = None,
|
|
359
366
|
project_object=None,
|
|
360
367
|
mock: bool = None,
|
|
361
|
-
) -> Union[DeployStatus,
|
|
368
|
+
) -> Union[DeployStatus, PipelineNodeWrapper]:
|
|
362
369
|
"""deploy real-time (nuclio based) functions
|
|
363
370
|
|
|
364
371
|
:param function: name of the function (in the project) or function object
|
mlrun/projects/pipelines.py
CHANGED
|
@@ -20,18 +20,19 @@ import tempfile
|
|
|
20
20
|
import typing
|
|
21
21
|
import uuid
|
|
22
22
|
|
|
23
|
-
import
|
|
24
|
-
|
|
23
|
+
import mlrun_pipelines.common.models
|
|
24
|
+
import mlrun_pipelines.patcher
|
|
25
25
|
from kfp.compiler import compiler
|
|
26
|
+
from mlrun_pipelines.helpers import new_pipe_metadata
|
|
26
27
|
|
|
27
28
|
import mlrun
|
|
29
|
+
import mlrun.common.runtimes.constants
|
|
28
30
|
import mlrun.common.schemas
|
|
29
31
|
import mlrun.utils.notifications
|
|
30
32
|
from mlrun.errors import err_to_str
|
|
31
33
|
from mlrun.utils import (
|
|
32
34
|
get_ui_url,
|
|
33
35
|
logger,
|
|
34
|
-
new_pipe_metadata,
|
|
35
36
|
normalize_workflow_name,
|
|
36
37
|
retry_until_successful,
|
|
37
38
|
)
|
|
@@ -301,72 +302,6 @@ def _enrich_kfp_pod_security_context(kfp_pod_template, function):
|
|
|
301
302
|
}
|
|
302
303
|
|
|
303
304
|
|
|
304
|
-
# When we run pipelines, the kfp.compile.Compile.compile() method takes the decorated function with @dsl.pipeline and
|
|
305
|
-
# converts it to a k8s object. As part of the flow in the Compile.compile() method,
|
|
306
|
-
# we call _create_and_write_workflow, which builds a dictionary from the workflow and then writes it to a file.
|
|
307
|
-
# Unfortunately, the kfp sdk does not provide an API for configuring priority_class_name and other attributes.
|
|
308
|
-
# I ran across the following problem when seeking for a method to set the priority_class_name:
|
|
309
|
-
# https://github.com/kubeflow/pipelines/issues/3594
|
|
310
|
-
# When we patch the _create_and_write_workflow, we can eventually obtain the dictionary right before we write it
|
|
311
|
-
# to a file and enrich it with argo compatible fields, make sure you looking for the same argo version we use
|
|
312
|
-
# https://github.com/argoproj/argo-workflows/blob/release-2.7/pkg/apis/workflow/v1alpha1/workflow_types.go
|
|
313
|
-
def _create_enriched_mlrun_workflow(
|
|
314
|
-
self,
|
|
315
|
-
pipeline_func: typing.Callable,
|
|
316
|
-
pipeline_name: typing.Optional[str] = None,
|
|
317
|
-
pipeline_description: typing.Optional[str] = None,
|
|
318
|
-
params_list: typing.Optional[list[dsl.PipelineParam]] = None,
|
|
319
|
-
pipeline_conf: typing.Optional[dsl.PipelineConf] = None,
|
|
320
|
-
):
|
|
321
|
-
"""Call internal implementation of create_workflow and enrich with mlrun functions attributes"""
|
|
322
|
-
workflow = self._original_create_workflow(
|
|
323
|
-
pipeline_func, pipeline_name, pipeline_description, params_list, pipeline_conf
|
|
324
|
-
)
|
|
325
|
-
# We don't want to interrupt the original flow and don't know all the scenarios the function could be called.
|
|
326
|
-
# that's why we have try/except on all the code of the enrichment and also specific try/except for errors that
|
|
327
|
-
# we know can be raised.
|
|
328
|
-
try:
|
|
329
|
-
functions = []
|
|
330
|
-
if pipeline_context.functions:
|
|
331
|
-
try:
|
|
332
|
-
functions = pipeline_context.functions.values()
|
|
333
|
-
except Exception as err:
|
|
334
|
-
logger.debug(
|
|
335
|
-
"Unable to retrieve project functions, not enriching workflow with mlrun",
|
|
336
|
-
error=err_to_str(err),
|
|
337
|
-
)
|
|
338
|
-
return workflow
|
|
339
|
-
|
|
340
|
-
# enrich each pipeline step with your desire k8s attribute
|
|
341
|
-
for kfp_step_template in workflow["spec"]["templates"]:
|
|
342
|
-
if kfp_step_template.get("container"):
|
|
343
|
-
for function_obj in functions:
|
|
344
|
-
# we condition within each function since the comparison between the function and
|
|
345
|
-
# the kfp pod may change depending on the attribute type.
|
|
346
|
-
_set_function_attribute_on_kfp_pod(
|
|
347
|
-
kfp_step_template,
|
|
348
|
-
function_obj,
|
|
349
|
-
"PriorityClassName",
|
|
350
|
-
"priority_class_name",
|
|
351
|
-
)
|
|
352
|
-
_enrich_kfp_pod_security_context(
|
|
353
|
-
kfp_step_template,
|
|
354
|
-
function_obj,
|
|
355
|
-
)
|
|
356
|
-
except mlrun.errors.MLRunInvalidArgumentError:
|
|
357
|
-
raise
|
|
358
|
-
except Exception as err:
|
|
359
|
-
logger.debug(
|
|
360
|
-
"Something in the enrichment of kfp pods failed", error=err_to_str(err)
|
|
361
|
-
)
|
|
362
|
-
return workflow
|
|
363
|
-
|
|
364
|
-
|
|
365
|
-
# patching function as class method
|
|
366
|
-
kfp.compiler.Compiler._original_create_workflow = kfp.compiler.Compiler._create_workflow
|
|
367
|
-
kfp.compiler.Compiler._create_workflow = _create_enriched_mlrun_workflow
|
|
368
|
-
|
|
369
|
-
|
|
370
305
|
def get_db_function(project, key) -> mlrun.runtimes.BaseRuntime:
|
|
371
306
|
project_instance, name, tag, hash_key = parse_versioned_object_uri(
|
|
372
307
|
key, project.metadata.name
|
|
@@ -437,7 +372,7 @@ class _PipelineRunStatus:
|
|
|
437
372
|
engine: type["_PipelineRunner"],
|
|
438
373
|
project: "mlrun.projects.MlrunProject",
|
|
439
374
|
workflow: WorkflowSpec = None,
|
|
440
|
-
state:
|
|
375
|
+
state: mlrun_pipelines.common.models.RunStatuses = "",
|
|
441
376
|
exc: Exception = None,
|
|
442
377
|
):
|
|
443
378
|
"""
|
|
@@ -457,7 +392,10 @@ class _PipelineRunStatus:
|
|
|
457
392
|
|
|
458
393
|
@property
|
|
459
394
|
def state(self):
|
|
460
|
-
if
|
|
395
|
+
if (
|
|
396
|
+
self._state
|
|
397
|
+
not in mlrun_pipelines.common.models.RunStatuses.stable_statuses()
|
|
398
|
+
):
|
|
461
399
|
self._state = self._engine.get_state(self.run_id, self.project)
|
|
462
400
|
return self._state
|
|
463
401
|
|
|
@@ -506,6 +444,7 @@ class _PipelineRunner(abc.ABC):
|
|
|
506
444
|
namespace=None,
|
|
507
445
|
source=None,
|
|
508
446
|
notifications: list[mlrun.model.Notification] = None,
|
|
447
|
+
send_start_notification: bool = True,
|
|
509
448
|
) -> _PipelineRunStatus:
|
|
510
449
|
pass
|
|
511
450
|
|
|
@@ -542,6 +481,7 @@ class _PipelineRunner(abc.ABC):
|
|
|
542
481
|
timeout=None,
|
|
543
482
|
expected_statuses=None,
|
|
544
483
|
notifiers: mlrun.utils.notifications.CustomNotificationPusher = None,
|
|
484
|
+
**kwargs,
|
|
545
485
|
):
|
|
546
486
|
pass
|
|
547
487
|
|
|
@@ -584,6 +524,7 @@ class _KFPRunner(_PipelineRunner):
|
|
|
584
524
|
namespace=None,
|
|
585
525
|
source=None,
|
|
586
526
|
notifications: list[mlrun.model.Notification] = None,
|
|
527
|
+
send_start_notification: bool = True,
|
|
587
528
|
) -> _PipelineRunStatus:
|
|
588
529
|
pipeline_context.set(project, workflow_spec)
|
|
589
530
|
workflow_handler = _PipelineRunner._get_handler(
|
|
@@ -632,13 +573,13 @@ class _KFPRunner(_PipelineRunner):
|
|
|
632
573
|
func_name=func.metadata.name,
|
|
633
574
|
exc_info=err_to_str(exc),
|
|
634
575
|
)
|
|
635
|
-
|
|
636
|
-
|
|
637
|
-
|
|
638
|
-
|
|
639
|
-
|
|
640
|
-
|
|
641
|
-
|
|
576
|
+
if send_start_notification:
|
|
577
|
+
project.notifiers.push_pipeline_start_message(
|
|
578
|
+
project.metadata.name,
|
|
579
|
+
project.get_param("commit_id", None),
|
|
580
|
+
run_id,
|
|
581
|
+
True,
|
|
582
|
+
)
|
|
642
583
|
pipeline_context.clear()
|
|
643
584
|
return _PipelineRunStatus(run_id, cls, project=project, workflow=workflow_spec)
|
|
644
585
|
|
|
@@ -673,6 +614,7 @@ class _KFPRunner(_PipelineRunner):
|
|
|
673
614
|
timeout=None,
|
|
674
615
|
expected_statuses=None,
|
|
675
616
|
notifiers: mlrun.utils.notifications.CustomNotificationPusher = None,
|
|
617
|
+
**kwargs,
|
|
676
618
|
):
|
|
677
619
|
if timeout is None:
|
|
678
620
|
timeout = 60 * 60
|
|
@@ -728,6 +670,7 @@ class _LocalRunner(_PipelineRunner):
|
|
|
728
670
|
namespace=None,
|
|
729
671
|
source=None,
|
|
730
672
|
notifications: list[mlrun.model.Notification] = None,
|
|
673
|
+
send_start_notification: bool = True,
|
|
731
674
|
) -> _PipelineRunStatus:
|
|
732
675
|
pipeline_context.set(project, workflow_spec)
|
|
733
676
|
workflow_handler = _PipelineRunner._get_handler(
|
|
@@ -748,13 +691,15 @@ class _LocalRunner(_PipelineRunner):
|
|
|
748
691
|
original_source = project.spec.source
|
|
749
692
|
project.set_source(source=source)
|
|
750
693
|
pipeline_context.workflow_artifact_path = artifact_path
|
|
751
|
-
|
|
752
|
-
|
|
753
|
-
|
|
694
|
+
|
|
695
|
+
if send_start_notification:
|
|
696
|
+
project.notifiers.push_pipeline_start_message(
|
|
697
|
+
project.metadata.name, pipeline_id=workflow_id
|
|
698
|
+
)
|
|
754
699
|
err = None
|
|
755
700
|
try:
|
|
756
701
|
workflow_handler(**workflow_spec.args)
|
|
757
|
-
state =
|
|
702
|
+
state = mlrun_pipelines.common.models.RunStatuses.succeeded
|
|
758
703
|
except Exception as exc:
|
|
759
704
|
err = exc
|
|
760
705
|
logger.exception("Workflow run failed")
|
|
@@ -762,7 +707,7 @@ class _LocalRunner(_PipelineRunner):
|
|
|
762
707
|
f":x: Workflow {workflow_id} run failed!, error: {err_to_str(exc)}",
|
|
763
708
|
mlrun.common.schemas.NotificationSeverity.ERROR,
|
|
764
709
|
)
|
|
765
|
-
state =
|
|
710
|
+
state = mlrun_pipelines.common.models.RunStatuses.failed
|
|
766
711
|
mlrun.run.wait_for_runs_completion(pipeline_context.runs_map.values())
|
|
767
712
|
project.notifiers.push_pipeline_run_results(
|
|
768
713
|
pipeline_context.runs_map.values(), state=state
|
|
@@ -796,6 +741,7 @@ class _LocalRunner(_PipelineRunner):
|
|
|
796
741
|
timeout=None,
|
|
797
742
|
expected_statuses=None,
|
|
798
743
|
notifiers: mlrun.utils.notifications.CustomNotificationPusher = None,
|
|
744
|
+
**kwargs,
|
|
799
745
|
):
|
|
800
746
|
pass
|
|
801
747
|
|
|
@@ -817,13 +763,21 @@ class _RemoteRunner(_PipelineRunner):
|
|
|
817
763
|
namespace: str = None,
|
|
818
764
|
source: str = None,
|
|
819
765
|
notifications: list[mlrun.model.Notification] = None,
|
|
766
|
+
send_start_notification: bool = True,
|
|
820
767
|
) -> typing.Optional[_PipelineRunStatus]:
|
|
821
768
|
workflow_name = normalize_workflow_name(name=name, project_name=project.name)
|
|
822
769
|
workflow_id = None
|
|
823
770
|
|
|
824
771
|
# for start message, fallback to old notification behavior
|
|
825
|
-
|
|
826
|
-
|
|
772
|
+
if send_start_notification:
|
|
773
|
+
for notification in notifications or []:
|
|
774
|
+
project.notifiers.add_notification(
|
|
775
|
+
notification.kind, notification.params
|
|
776
|
+
)
|
|
777
|
+
# if a notification with `when=running` is provided, it will be used explicitly and others
|
|
778
|
+
# will be ignored
|
|
779
|
+
if "running" in notification.when:
|
|
780
|
+
break
|
|
827
781
|
|
|
828
782
|
# The returned engine for this runner is the engine of the workflow.
|
|
829
783
|
# In this way wait_for_completion/get_run_status would be executed by the correct pipeline runner.
|
|
@@ -921,9 +875,9 @@ class _RemoteRunner(_PipelineRunner):
|
|
|
921
875
|
f":x: Workflow {workflow_name} run failed!, error: {err_to_str(exc)}",
|
|
922
876
|
mlrun.common.schemas.NotificationSeverity.ERROR,
|
|
923
877
|
)
|
|
924
|
-
state =
|
|
878
|
+
state = mlrun_pipelines.common.models.RunStatuses.failed
|
|
925
879
|
else:
|
|
926
|
-
state =
|
|
880
|
+
state = mlrun_pipelines.common.models.RunStatuses.running
|
|
927
881
|
project.notifiers.push_pipeline_start_message(
|
|
928
882
|
project.metadata.name,
|
|
929
883
|
)
|
|
@@ -940,24 +894,47 @@ class _RemoteRunner(_PipelineRunner):
|
|
|
940
894
|
@staticmethod
|
|
941
895
|
def get_run_status(
|
|
942
896
|
project,
|
|
943
|
-
run,
|
|
897
|
+
run: _PipelineRunStatus,
|
|
944
898
|
timeout=None,
|
|
945
899
|
expected_statuses=None,
|
|
946
900
|
notifiers: mlrun.utils.notifications.CustomNotificationPusher = None,
|
|
901
|
+
inner_engine: type[_PipelineRunner] = None,
|
|
947
902
|
):
|
|
948
|
-
|
|
949
|
-
|
|
950
|
-
|
|
951
|
-
|
|
952
|
-
|
|
953
|
-
|
|
954
|
-
|
|
955
|
-
|
|
956
|
-
|
|
903
|
+
inner_engine = inner_engine or _KFPRunner
|
|
904
|
+
if inner_engine.engine == _KFPRunner.engine:
|
|
905
|
+
# ignore notifiers for remote notifications, as they are handled by the remote pipeline notifications,
|
|
906
|
+
# so overriding with CustomNotificationPusher with empty list of notifiers or only local notifiers
|
|
907
|
+
local_project_notifiers = list(
|
|
908
|
+
set(mlrun.utils.notifications.NotificationTypes.local()).intersection(
|
|
909
|
+
set(project.notifiers.notifications.keys())
|
|
910
|
+
)
|
|
911
|
+
)
|
|
912
|
+
notifiers = mlrun.utils.notifications.CustomNotificationPusher(
|
|
913
|
+
local_project_notifiers
|
|
914
|
+
)
|
|
915
|
+
return _KFPRunner.get_run_status(
|
|
916
|
+
project,
|
|
917
|
+
run,
|
|
918
|
+
timeout,
|
|
919
|
+
expected_statuses,
|
|
920
|
+
notifiers=notifiers,
|
|
921
|
+
)
|
|
957
922
|
|
|
958
|
-
|
|
959
|
-
|
|
960
|
-
|
|
923
|
+
elif inner_engine.engine == _LocalRunner.engine:
|
|
924
|
+
mldb = mlrun.db.get_run_db(secrets=project._secrets)
|
|
925
|
+
pipeline_runner_run = mldb.read_run(run.run_id, project=project.name)
|
|
926
|
+
pipeline_runner_run = mlrun.run.RunObject.from_dict(pipeline_runner_run)
|
|
927
|
+
pipeline_runner_run.logs(db=mldb)
|
|
928
|
+
pipeline_runner_run.refresh()
|
|
929
|
+
run._state = mlrun.common.runtimes.constants.RunStates.run_state_to_pipeline_run_status(
|
|
930
|
+
pipeline_runner_run.status.state
|
|
931
|
+
)
|
|
932
|
+
run._exc = pipeline_runner_run.status.error
|
|
933
|
+
|
|
934
|
+
else:
|
|
935
|
+
raise mlrun.errors.MLRunInvalidArgumentError(
|
|
936
|
+
f"Unsupported inner runner engine: {inner_engine.engine}"
|
|
937
|
+
)
|
|
961
938
|
|
|
962
939
|
|
|
963
940
|
def create_pipeline(project, pipeline, functions, secrets=None, handler=None):
|
|
@@ -1116,7 +1093,7 @@ def load_and_run(
|
|
|
1116
1093
|
context.log_result(key="workflow_id", value=run.run_id)
|
|
1117
1094
|
context.log_result(key="engine", value=run._engine.engine, commit=True)
|
|
1118
1095
|
|
|
1119
|
-
if run.state ==
|
|
1096
|
+
if run.state == mlrun_pipelines.common.models.RunStatuses.failed:
|
|
1120
1097
|
raise RuntimeError(f"Workflow {workflow_log_message} failed") from run.exc
|
|
1121
1098
|
|
|
1122
1099
|
if wait_for_completion:
|
|
@@ -1131,7 +1108,7 @@ def load_and_run(
|
|
|
1131
1108
|
|
|
1132
1109
|
pipeline_state, _, _ = project.get_run_status(run)
|
|
1133
1110
|
context.log_result(key="workflow_state", value=pipeline_state, commit=True)
|
|
1134
|
-
if pipeline_state !=
|
|
1111
|
+
if pipeline_state != mlrun_pipelines.common.models.RunStatuses.succeeded:
|
|
1135
1112
|
raise RuntimeError(
|
|
1136
1113
|
f"Workflow {workflow_log_message} failed, state={pipeline_state}"
|
|
1137
1114
|
)
|