mlrun 1.7.0rc14__py3-none-any.whl → 1.7.0rc22__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of mlrun might be problematic. Click here for more details.
- mlrun/__init__.py +10 -1
- mlrun/__main__.py +23 -111
- mlrun/alerts/__init__.py +15 -0
- mlrun/alerts/alert.py +169 -0
- mlrun/api/schemas/__init__.py +4 -3
- mlrun/artifacts/__init__.py +8 -3
- mlrun/artifacts/base.py +36 -253
- mlrun/artifacts/dataset.py +9 -190
- mlrun/artifacts/manager.py +46 -42
- mlrun/artifacts/model.py +9 -141
- mlrun/artifacts/plots.py +14 -375
- mlrun/common/constants.py +65 -3
- mlrun/common/formatters/__init__.py +19 -0
- mlrun/{runtimes/mpijob/v1alpha1.py → common/formatters/artifact.py} +6 -14
- mlrun/common/formatters/base.py +113 -0
- mlrun/common/formatters/function.py +46 -0
- mlrun/common/formatters/pipeline.py +53 -0
- mlrun/common/formatters/project.py +51 -0
- mlrun/{runtimes → common/runtimes}/constants.py +32 -4
- mlrun/common/schemas/__init__.py +10 -5
- mlrun/common/schemas/alert.py +92 -11
- mlrun/common/schemas/api_gateway.py +56 -0
- mlrun/common/schemas/artifact.py +15 -5
- mlrun/common/schemas/auth.py +2 -0
- mlrun/common/schemas/client_spec.py +1 -0
- mlrun/common/schemas/frontend_spec.py +1 -0
- mlrun/common/schemas/function.py +4 -0
- mlrun/common/schemas/model_monitoring/__init__.py +15 -3
- mlrun/common/schemas/model_monitoring/constants.py +58 -7
- mlrun/common/schemas/model_monitoring/grafana.py +9 -5
- mlrun/common/schemas/model_monitoring/model_endpoints.py +86 -2
- mlrun/common/schemas/pipeline.py +0 -9
- mlrun/common/schemas/project.py +5 -11
- mlrun/common/types.py +1 -0
- mlrun/config.py +30 -9
- mlrun/data_types/to_pandas.py +9 -9
- mlrun/datastore/base.py +41 -9
- mlrun/datastore/datastore.py +6 -2
- mlrun/datastore/datastore_profile.py +56 -4
- mlrun/datastore/inmem.py +2 -2
- mlrun/datastore/redis.py +2 -2
- mlrun/datastore/s3.py +5 -0
- mlrun/datastore/sources.py +147 -7
- mlrun/datastore/store_resources.py +7 -7
- mlrun/datastore/targets.py +110 -42
- mlrun/datastore/utils.py +42 -0
- mlrun/db/base.py +54 -10
- mlrun/db/httpdb.py +282 -79
- mlrun/db/nopdb.py +52 -10
- mlrun/errors.py +11 -0
- mlrun/execution.py +26 -9
- mlrun/feature_store/__init__.py +0 -2
- mlrun/feature_store/api.py +12 -47
- mlrun/feature_store/feature_set.py +9 -0
- mlrun/feature_store/feature_vector.py +8 -0
- mlrun/feature_store/ingestion.py +7 -6
- mlrun/feature_store/retrieval/base.py +9 -4
- mlrun/feature_store/retrieval/conversion.py +9 -9
- mlrun/feature_store/retrieval/dask_merger.py +2 -0
- mlrun/feature_store/retrieval/job.py +9 -3
- mlrun/feature_store/retrieval/local_merger.py +2 -0
- mlrun/feature_store/retrieval/spark_merger.py +16 -0
- mlrun/frameworks/__init__.py +6 -0
- mlrun/frameworks/_dl_common/loggers/tensorboard_logger.py +7 -12
- mlrun/frameworks/parallel_coordinates.py +2 -1
- mlrun/frameworks/tf_keras/__init__.py +4 -1
- mlrun/k8s_utils.py +10 -11
- mlrun/launcher/base.py +4 -3
- mlrun/launcher/client.py +5 -3
- mlrun/launcher/local.py +12 -2
- mlrun/launcher/remote.py +9 -2
- mlrun/lists.py +6 -2
- mlrun/model.py +47 -21
- mlrun/model_monitoring/__init__.py +1 -1
- mlrun/model_monitoring/api.py +42 -18
- mlrun/model_monitoring/application.py +5 -305
- mlrun/model_monitoring/applications/__init__.py +11 -0
- mlrun/model_monitoring/applications/_application_steps.py +157 -0
- mlrun/model_monitoring/applications/base.py +280 -0
- mlrun/model_monitoring/applications/context.py +214 -0
- mlrun/model_monitoring/applications/evidently_base.py +211 -0
- mlrun/model_monitoring/applications/histogram_data_drift.py +132 -91
- mlrun/model_monitoring/applications/results.py +99 -0
- mlrun/model_monitoring/controller.py +3 -1
- mlrun/model_monitoring/db/__init__.py +2 -0
- mlrun/model_monitoring/db/stores/__init__.py +0 -2
- mlrun/model_monitoring/db/stores/base/store.py +22 -37
- mlrun/model_monitoring/db/stores/sqldb/models/__init__.py +43 -21
- mlrun/model_monitoring/db/stores/sqldb/models/base.py +39 -8
- mlrun/model_monitoring/db/stores/sqldb/models/mysql.py +27 -7
- mlrun/model_monitoring/db/stores/sqldb/models/sqlite.py +5 -0
- mlrun/model_monitoring/db/stores/sqldb/sql_store.py +246 -224
- mlrun/model_monitoring/db/stores/v3io_kv/kv_store.py +232 -216
- mlrun/model_monitoring/db/tsdb/__init__.py +100 -0
- mlrun/model_monitoring/db/tsdb/base.py +316 -0
- mlrun/model_monitoring/db/tsdb/helpers.py +30 -0
- mlrun/model_monitoring/db/tsdb/tdengine/__init__.py +15 -0
- mlrun/model_monitoring/db/tsdb/tdengine/schemas.py +240 -0
- mlrun/model_monitoring/db/tsdb/tdengine/stream_graph_steps.py +45 -0
- mlrun/model_monitoring/db/tsdb/tdengine/tdengine_connector.py +401 -0
- mlrun/model_monitoring/db/tsdb/v3io/__init__.py +15 -0
- mlrun/model_monitoring/db/tsdb/v3io/stream_graph_steps.py +117 -0
- mlrun/model_monitoring/db/tsdb/v3io/v3io_connector.py +658 -0
- mlrun/model_monitoring/evidently_application.py +6 -118
- mlrun/model_monitoring/helpers.py +63 -1
- mlrun/model_monitoring/model_endpoint.py +3 -2
- mlrun/model_monitoring/stream_processing.py +57 -216
- mlrun/model_monitoring/writer.py +134 -124
- mlrun/package/__init__.py +13 -1
- mlrun/package/packagers/__init__.py +6 -1
- mlrun/package/utils/_formatter.py +2 -2
- mlrun/platforms/__init__.py +10 -9
- mlrun/platforms/iguazio.py +21 -202
- mlrun/projects/operations.py +24 -12
- mlrun/projects/pipelines.py +79 -102
- mlrun/projects/project.py +271 -103
- mlrun/render.py +15 -14
- mlrun/run.py +16 -46
- mlrun/runtimes/__init__.py +6 -3
- mlrun/runtimes/base.py +14 -7
- mlrun/runtimes/daskjob.py +1 -0
- mlrun/runtimes/databricks_job/databricks_runtime.py +1 -0
- mlrun/runtimes/databricks_job/databricks_wrapper.py +1 -1
- mlrun/runtimes/funcdoc.py +0 -28
- mlrun/runtimes/kubejob.py +2 -1
- mlrun/runtimes/local.py +12 -3
- mlrun/runtimes/mpijob/__init__.py +0 -20
- mlrun/runtimes/mpijob/v1.py +1 -1
- mlrun/runtimes/nuclio/api_gateway.py +194 -84
- mlrun/runtimes/nuclio/application/application.py +170 -8
- mlrun/runtimes/nuclio/function.py +39 -49
- mlrun/runtimes/pod.py +16 -36
- mlrun/runtimes/remotesparkjob.py +9 -3
- mlrun/runtimes/sparkjob/spark3job.py +1 -1
- mlrun/runtimes/utils.py +6 -45
- mlrun/serving/__init__.py +8 -1
- mlrun/serving/server.py +2 -1
- mlrun/serving/states.py +51 -8
- mlrun/serving/utils.py +19 -11
- mlrun/serving/v2_serving.py +5 -1
- mlrun/track/tracker.py +2 -1
- mlrun/utils/async_http.py +25 -5
- mlrun/utils/helpers.py +157 -83
- mlrun/utils/logger.py +39 -7
- mlrun/utils/notifications/notification/__init__.py +14 -9
- mlrun/utils/notifications/notification/base.py +1 -1
- mlrun/utils/notifications/notification/slack.py +34 -7
- mlrun/utils/notifications/notification/webhook.py +1 -1
- mlrun/utils/notifications/notification_pusher.py +147 -16
- mlrun/utils/regex.py +9 -0
- mlrun/utils/v3io_clients.py +0 -1
- mlrun/utils/version/version.json +2 -2
- {mlrun-1.7.0rc14.dist-info → mlrun-1.7.0rc22.dist-info}/METADATA +14 -6
- {mlrun-1.7.0rc14.dist-info → mlrun-1.7.0rc22.dist-info}/RECORD +158 -138
- mlrun/kfpops.py +0 -865
- mlrun/platforms/other.py +0 -305
- {mlrun-1.7.0rc14.dist-info → mlrun-1.7.0rc22.dist-info}/LICENSE +0 -0
- {mlrun-1.7.0rc14.dist-info → mlrun-1.7.0rc22.dist-info}/WHEEL +0 -0
- {mlrun-1.7.0rc14.dist-info → mlrun-1.7.0rc22.dist-info}/entry_points.txt +0 -0
- {mlrun-1.7.0rc14.dist-info → mlrun-1.7.0rc22.dist-info}/top_level.txt +0 -0
mlrun/projects/operations.py
CHANGED
|
@@ -15,9 +15,10 @@
|
|
|
15
15
|
import warnings
|
|
16
16
|
from typing import Optional, Union
|
|
17
17
|
|
|
18
|
-
import
|
|
18
|
+
from mlrun_pipelines.models import PipelineNodeWrapper
|
|
19
19
|
|
|
20
20
|
import mlrun
|
|
21
|
+
import mlrun.common.constants as mlrun_constants
|
|
21
22
|
from mlrun.utils import hub_prefix
|
|
22
23
|
|
|
23
24
|
from .pipelines import enrich_function_object, pipeline_context
|
|
@@ -76,7 +77,8 @@ def run_function(
|
|
|
76
77
|
notifications: list[mlrun.model.Notification] = None,
|
|
77
78
|
returns: Optional[list[Union[str, dict[str, str]]]] = None,
|
|
78
79
|
builder_env: Optional[list] = None,
|
|
79
|
-
|
|
80
|
+
reset_on_run: Optional[bool] = None,
|
|
81
|
+
) -> Union[mlrun.model.RunObject, PipelineNodeWrapper]:
|
|
80
82
|
"""Run a local or remote task as part of a local/kubeflow pipeline
|
|
81
83
|
|
|
82
84
|
run_function() allows you to execute a function locally, on a remote cluster, or as part of an automated workflow
|
|
@@ -86,7 +88,7 @@ def run_function(
|
|
|
86
88
|
when functions run as part of a workflow/pipeline (project.run()) some attributes can be set at the run level,
|
|
87
89
|
e.g. local=True will run all the functions locally, setting artifact_path will direct all outputs to the same path.
|
|
88
90
|
project runs provide additional notifications/reporting and exception handling.
|
|
89
|
-
inside a Kubeflow pipeline (KFP) run_function() generates KFP
|
|
91
|
+
inside a Kubeflow pipeline (KFP) run_function() generates a KFP node (see PipelineNodeWrapper) which forms a DAG
|
|
90
92
|
some behavior may differ between regular runs and deferred KFP runs.
|
|
91
93
|
|
|
92
94
|
example (use with function object)::
|
|
@@ -166,7 +168,10 @@ def run_function(
|
|
|
166
168
|
artifact type can be given there. The artifact key must appear in the dictionary as
|
|
167
169
|
"key": "the_key".
|
|
168
170
|
:param builder_env: env vars dict for source archive config/credentials e.g. builder_env={"GIT_TOKEN": token}
|
|
169
|
-
:
|
|
171
|
+
:param reset_on_run: When True, the function's Python modules are reloaded prior to code execution.
|
|
172
|
+
This ensures latest code changes are executed. This argument must be used in
|
|
173
|
+
conjunction with the local=True argument.
|
|
174
|
+
:return: MLRun RunObject or PipelineNodeWrapper
|
|
170
175
|
"""
|
|
171
176
|
engine, function = _get_engine_and_function(function, project_object)
|
|
172
177
|
task = mlrun.new_task(
|
|
@@ -190,7 +195,9 @@ def run_function(
|
|
|
190
195
|
local = pipeline_context.is_run_local(local)
|
|
191
196
|
task.metadata.labels = task.metadata.labels or labels or {}
|
|
192
197
|
if pipeline_context.workflow_id:
|
|
193
|
-
task.metadata.labels[
|
|
198
|
+
task.metadata.labels[mlrun_constants.MLRunInternalLabels.workflow] = (
|
|
199
|
+
pipeline_context.workflow_id
|
|
200
|
+
)
|
|
194
201
|
if function.kind == "local":
|
|
195
202
|
command, function = mlrun.run.load_func_code(function)
|
|
196
203
|
function.spec.command = command
|
|
@@ -212,6 +219,7 @@ def run_function(
|
|
|
212
219
|
schedule=schedule,
|
|
213
220
|
notifications=notifications,
|
|
214
221
|
builder_env=builder_env,
|
|
222
|
+
reset_on_run=reset_on_run,
|
|
215
223
|
)
|
|
216
224
|
if run_result:
|
|
217
225
|
run_result._notified = False
|
|
@@ -225,9 +233,9 @@ def run_function(
|
|
|
225
233
|
class BuildStatus:
|
|
226
234
|
"""returned status from build operation"""
|
|
227
235
|
|
|
228
|
-
def __init__(self, ready, outputs=
|
|
236
|
+
def __init__(self, ready, outputs=None, function=None):
|
|
229
237
|
self.ready = ready
|
|
230
|
-
self.outputs = outputs
|
|
238
|
+
self.outputs = outputs or {}
|
|
231
239
|
self.function = function
|
|
232
240
|
|
|
233
241
|
def after(self, step):
|
|
@@ -254,7 +262,7 @@ def build_function(
|
|
|
254
262
|
overwrite_build_params: bool = False,
|
|
255
263
|
extra_args: str = None,
|
|
256
264
|
force_build: bool = False,
|
|
257
|
-
) -> Union[BuildStatus,
|
|
265
|
+
) -> Union[BuildStatus, PipelineNodeWrapper]:
|
|
258
266
|
"""deploy ML function, build container with its dependencies
|
|
259
267
|
|
|
260
268
|
:param function: Name of the function (in the project) or function object
|
|
@@ -294,7 +302,11 @@ def build_function(
|
|
|
294
302
|
if overwrite_build_params:
|
|
295
303
|
function.spec.build.commands = None
|
|
296
304
|
if requirements or requirements_file:
|
|
297
|
-
function.with_requirements(
|
|
305
|
+
function.with_requirements(
|
|
306
|
+
requirements=requirements,
|
|
307
|
+
requirements_file=requirements_file,
|
|
308
|
+
overwrite=True,
|
|
309
|
+
)
|
|
298
310
|
if commands:
|
|
299
311
|
function.with_commands(commands)
|
|
300
312
|
return function.deploy_step(
|
|
@@ -336,9 +348,9 @@ def build_function(
|
|
|
336
348
|
class DeployStatus:
|
|
337
349
|
"""returned status from deploy operation"""
|
|
338
350
|
|
|
339
|
-
def __init__(self, state, outputs=
|
|
351
|
+
def __init__(self, state, outputs=None, function=None):
|
|
340
352
|
self.state = state
|
|
341
|
-
self.outputs = outputs
|
|
353
|
+
self.outputs = outputs or {}
|
|
342
354
|
self.function = function
|
|
343
355
|
|
|
344
356
|
def after(self, step):
|
|
@@ -358,7 +370,7 @@ def deploy_function(
|
|
|
358
370
|
builder_env: dict = None,
|
|
359
371
|
project_object=None,
|
|
360
372
|
mock: bool = None,
|
|
361
|
-
) -> Union[DeployStatus,
|
|
373
|
+
) -> Union[DeployStatus, PipelineNodeWrapper]:
|
|
362
374
|
"""deploy real-time (nuclio based) functions
|
|
363
375
|
|
|
364
376
|
:param function: name of the function (in the project) or function object
|
mlrun/projects/pipelines.py
CHANGED
|
@@ -20,18 +20,19 @@ import tempfile
|
|
|
20
20
|
import typing
|
|
21
21
|
import uuid
|
|
22
22
|
|
|
23
|
-
import
|
|
24
|
-
|
|
23
|
+
import mlrun_pipelines.common.models
|
|
24
|
+
import mlrun_pipelines.patcher
|
|
25
25
|
from kfp.compiler import compiler
|
|
26
|
+
from mlrun_pipelines.helpers import new_pipe_metadata
|
|
26
27
|
|
|
27
28
|
import mlrun
|
|
29
|
+
import mlrun.common.runtimes.constants
|
|
28
30
|
import mlrun.common.schemas
|
|
29
31
|
import mlrun.utils.notifications
|
|
30
32
|
from mlrun.errors import err_to_str
|
|
31
33
|
from mlrun.utils import (
|
|
32
34
|
get_ui_url,
|
|
33
35
|
logger,
|
|
34
|
-
new_pipe_metadata,
|
|
35
36
|
normalize_workflow_name,
|
|
36
37
|
retry_until_successful,
|
|
37
38
|
)
|
|
@@ -301,72 +302,6 @@ def _enrich_kfp_pod_security_context(kfp_pod_template, function):
|
|
|
301
302
|
}
|
|
302
303
|
|
|
303
304
|
|
|
304
|
-
# When we run pipelines, the kfp.compile.Compile.compile() method takes the decorated function with @dsl.pipeline and
|
|
305
|
-
# converts it to a k8s object. As part of the flow in the Compile.compile() method,
|
|
306
|
-
# we call _create_and_write_workflow, which builds a dictionary from the workflow and then writes it to a file.
|
|
307
|
-
# Unfortunately, the kfp sdk does not provide an API for configuring priority_class_name and other attributes.
|
|
308
|
-
# I ran across the following problem when seeking for a method to set the priority_class_name:
|
|
309
|
-
# https://github.com/kubeflow/pipelines/issues/3594
|
|
310
|
-
# When we patch the _create_and_write_workflow, we can eventually obtain the dictionary right before we write it
|
|
311
|
-
# to a file and enrich it with argo compatible fields, make sure you looking for the same argo version we use
|
|
312
|
-
# https://github.com/argoproj/argo-workflows/blob/release-2.7/pkg/apis/workflow/v1alpha1/workflow_types.go
|
|
313
|
-
def _create_enriched_mlrun_workflow(
|
|
314
|
-
self,
|
|
315
|
-
pipeline_func: typing.Callable,
|
|
316
|
-
pipeline_name: typing.Optional[str] = None,
|
|
317
|
-
pipeline_description: typing.Optional[str] = None,
|
|
318
|
-
params_list: typing.Optional[list[dsl.PipelineParam]] = None,
|
|
319
|
-
pipeline_conf: typing.Optional[dsl.PipelineConf] = None,
|
|
320
|
-
):
|
|
321
|
-
"""Call internal implementation of create_workflow and enrich with mlrun functions attributes"""
|
|
322
|
-
workflow = self._original_create_workflow(
|
|
323
|
-
pipeline_func, pipeline_name, pipeline_description, params_list, pipeline_conf
|
|
324
|
-
)
|
|
325
|
-
# We don't want to interrupt the original flow and don't know all the scenarios the function could be called.
|
|
326
|
-
# that's why we have try/except on all the code of the enrichment and also specific try/except for errors that
|
|
327
|
-
# we know can be raised.
|
|
328
|
-
try:
|
|
329
|
-
functions = []
|
|
330
|
-
if pipeline_context.functions:
|
|
331
|
-
try:
|
|
332
|
-
functions = pipeline_context.functions.values()
|
|
333
|
-
except Exception as err:
|
|
334
|
-
logger.debug(
|
|
335
|
-
"Unable to retrieve project functions, not enriching workflow with mlrun",
|
|
336
|
-
error=err_to_str(err),
|
|
337
|
-
)
|
|
338
|
-
return workflow
|
|
339
|
-
|
|
340
|
-
# enrich each pipeline step with your desire k8s attribute
|
|
341
|
-
for kfp_step_template in workflow["spec"]["templates"]:
|
|
342
|
-
if kfp_step_template.get("container"):
|
|
343
|
-
for function_obj in functions:
|
|
344
|
-
# we condition within each function since the comparison between the function and
|
|
345
|
-
# the kfp pod may change depending on the attribute type.
|
|
346
|
-
_set_function_attribute_on_kfp_pod(
|
|
347
|
-
kfp_step_template,
|
|
348
|
-
function_obj,
|
|
349
|
-
"PriorityClassName",
|
|
350
|
-
"priority_class_name",
|
|
351
|
-
)
|
|
352
|
-
_enrich_kfp_pod_security_context(
|
|
353
|
-
kfp_step_template,
|
|
354
|
-
function_obj,
|
|
355
|
-
)
|
|
356
|
-
except mlrun.errors.MLRunInvalidArgumentError:
|
|
357
|
-
raise
|
|
358
|
-
except Exception as err:
|
|
359
|
-
logger.debug(
|
|
360
|
-
"Something in the enrichment of kfp pods failed", error=err_to_str(err)
|
|
361
|
-
)
|
|
362
|
-
return workflow
|
|
363
|
-
|
|
364
|
-
|
|
365
|
-
# patching function as class method
|
|
366
|
-
kfp.compiler.Compiler._original_create_workflow = kfp.compiler.Compiler._create_workflow
|
|
367
|
-
kfp.compiler.Compiler._create_workflow = _create_enriched_mlrun_workflow
|
|
368
|
-
|
|
369
|
-
|
|
370
305
|
def get_db_function(project, key) -> mlrun.runtimes.BaseRuntime:
|
|
371
306
|
project_instance, name, tag, hash_key = parse_versioned_object_uri(
|
|
372
307
|
key, project.metadata.name
|
|
@@ -437,7 +372,7 @@ class _PipelineRunStatus:
|
|
|
437
372
|
engine: type["_PipelineRunner"],
|
|
438
373
|
project: "mlrun.projects.MlrunProject",
|
|
439
374
|
workflow: WorkflowSpec = None,
|
|
440
|
-
state:
|
|
375
|
+
state: mlrun_pipelines.common.models.RunStatuses = "",
|
|
441
376
|
exc: Exception = None,
|
|
442
377
|
):
|
|
443
378
|
"""
|
|
@@ -457,7 +392,10 @@ class _PipelineRunStatus:
|
|
|
457
392
|
|
|
458
393
|
@property
|
|
459
394
|
def state(self):
|
|
460
|
-
if
|
|
395
|
+
if (
|
|
396
|
+
self._state
|
|
397
|
+
not in mlrun_pipelines.common.models.RunStatuses.stable_statuses()
|
|
398
|
+
):
|
|
461
399
|
self._state = self._engine.get_state(self.run_id, self.project)
|
|
462
400
|
return self._state
|
|
463
401
|
|
|
@@ -506,6 +444,7 @@ class _PipelineRunner(abc.ABC):
|
|
|
506
444
|
namespace=None,
|
|
507
445
|
source=None,
|
|
508
446
|
notifications: list[mlrun.model.Notification] = None,
|
|
447
|
+
send_start_notification: bool = True,
|
|
509
448
|
) -> _PipelineRunStatus:
|
|
510
449
|
pass
|
|
511
450
|
|
|
@@ -542,6 +481,7 @@ class _PipelineRunner(abc.ABC):
|
|
|
542
481
|
timeout=None,
|
|
543
482
|
expected_statuses=None,
|
|
544
483
|
notifiers: mlrun.utils.notifications.CustomNotificationPusher = None,
|
|
484
|
+
**kwargs,
|
|
545
485
|
):
|
|
546
486
|
pass
|
|
547
487
|
|
|
@@ -584,6 +524,7 @@ class _KFPRunner(_PipelineRunner):
|
|
|
584
524
|
namespace=None,
|
|
585
525
|
source=None,
|
|
586
526
|
notifications: list[mlrun.model.Notification] = None,
|
|
527
|
+
send_start_notification: bool = True,
|
|
587
528
|
) -> _PipelineRunStatus:
|
|
588
529
|
pipeline_context.set(project, workflow_spec)
|
|
589
530
|
workflow_handler = _PipelineRunner._get_handler(
|
|
@@ -632,13 +573,13 @@ class _KFPRunner(_PipelineRunner):
|
|
|
632
573
|
func_name=func.metadata.name,
|
|
633
574
|
exc_info=err_to_str(exc),
|
|
634
575
|
)
|
|
635
|
-
|
|
636
|
-
|
|
637
|
-
|
|
638
|
-
|
|
639
|
-
|
|
640
|
-
|
|
641
|
-
|
|
576
|
+
if send_start_notification:
|
|
577
|
+
project.notifiers.push_pipeline_start_message(
|
|
578
|
+
project.metadata.name,
|
|
579
|
+
project.get_param("commit_id", None),
|
|
580
|
+
run_id,
|
|
581
|
+
True,
|
|
582
|
+
)
|
|
642
583
|
pipeline_context.clear()
|
|
643
584
|
return _PipelineRunStatus(run_id, cls, project=project, workflow=workflow_spec)
|
|
644
585
|
|
|
@@ -673,6 +614,7 @@ class _KFPRunner(_PipelineRunner):
|
|
|
673
614
|
timeout=None,
|
|
674
615
|
expected_statuses=None,
|
|
675
616
|
notifiers: mlrun.utils.notifications.CustomNotificationPusher = None,
|
|
617
|
+
**kwargs,
|
|
676
618
|
):
|
|
677
619
|
if timeout is None:
|
|
678
620
|
timeout = 60 * 60
|
|
@@ -728,6 +670,7 @@ class _LocalRunner(_PipelineRunner):
|
|
|
728
670
|
namespace=None,
|
|
729
671
|
source=None,
|
|
730
672
|
notifications: list[mlrun.model.Notification] = None,
|
|
673
|
+
send_start_notification: bool = True,
|
|
731
674
|
) -> _PipelineRunStatus:
|
|
732
675
|
pipeline_context.set(project, workflow_spec)
|
|
733
676
|
workflow_handler = _PipelineRunner._get_handler(
|
|
@@ -748,13 +691,15 @@ class _LocalRunner(_PipelineRunner):
|
|
|
748
691
|
original_source = project.spec.source
|
|
749
692
|
project.set_source(source=source)
|
|
750
693
|
pipeline_context.workflow_artifact_path = artifact_path
|
|
751
|
-
|
|
752
|
-
|
|
753
|
-
|
|
694
|
+
|
|
695
|
+
if send_start_notification:
|
|
696
|
+
project.notifiers.push_pipeline_start_message(
|
|
697
|
+
project.metadata.name, pipeline_id=workflow_id
|
|
698
|
+
)
|
|
754
699
|
err = None
|
|
755
700
|
try:
|
|
756
701
|
workflow_handler(**workflow_spec.args)
|
|
757
|
-
state =
|
|
702
|
+
state = mlrun_pipelines.common.models.RunStatuses.succeeded
|
|
758
703
|
except Exception as exc:
|
|
759
704
|
err = exc
|
|
760
705
|
logger.exception("Workflow run failed")
|
|
@@ -762,7 +707,7 @@ class _LocalRunner(_PipelineRunner):
|
|
|
762
707
|
f":x: Workflow {workflow_id} run failed!, error: {err_to_str(exc)}",
|
|
763
708
|
mlrun.common.schemas.NotificationSeverity.ERROR,
|
|
764
709
|
)
|
|
765
|
-
state =
|
|
710
|
+
state = mlrun_pipelines.common.models.RunStatuses.failed
|
|
766
711
|
mlrun.run.wait_for_runs_completion(pipeline_context.runs_map.values())
|
|
767
712
|
project.notifiers.push_pipeline_run_results(
|
|
768
713
|
pipeline_context.runs_map.values(), state=state
|
|
@@ -796,6 +741,7 @@ class _LocalRunner(_PipelineRunner):
|
|
|
796
741
|
timeout=None,
|
|
797
742
|
expected_statuses=None,
|
|
798
743
|
notifiers: mlrun.utils.notifications.CustomNotificationPusher = None,
|
|
744
|
+
**kwargs,
|
|
799
745
|
):
|
|
800
746
|
pass
|
|
801
747
|
|
|
@@ -817,13 +763,21 @@ class _RemoteRunner(_PipelineRunner):
|
|
|
817
763
|
namespace: str = None,
|
|
818
764
|
source: str = None,
|
|
819
765
|
notifications: list[mlrun.model.Notification] = None,
|
|
766
|
+
send_start_notification: bool = True,
|
|
820
767
|
) -> typing.Optional[_PipelineRunStatus]:
|
|
821
768
|
workflow_name = normalize_workflow_name(name=name, project_name=project.name)
|
|
822
769
|
workflow_id = None
|
|
823
770
|
|
|
824
771
|
# for start message, fallback to old notification behavior
|
|
825
|
-
|
|
826
|
-
|
|
772
|
+
if send_start_notification:
|
|
773
|
+
for notification in notifications or []:
|
|
774
|
+
project.notifiers.add_notification(
|
|
775
|
+
notification.kind, notification.params
|
|
776
|
+
)
|
|
777
|
+
# if a notification with `when=running` is provided, it will be used explicitly and others
|
|
778
|
+
# will be ignored
|
|
779
|
+
if "running" in notification.when:
|
|
780
|
+
break
|
|
827
781
|
|
|
828
782
|
# The returned engine for this runner is the engine of the workflow.
|
|
829
783
|
# In this way wait_for_completion/get_run_status would be executed by the correct pipeline runner.
|
|
@@ -921,9 +875,9 @@ class _RemoteRunner(_PipelineRunner):
|
|
|
921
875
|
f":x: Workflow {workflow_name} run failed!, error: {err_to_str(exc)}",
|
|
922
876
|
mlrun.common.schemas.NotificationSeverity.ERROR,
|
|
923
877
|
)
|
|
924
|
-
state =
|
|
878
|
+
state = mlrun_pipelines.common.models.RunStatuses.failed
|
|
925
879
|
else:
|
|
926
|
-
state =
|
|
880
|
+
state = mlrun_pipelines.common.models.RunStatuses.running
|
|
927
881
|
project.notifiers.push_pipeline_start_message(
|
|
928
882
|
project.metadata.name,
|
|
929
883
|
)
|
|
@@ -940,24 +894,47 @@ class _RemoteRunner(_PipelineRunner):
|
|
|
940
894
|
@staticmethod
|
|
941
895
|
def get_run_status(
|
|
942
896
|
project,
|
|
943
|
-
run,
|
|
897
|
+
run: _PipelineRunStatus,
|
|
944
898
|
timeout=None,
|
|
945
899
|
expected_statuses=None,
|
|
946
900
|
notifiers: mlrun.utils.notifications.CustomNotificationPusher = None,
|
|
901
|
+
inner_engine: type[_PipelineRunner] = None,
|
|
947
902
|
):
|
|
948
|
-
|
|
949
|
-
|
|
950
|
-
|
|
951
|
-
|
|
952
|
-
|
|
953
|
-
|
|
954
|
-
|
|
955
|
-
|
|
956
|
-
|
|
903
|
+
inner_engine = inner_engine or _KFPRunner
|
|
904
|
+
if inner_engine.engine == _KFPRunner.engine:
|
|
905
|
+
# ignore notifiers for remote notifications, as they are handled by the remote pipeline notifications,
|
|
906
|
+
# so overriding with CustomNotificationPusher with empty list of notifiers or only local notifiers
|
|
907
|
+
local_project_notifiers = list(
|
|
908
|
+
set(mlrun.utils.notifications.NotificationTypes.local()).intersection(
|
|
909
|
+
set(project.notifiers.notifications.keys())
|
|
910
|
+
)
|
|
911
|
+
)
|
|
912
|
+
notifiers = mlrun.utils.notifications.CustomNotificationPusher(
|
|
913
|
+
local_project_notifiers
|
|
914
|
+
)
|
|
915
|
+
return _KFPRunner.get_run_status(
|
|
916
|
+
project,
|
|
917
|
+
run,
|
|
918
|
+
timeout,
|
|
919
|
+
expected_statuses,
|
|
920
|
+
notifiers=notifiers,
|
|
921
|
+
)
|
|
957
922
|
|
|
958
|
-
|
|
959
|
-
|
|
960
|
-
|
|
923
|
+
elif inner_engine.engine == _LocalRunner.engine:
|
|
924
|
+
mldb = mlrun.db.get_run_db(secrets=project._secrets)
|
|
925
|
+
pipeline_runner_run = mldb.read_run(run.run_id, project=project.name)
|
|
926
|
+
pipeline_runner_run = mlrun.run.RunObject.from_dict(pipeline_runner_run)
|
|
927
|
+
pipeline_runner_run.logs(db=mldb)
|
|
928
|
+
pipeline_runner_run.refresh()
|
|
929
|
+
run._state = mlrun.common.runtimes.constants.RunStates.run_state_to_pipeline_run_status(
|
|
930
|
+
pipeline_runner_run.status.state
|
|
931
|
+
)
|
|
932
|
+
run._exc = pipeline_runner_run.status.error
|
|
933
|
+
|
|
934
|
+
else:
|
|
935
|
+
raise mlrun.errors.MLRunInvalidArgumentError(
|
|
936
|
+
f"Unsupported inner runner engine: {inner_engine.engine}"
|
|
937
|
+
)
|
|
961
938
|
|
|
962
939
|
|
|
963
940
|
def create_pipeline(project, pipeline, functions, secrets=None, handler=None):
|
|
@@ -1116,7 +1093,7 @@ def load_and_run(
|
|
|
1116
1093
|
context.log_result(key="workflow_id", value=run.run_id)
|
|
1117
1094
|
context.log_result(key="engine", value=run._engine.engine, commit=True)
|
|
1118
1095
|
|
|
1119
|
-
if run.state ==
|
|
1096
|
+
if run.state == mlrun_pipelines.common.models.RunStatuses.failed:
|
|
1120
1097
|
raise RuntimeError(f"Workflow {workflow_log_message} failed") from run.exc
|
|
1121
1098
|
|
|
1122
1099
|
if wait_for_completion:
|
|
@@ -1131,7 +1108,7 @@ def load_and_run(
|
|
|
1131
1108
|
|
|
1132
1109
|
pipeline_state, _, _ = project.get_run_status(run)
|
|
1133
1110
|
context.log_result(key="workflow_state", value=pipeline_state, commit=True)
|
|
1134
|
-
if pipeline_state !=
|
|
1111
|
+
if pipeline_state != mlrun_pipelines.common.models.RunStatuses.succeeded:
|
|
1135
1112
|
raise RuntimeError(
|
|
1136
1113
|
f"Workflow {workflow_log_message} failed, state={pipeline_state}"
|
|
1137
1114
|
)
|