mlrun 1.7.0rc13__py3-none-any.whl → 1.7.0rc21__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of mlrun might be problematic. Click here for more details.
- mlrun/__init__.py +10 -1
- mlrun/__main__.py +23 -111
- mlrun/alerts/__init__.py +15 -0
- mlrun/alerts/alert.py +144 -0
- mlrun/api/schemas/__init__.py +4 -3
- mlrun/artifacts/__init__.py +8 -3
- mlrun/artifacts/base.py +36 -253
- mlrun/artifacts/dataset.py +9 -190
- mlrun/artifacts/manager.py +46 -42
- mlrun/artifacts/model.py +9 -141
- mlrun/artifacts/plots.py +14 -375
- mlrun/common/constants.py +65 -3
- mlrun/common/formatters/__init__.py +19 -0
- mlrun/{runtimes/mpijob/v1alpha1.py → common/formatters/artifact.py} +6 -14
- mlrun/common/formatters/base.py +113 -0
- mlrun/common/formatters/function.py +46 -0
- mlrun/common/formatters/pipeline.py +53 -0
- mlrun/common/formatters/project.py +51 -0
- mlrun/{runtimes → common/runtimes}/constants.py +32 -4
- mlrun/common/schemas/__init__.py +10 -5
- mlrun/common/schemas/alert.py +92 -11
- mlrun/common/schemas/api_gateway.py +56 -0
- mlrun/common/schemas/artifact.py +15 -5
- mlrun/common/schemas/auth.py +2 -0
- mlrun/common/schemas/client_spec.py +1 -0
- mlrun/common/schemas/frontend_spec.py +1 -0
- mlrun/common/schemas/function.py +4 -0
- mlrun/common/schemas/model_monitoring/__init__.py +15 -3
- mlrun/common/schemas/model_monitoring/constants.py +58 -7
- mlrun/common/schemas/model_monitoring/grafana.py +9 -5
- mlrun/common/schemas/model_monitoring/model_endpoints.py +86 -2
- mlrun/common/schemas/pipeline.py +0 -9
- mlrun/common/schemas/project.py +6 -11
- mlrun/common/types.py +1 -0
- mlrun/config.py +36 -8
- mlrun/data_types/to_pandas.py +9 -9
- mlrun/datastore/base.py +41 -9
- mlrun/datastore/datastore.py +6 -2
- mlrun/datastore/datastore_profile.py +56 -4
- mlrun/datastore/hdfs.py +5 -0
- mlrun/datastore/inmem.py +2 -2
- mlrun/datastore/redis.py +2 -2
- mlrun/datastore/s3.py +5 -0
- mlrun/datastore/sources.py +147 -7
- mlrun/datastore/store_resources.py +7 -7
- mlrun/datastore/targets.py +129 -9
- mlrun/datastore/utils.py +42 -0
- mlrun/datastore/v3io.py +1 -1
- mlrun/db/auth_utils.py +152 -0
- mlrun/db/base.py +55 -11
- mlrun/db/httpdb.py +346 -107
- mlrun/db/nopdb.py +52 -10
- mlrun/errors.py +11 -0
- mlrun/execution.py +24 -9
- mlrun/feature_store/__init__.py +0 -2
- mlrun/feature_store/api.py +12 -47
- mlrun/feature_store/feature_set.py +9 -0
- mlrun/feature_store/feature_vector.py +8 -0
- mlrun/feature_store/ingestion.py +7 -6
- mlrun/feature_store/retrieval/base.py +9 -4
- mlrun/feature_store/retrieval/conversion.py +9 -9
- mlrun/feature_store/retrieval/dask_merger.py +2 -0
- mlrun/feature_store/retrieval/job.py +9 -3
- mlrun/feature_store/retrieval/local_merger.py +2 -0
- mlrun/feature_store/retrieval/spark_merger.py +16 -0
- mlrun/frameworks/_dl_common/loggers/tensorboard_logger.py +7 -12
- mlrun/frameworks/parallel_coordinates.py +2 -1
- mlrun/frameworks/tf_keras/__init__.py +4 -1
- mlrun/k8s_utils.py +10 -11
- mlrun/launcher/base.py +4 -3
- mlrun/launcher/client.py +5 -3
- mlrun/launcher/local.py +8 -2
- mlrun/launcher/remote.py +8 -2
- mlrun/lists.py +6 -2
- mlrun/model.py +62 -20
- mlrun/model_monitoring/__init__.py +1 -1
- mlrun/model_monitoring/api.py +41 -18
- mlrun/model_monitoring/application.py +5 -305
- mlrun/model_monitoring/applications/__init__.py +11 -0
- mlrun/model_monitoring/applications/_application_steps.py +157 -0
- mlrun/model_monitoring/applications/base.py +280 -0
- mlrun/model_monitoring/applications/context.py +214 -0
- mlrun/model_monitoring/applications/evidently_base.py +211 -0
- mlrun/model_monitoring/applications/histogram_data_drift.py +132 -91
- mlrun/model_monitoring/applications/results.py +99 -0
- mlrun/model_monitoring/controller.py +3 -1
- mlrun/model_monitoring/db/__init__.py +2 -0
- mlrun/model_monitoring/db/stores/__init__.py +0 -2
- mlrun/model_monitoring/db/stores/base/store.py +22 -37
- mlrun/model_monitoring/db/stores/sqldb/models/__init__.py +43 -21
- mlrun/model_monitoring/db/stores/sqldb/models/base.py +39 -8
- mlrun/model_monitoring/db/stores/sqldb/models/mysql.py +27 -7
- mlrun/model_monitoring/db/stores/sqldb/models/sqlite.py +5 -0
- mlrun/model_monitoring/db/stores/sqldb/sql_store.py +246 -224
- mlrun/model_monitoring/db/stores/v3io_kv/kv_store.py +232 -216
- mlrun/model_monitoring/db/tsdb/__init__.py +100 -0
- mlrun/model_monitoring/db/tsdb/base.py +329 -0
- mlrun/model_monitoring/db/tsdb/helpers.py +30 -0
- mlrun/model_monitoring/db/tsdb/tdengine/__init__.py +15 -0
- mlrun/model_monitoring/db/tsdb/tdengine/schemas.py +240 -0
- mlrun/model_monitoring/db/tsdb/tdengine/stream_graph_steps.py +45 -0
- mlrun/model_monitoring/db/tsdb/tdengine/tdengine_connector.py +397 -0
- mlrun/model_monitoring/db/tsdb/v3io/__init__.py +15 -0
- mlrun/model_monitoring/db/tsdb/v3io/stream_graph_steps.py +117 -0
- mlrun/model_monitoring/db/tsdb/v3io/v3io_connector.py +636 -0
- mlrun/model_monitoring/evidently_application.py +6 -118
- mlrun/model_monitoring/helpers.py +46 -1
- mlrun/model_monitoring/model_endpoint.py +3 -2
- mlrun/model_monitoring/stream_processing.py +57 -216
- mlrun/model_monitoring/writer.py +134 -124
- mlrun/package/utils/_formatter.py +2 -2
- mlrun/platforms/__init__.py +10 -9
- mlrun/platforms/iguazio.py +21 -202
- mlrun/projects/operations.py +19 -12
- mlrun/projects/pipelines.py +103 -109
- mlrun/projects/project.py +377 -137
- mlrun/render.py +15 -14
- mlrun/run.py +16 -47
- mlrun/runtimes/__init__.py +6 -3
- mlrun/runtimes/base.py +8 -7
- mlrun/runtimes/databricks_job/databricks_wrapper.py +1 -1
- mlrun/runtimes/funcdoc.py +0 -28
- mlrun/runtimes/kubejob.py +2 -1
- mlrun/runtimes/local.py +5 -2
- mlrun/runtimes/mpijob/__init__.py +0 -20
- mlrun/runtimes/mpijob/v1.py +1 -1
- mlrun/runtimes/nuclio/api_gateway.py +440 -208
- mlrun/runtimes/nuclio/application/application.py +170 -8
- mlrun/runtimes/nuclio/function.py +39 -49
- mlrun/runtimes/pod.py +21 -41
- mlrun/runtimes/remotesparkjob.py +9 -3
- mlrun/runtimes/sparkjob/spark3job.py +1 -1
- mlrun/runtimes/utils.py +6 -45
- mlrun/serving/server.py +2 -1
- mlrun/serving/states.py +53 -2
- mlrun/serving/v2_serving.py +5 -1
- mlrun/track/tracker.py +2 -1
- mlrun/utils/async_http.py +25 -5
- mlrun/utils/helpers.py +107 -75
- mlrun/utils/logger.py +39 -7
- mlrun/utils/notifications/notification/__init__.py +14 -9
- mlrun/utils/notifications/notification/base.py +1 -1
- mlrun/utils/notifications/notification/slack.py +61 -13
- mlrun/utils/notifications/notification/webhook.py +1 -1
- mlrun/utils/notifications/notification_pusher.py +147 -16
- mlrun/utils/regex.py +9 -0
- mlrun/utils/v3io_clients.py +0 -1
- mlrun/utils/version/version.json +2 -2
- {mlrun-1.7.0rc13.dist-info → mlrun-1.7.0rc21.dist-info}/METADATA +14 -6
- {mlrun-1.7.0rc13.dist-info → mlrun-1.7.0rc21.dist-info}/RECORD +154 -133
- mlrun/kfpops.py +0 -865
- mlrun/platforms/other.py +0 -305
- {mlrun-1.7.0rc13.dist-info → mlrun-1.7.0rc21.dist-info}/LICENSE +0 -0
- {mlrun-1.7.0rc13.dist-info → mlrun-1.7.0rc21.dist-info}/WHEEL +0 -0
- {mlrun-1.7.0rc13.dist-info → mlrun-1.7.0rc21.dist-info}/entry_points.txt +0 -0
- {mlrun-1.7.0rc13.dist-info → mlrun-1.7.0rc21.dist-info}/top_level.txt +0 -0
mlrun/projects/operations.py
CHANGED
|
@@ -15,9 +15,10 @@
|
|
|
15
15
|
import warnings
|
|
16
16
|
from typing import Optional, Union
|
|
17
17
|
|
|
18
|
-
import
|
|
18
|
+
from mlrun_pipelines.models import PipelineNodeWrapper
|
|
19
19
|
|
|
20
20
|
import mlrun
|
|
21
|
+
import mlrun.common.constants as mlrun_constants
|
|
21
22
|
from mlrun.utils import hub_prefix
|
|
22
23
|
|
|
23
24
|
from .pipelines import enrich_function_object, pipeline_context
|
|
@@ -76,7 +77,7 @@ def run_function(
|
|
|
76
77
|
notifications: list[mlrun.model.Notification] = None,
|
|
77
78
|
returns: Optional[list[Union[str, dict[str, str]]]] = None,
|
|
78
79
|
builder_env: Optional[list] = None,
|
|
79
|
-
) -> Union[mlrun.model.RunObject,
|
|
80
|
+
) -> Union[mlrun.model.RunObject, PipelineNodeWrapper]:
|
|
80
81
|
"""Run a local or remote task as part of a local/kubeflow pipeline
|
|
81
82
|
|
|
82
83
|
run_function() allow you to execute a function locally, on a remote cluster, or as part of an automated workflow
|
|
@@ -86,7 +87,7 @@ def run_function(
|
|
|
86
87
|
when functions run as part of a workflow/pipeline (project.run()) some attributes can be set at the run level,
|
|
87
88
|
e.g. local=True will run all the functions locally, setting artifact_path will direct all outputs to the same path.
|
|
88
89
|
project runs provide additional notifications/reporting and exception handling.
|
|
89
|
-
inside a Kubeflow pipeline (KFP) run_function() generates KFP
|
|
90
|
+
inside a Kubeflow pipeline (KFP) run_function() generates KFP node (see PipelineNodeWrapper) which forms a DAG
|
|
90
91
|
some behavior may differ between regular runs and deferred KFP runs.
|
|
91
92
|
|
|
92
93
|
example (use with function object)::
|
|
@@ -166,7 +167,7 @@ def run_function(
|
|
|
166
167
|
artifact type can be given there. The artifact key must appear in the dictionary as
|
|
167
168
|
"key": "the_key".
|
|
168
169
|
:param builder_env: env vars dict for source archive config/credentials e.g. builder_env={"GIT_TOKEN": token}
|
|
169
|
-
:return: MLRun RunObject or
|
|
170
|
+
:return: MLRun RunObject or PipelineNodeWrapper
|
|
170
171
|
"""
|
|
171
172
|
engine, function = _get_engine_and_function(function, project_object)
|
|
172
173
|
task = mlrun.new_task(
|
|
@@ -190,7 +191,9 @@ def run_function(
|
|
|
190
191
|
local = pipeline_context.is_run_local(local)
|
|
191
192
|
task.metadata.labels = task.metadata.labels or labels or {}
|
|
192
193
|
if pipeline_context.workflow_id:
|
|
193
|
-
task.metadata.labels[
|
|
194
|
+
task.metadata.labels[mlrun_constants.MLRunInternalLabels.workflow] = (
|
|
195
|
+
pipeline_context.workflow_id
|
|
196
|
+
)
|
|
194
197
|
if function.kind == "local":
|
|
195
198
|
command, function = mlrun.run.load_func_code(function)
|
|
196
199
|
function.spec.command = command
|
|
@@ -225,9 +228,9 @@ def run_function(
|
|
|
225
228
|
class BuildStatus:
|
|
226
229
|
"""returned status from build operation"""
|
|
227
230
|
|
|
228
|
-
def __init__(self, ready, outputs=
|
|
231
|
+
def __init__(self, ready, outputs=None, function=None):
|
|
229
232
|
self.ready = ready
|
|
230
|
-
self.outputs = outputs
|
|
233
|
+
self.outputs = outputs or {}
|
|
231
234
|
self.function = function
|
|
232
235
|
|
|
233
236
|
def after(self, step):
|
|
@@ -254,7 +257,7 @@ def build_function(
|
|
|
254
257
|
overwrite_build_params: bool = False,
|
|
255
258
|
extra_args: str = None,
|
|
256
259
|
force_build: bool = False,
|
|
257
|
-
) -> Union[BuildStatus,
|
|
260
|
+
) -> Union[BuildStatus, PipelineNodeWrapper]:
|
|
258
261
|
"""deploy ML function, build container with its dependencies
|
|
259
262
|
|
|
260
263
|
:param function: Name of the function (in the project) or function object
|
|
@@ -294,7 +297,11 @@ def build_function(
|
|
|
294
297
|
if overwrite_build_params:
|
|
295
298
|
function.spec.build.commands = None
|
|
296
299
|
if requirements or requirements_file:
|
|
297
|
-
function.with_requirements(
|
|
300
|
+
function.with_requirements(
|
|
301
|
+
requirements=requirements,
|
|
302
|
+
requirements_file=requirements_file,
|
|
303
|
+
overwrite=True,
|
|
304
|
+
)
|
|
298
305
|
if commands:
|
|
299
306
|
function.with_commands(commands)
|
|
300
307
|
return function.deploy_step(
|
|
@@ -336,9 +343,9 @@ def build_function(
|
|
|
336
343
|
class DeployStatus:
|
|
337
344
|
"""returned status from deploy operation"""
|
|
338
345
|
|
|
339
|
-
def __init__(self, state, outputs=
|
|
346
|
+
def __init__(self, state, outputs=None, function=None):
|
|
340
347
|
self.state = state
|
|
341
|
-
self.outputs = outputs
|
|
348
|
+
self.outputs = outputs or {}
|
|
342
349
|
self.function = function
|
|
343
350
|
|
|
344
351
|
def after(self, step):
|
|
@@ -358,7 +365,7 @@ def deploy_function(
|
|
|
358
365
|
builder_env: dict = None,
|
|
359
366
|
project_object=None,
|
|
360
367
|
mock: bool = None,
|
|
361
|
-
) -> Union[DeployStatus,
|
|
368
|
+
) -> Union[DeployStatus, PipelineNodeWrapper]:
|
|
362
369
|
"""deploy real-time (nuclio based) functions
|
|
363
370
|
|
|
364
371
|
:param function: name of the function (in the project) or function object
|
mlrun/projects/pipelines.py
CHANGED
|
@@ -13,24 +13,26 @@
|
|
|
13
13
|
# limitations under the License.
|
|
14
14
|
import abc
|
|
15
15
|
import builtins
|
|
16
|
+
import http
|
|
16
17
|
import importlib.util as imputil
|
|
17
18
|
import os
|
|
18
19
|
import tempfile
|
|
19
20
|
import typing
|
|
20
21
|
import uuid
|
|
21
22
|
|
|
22
|
-
import
|
|
23
|
-
|
|
23
|
+
import mlrun_pipelines.common.models
|
|
24
|
+
import mlrun_pipelines.patcher
|
|
24
25
|
from kfp.compiler import compiler
|
|
26
|
+
from mlrun_pipelines.helpers import new_pipe_metadata
|
|
25
27
|
|
|
26
28
|
import mlrun
|
|
29
|
+
import mlrun.common.runtimes.constants
|
|
27
30
|
import mlrun.common.schemas
|
|
28
31
|
import mlrun.utils.notifications
|
|
29
32
|
from mlrun.errors import err_to_str
|
|
30
33
|
from mlrun.utils import (
|
|
31
34
|
get_ui_url,
|
|
32
35
|
logger,
|
|
33
|
-
new_pipe_metadata,
|
|
34
36
|
normalize_workflow_name,
|
|
35
37
|
retry_until_successful,
|
|
36
38
|
)
|
|
@@ -300,72 +302,6 @@ def _enrich_kfp_pod_security_context(kfp_pod_template, function):
|
|
|
300
302
|
}
|
|
301
303
|
|
|
302
304
|
|
|
303
|
-
# When we run pipelines, the kfp.compile.Compile.compile() method takes the decorated function with @dsl.pipeline and
|
|
304
|
-
# converts it to a k8s object. As part of the flow in the Compile.compile() method,
|
|
305
|
-
# we call _create_and_write_workflow, which builds a dictionary from the workflow and then writes it to a file.
|
|
306
|
-
# Unfortunately, the kfp sdk does not provide an API for configuring priority_class_name and other attributes.
|
|
307
|
-
# I ran across the following problem when seeking for a method to set the priority_class_name:
|
|
308
|
-
# https://github.com/kubeflow/pipelines/issues/3594
|
|
309
|
-
# When we patch the _create_and_write_workflow, we can eventually obtain the dictionary right before we write it
|
|
310
|
-
# to a file and enrich it with argo compatible fields, make sure you looking for the same argo version we use
|
|
311
|
-
# https://github.com/argoproj/argo-workflows/blob/release-2.7/pkg/apis/workflow/v1alpha1/workflow_types.go
|
|
312
|
-
def _create_enriched_mlrun_workflow(
|
|
313
|
-
self,
|
|
314
|
-
pipeline_func: typing.Callable,
|
|
315
|
-
pipeline_name: typing.Optional[str] = None,
|
|
316
|
-
pipeline_description: typing.Optional[str] = None,
|
|
317
|
-
params_list: typing.Optional[list[dsl.PipelineParam]] = None,
|
|
318
|
-
pipeline_conf: typing.Optional[dsl.PipelineConf] = None,
|
|
319
|
-
):
|
|
320
|
-
"""Call internal implementation of create_workflow and enrich with mlrun functions attributes"""
|
|
321
|
-
workflow = self._original_create_workflow(
|
|
322
|
-
pipeline_func, pipeline_name, pipeline_description, params_list, pipeline_conf
|
|
323
|
-
)
|
|
324
|
-
# We don't want to interrupt the original flow and don't know all the scenarios the function could be called.
|
|
325
|
-
# that's why we have try/except on all the code of the enrichment and also specific try/except for errors that
|
|
326
|
-
# we know can be raised.
|
|
327
|
-
try:
|
|
328
|
-
functions = []
|
|
329
|
-
if pipeline_context.functions:
|
|
330
|
-
try:
|
|
331
|
-
functions = pipeline_context.functions.values()
|
|
332
|
-
except Exception as err:
|
|
333
|
-
logger.debug(
|
|
334
|
-
"Unable to retrieve project functions, not enriching workflow with mlrun",
|
|
335
|
-
error=err_to_str(err),
|
|
336
|
-
)
|
|
337
|
-
return workflow
|
|
338
|
-
|
|
339
|
-
# enrich each pipeline step with your desire k8s attribute
|
|
340
|
-
for kfp_step_template in workflow["spec"]["templates"]:
|
|
341
|
-
if kfp_step_template.get("container"):
|
|
342
|
-
for function_obj in functions:
|
|
343
|
-
# we condition within each function since the comparison between the function and
|
|
344
|
-
# the kfp pod may change depending on the attribute type.
|
|
345
|
-
_set_function_attribute_on_kfp_pod(
|
|
346
|
-
kfp_step_template,
|
|
347
|
-
function_obj,
|
|
348
|
-
"PriorityClassName",
|
|
349
|
-
"priority_class_name",
|
|
350
|
-
)
|
|
351
|
-
_enrich_kfp_pod_security_context(
|
|
352
|
-
kfp_step_template,
|
|
353
|
-
function_obj,
|
|
354
|
-
)
|
|
355
|
-
except mlrun.errors.MLRunInvalidArgumentError:
|
|
356
|
-
raise
|
|
357
|
-
except Exception as err:
|
|
358
|
-
logger.debug(
|
|
359
|
-
"Something in the enrichment of kfp pods failed", error=err_to_str(err)
|
|
360
|
-
)
|
|
361
|
-
return workflow
|
|
362
|
-
|
|
363
|
-
|
|
364
|
-
# patching function as class method
|
|
365
|
-
kfp.compiler.Compiler._original_create_workflow = kfp.compiler.Compiler._create_workflow
|
|
366
|
-
kfp.compiler.Compiler._create_workflow = _create_enriched_mlrun_workflow
|
|
367
|
-
|
|
368
|
-
|
|
369
305
|
def get_db_function(project, key) -> mlrun.runtimes.BaseRuntime:
|
|
370
306
|
project_instance, name, tag, hash_key = parse_versioned_object_uri(
|
|
371
307
|
key, project.metadata.name
|
|
@@ -436,7 +372,7 @@ class _PipelineRunStatus:
|
|
|
436
372
|
engine: type["_PipelineRunner"],
|
|
437
373
|
project: "mlrun.projects.MlrunProject",
|
|
438
374
|
workflow: WorkflowSpec = None,
|
|
439
|
-
state:
|
|
375
|
+
state: mlrun_pipelines.common.models.RunStatuses = "",
|
|
440
376
|
exc: Exception = None,
|
|
441
377
|
):
|
|
442
378
|
"""
|
|
@@ -456,7 +392,10 @@ class _PipelineRunStatus:
|
|
|
456
392
|
|
|
457
393
|
@property
|
|
458
394
|
def state(self):
|
|
459
|
-
if
|
|
395
|
+
if (
|
|
396
|
+
self._state
|
|
397
|
+
not in mlrun_pipelines.common.models.RunStatuses.stable_statuses()
|
|
398
|
+
):
|
|
460
399
|
self._state = self._engine.get_state(self.run_id, self.project)
|
|
461
400
|
return self._state
|
|
462
401
|
|
|
@@ -505,6 +444,7 @@ class _PipelineRunner(abc.ABC):
|
|
|
505
444
|
namespace=None,
|
|
506
445
|
source=None,
|
|
507
446
|
notifications: list[mlrun.model.Notification] = None,
|
|
447
|
+
send_start_notification: bool = True,
|
|
508
448
|
) -> _PipelineRunStatus:
|
|
509
449
|
pass
|
|
510
450
|
|
|
@@ -521,7 +461,7 @@ class _PipelineRunner(abc.ABC):
|
|
|
521
461
|
@staticmethod
|
|
522
462
|
def _get_handler(workflow_handler, workflow_spec, project, secrets):
|
|
523
463
|
if not (workflow_handler and callable(workflow_handler)):
|
|
524
|
-
workflow_file = workflow_spec.get_source_file(project.spec.
|
|
464
|
+
workflow_file = workflow_spec.get_source_file(project.spec.get_code_path())
|
|
525
465
|
workflow_handler = create_pipeline(
|
|
526
466
|
project,
|
|
527
467
|
workflow_file,
|
|
@@ -541,6 +481,7 @@ class _PipelineRunner(abc.ABC):
|
|
|
541
481
|
timeout=None,
|
|
542
482
|
expected_statuses=None,
|
|
543
483
|
notifiers: mlrun.utils.notifications.CustomNotificationPusher = None,
|
|
484
|
+
**kwargs,
|
|
544
485
|
):
|
|
545
486
|
pass
|
|
546
487
|
|
|
@@ -553,7 +494,7 @@ class _KFPRunner(_PipelineRunner):
|
|
|
553
494
|
@classmethod
|
|
554
495
|
def save(cls, project, workflow_spec: WorkflowSpec, target, artifact_path=None):
|
|
555
496
|
pipeline_context.set(project, workflow_spec)
|
|
556
|
-
workflow_file = workflow_spec.get_source_file(project.spec.
|
|
497
|
+
workflow_file = workflow_spec.get_source_file(project.spec.get_code_path())
|
|
557
498
|
functions = FunctionsDict(project)
|
|
558
499
|
pipeline = create_pipeline(
|
|
559
500
|
project,
|
|
@@ -583,6 +524,7 @@ class _KFPRunner(_PipelineRunner):
|
|
|
583
524
|
namespace=None,
|
|
584
525
|
source=None,
|
|
585
526
|
notifications: list[mlrun.model.Notification] = None,
|
|
527
|
+
send_start_notification: bool = True,
|
|
586
528
|
) -> _PipelineRunStatus:
|
|
587
529
|
pipeline_context.set(project, workflow_spec)
|
|
588
530
|
workflow_handler = _PipelineRunner._get_handler(
|
|
@@ -631,13 +573,13 @@ class _KFPRunner(_PipelineRunner):
|
|
|
631
573
|
func_name=func.metadata.name,
|
|
632
574
|
exc_info=err_to_str(exc),
|
|
633
575
|
)
|
|
634
|
-
|
|
635
|
-
|
|
636
|
-
|
|
637
|
-
|
|
638
|
-
|
|
639
|
-
|
|
640
|
-
|
|
576
|
+
if send_start_notification:
|
|
577
|
+
project.notifiers.push_pipeline_start_message(
|
|
578
|
+
project.metadata.name,
|
|
579
|
+
project.get_param("commit_id", None),
|
|
580
|
+
run_id,
|
|
581
|
+
True,
|
|
582
|
+
)
|
|
641
583
|
pipeline_context.clear()
|
|
642
584
|
return _PipelineRunStatus(run_id, cls, project=project, workflow=workflow_spec)
|
|
643
585
|
|
|
@@ -672,6 +614,7 @@ class _KFPRunner(_PipelineRunner):
|
|
|
672
614
|
timeout=None,
|
|
673
615
|
expected_statuses=None,
|
|
674
616
|
notifiers: mlrun.utils.notifications.CustomNotificationPusher = None,
|
|
617
|
+
**kwargs,
|
|
675
618
|
):
|
|
676
619
|
if timeout is None:
|
|
677
620
|
timeout = 60 * 60
|
|
@@ -727,6 +670,7 @@ class _LocalRunner(_PipelineRunner):
|
|
|
727
670
|
namespace=None,
|
|
728
671
|
source=None,
|
|
729
672
|
notifications: list[mlrun.model.Notification] = None,
|
|
673
|
+
send_start_notification: bool = True,
|
|
730
674
|
) -> _PipelineRunStatus:
|
|
731
675
|
pipeline_context.set(project, workflow_spec)
|
|
732
676
|
workflow_handler = _PipelineRunner._get_handler(
|
|
@@ -747,13 +691,15 @@ class _LocalRunner(_PipelineRunner):
|
|
|
747
691
|
original_source = project.spec.source
|
|
748
692
|
project.set_source(source=source)
|
|
749
693
|
pipeline_context.workflow_artifact_path = artifact_path
|
|
750
|
-
|
|
751
|
-
|
|
752
|
-
|
|
694
|
+
|
|
695
|
+
if send_start_notification:
|
|
696
|
+
project.notifiers.push_pipeline_start_message(
|
|
697
|
+
project.metadata.name, pipeline_id=workflow_id
|
|
698
|
+
)
|
|
753
699
|
err = None
|
|
754
700
|
try:
|
|
755
701
|
workflow_handler(**workflow_spec.args)
|
|
756
|
-
state =
|
|
702
|
+
state = mlrun_pipelines.common.models.RunStatuses.succeeded
|
|
757
703
|
except Exception as exc:
|
|
758
704
|
err = exc
|
|
759
705
|
logger.exception("Workflow run failed")
|
|
@@ -761,7 +707,7 @@ class _LocalRunner(_PipelineRunner):
|
|
|
761
707
|
f":x: Workflow {workflow_id} run failed!, error: {err_to_str(exc)}",
|
|
762
708
|
mlrun.common.schemas.NotificationSeverity.ERROR,
|
|
763
709
|
)
|
|
764
|
-
state =
|
|
710
|
+
state = mlrun_pipelines.common.models.RunStatuses.failed
|
|
765
711
|
mlrun.run.wait_for_runs_completion(pipeline_context.runs_map.values())
|
|
766
712
|
project.notifiers.push_pipeline_run_results(
|
|
767
713
|
pipeline_context.runs_map.values(), state=state
|
|
@@ -795,6 +741,7 @@ class _LocalRunner(_PipelineRunner):
|
|
|
795
741
|
timeout=None,
|
|
796
742
|
expected_statuses=None,
|
|
797
743
|
notifiers: mlrun.utils.notifications.CustomNotificationPusher = None,
|
|
744
|
+
**kwargs,
|
|
798
745
|
):
|
|
799
746
|
pass
|
|
800
747
|
|
|
@@ -816,13 +763,21 @@ class _RemoteRunner(_PipelineRunner):
|
|
|
816
763
|
namespace: str = None,
|
|
817
764
|
source: str = None,
|
|
818
765
|
notifications: list[mlrun.model.Notification] = None,
|
|
766
|
+
send_start_notification: bool = True,
|
|
819
767
|
) -> typing.Optional[_PipelineRunStatus]:
|
|
820
768
|
workflow_name = normalize_workflow_name(name=name, project_name=project.name)
|
|
821
769
|
workflow_id = None
|
|
822
770
|
|
|
823
771
|
# for start message, fallback to old notification behavior
|
|
824
|
-
|
|
825
|
-
|
|
772
|
+
if send_start_notification:
|
|
773
|
+
for notification in notifications or []:
|
|
774
|
+
project.notifiers.add_notification(
|
|
775
|
+
notification.kind, notification.params
|
|
776
|
+
)
|
|
777
|
+
# if a notification with `when=running` is provided, it will be used explicitly and others
|
|
778
|
+
# will be ignored
|
|
779
|
+
if "running" in notification.when:
|
|
780
|
+
break
|
|
826
781
|
|
|
827
782
|
# The returned engine for this runner is the engine of the workflow.
|
|
828
783
|
# In this way wait_for_completion/get_run_status would be executed by the correct pipeline runner.
|
|
@@ -882,17 +837,33 @@ class _RemoteRunner(_PipelineRunner):
|
|
|
882
837
|
get_workflow_id_timeout=get_workflow_id_timeout,
|
|
883
838
|
)
|
|
884
839
|
|
|
840
|
+
def _get_workflow_id_or_bail():
|
|
841
|
+
try:
|
|
842
|
+
return run_db.get_workflow_id(
|
|
843
|
+
project=project.name,
|
|
844
|
+
name=workflow_response.name,
|
|
845
|
+
run_id=workflow_response.run_id,
|
|
846
|
+
engine=workflow_spec.engine,
|
|
847
|
+
)
|
|
848
|
+
except mlrun.errors.MLRunHTTPStatusError as get_wf_exc:
|
|
849
|
+
# fail fast on specific errors
|
|
850
|
+
if get_wf_exc.error_status_code in [
|
|
851
|
+
http.HTTPStatus.PRECONDITION_FAILED
|
|
852
|
+
]:
|
|
853
|
+
raise mlrun.errors.MLRunFatalFailureError(
|
|
854
|
+
original_exception=get_wf_exc
|
|
855
|
+
)
|
|
856
|
+
|
|
857
|
+
# raise for a retry (on other errors)
|
|
858
|
+
raise
|
|
859
|
+
|
|
885
860
|
# Getting workflow id from run:
|
|
886
861
|
response = retry_until_successful(
|
|
887
862
|
1,
|
|
888
863
|
get_workflow_id_timeout,
|
|
889
864
|
logger,
|
|
890
865
|
False,
|
|
891
|
-
|
|
892
|
-
project=project.name,
|
|
893
|
-
name=workflow_response.name,
|
|
894
|
-
run_id=workflow_response.run_id,
|
|
895
|
-
engine=workflow_spec.engine,
|
|
866
|
+
_get_workflow_id_or_bail,
|
|
896
867
|
)
|
|
897
868
|
workflow_id = response.workflow_id
|
|
898
869
|
# After fetching the workflow_id the workflow executed successfully
|
|
@@ -904,9 +875,9 @@ class _RemoteRunner(_PipelineRunner):
|
|
|
904
875
|
f":x: Workflow {workflow_name} run failed!, error: {err_to_str(exc)}",
|
|
905
876
|
mlrun.common.schemas.NotificationSeverity.ERROR,
|
|
906
877
|
)
|
|
907
|
-
state =
|
|
878
|
+
state = mlrun_pipelines.common.models.RunStatuses.failed
|
|
908
879
|
else:
|
|
909
|
-
state =
|
|
880
|
+
state = mlrun_pipelines.common.models.RunStatuses.running
|
|
910
881
|
project.notifiers.push_pipeline_start_message(
|
|
911
882
|
project.metadata.name,
|
|
912
883
|
)
|
|
@@ -923,24 +894,47 @@ class _RemoteRunner(_PipelineRunner):
|
|
|
923
894
|
@staticmethod
|
|
924
895
|
def get_run_status(
|
|
925
896
|
project,
|
|
926
|
-
run,
|
|
897
|
+
run: _PipelineRunStatus,
|
|
927
898
|
timeout=None,
|
|
928
899
|
expected_statuses=None,
|
|
929
900
|
notifiers: mlrun.utils.notifications.CustomNotificationPusher = None,
|
|
901
|
+
inner_engine: type[_PipelineRunner] = None,
|
|
930
902
|
):
|
|
931
|
-
|
|
932
|
-
|
|
933
|
-
|
|
934
|
-
|
|
935
|
-
|
|
936
|
-
|
|
937
|
-
|
|
938
|
-
|
|
939
|
-
|
|
903
|
+
inner_engine = inner_engine or _KFPRunner
|
|
904
|
+
if inner_engine.engine == _KFPRunner.engine:
|
|
905
|
+
# ignore notifiers for remote notifications, as they are handled by the remote pipeline notifications,
|
|
906
|
+
# so overriding with CustomNotificationPusher with empty list of notifiers or only local notifiers
|
|
907
|
+
local_project_notifiers = list(
|
|
908
|
+
set(mlrun.utils.notifications.NotificationTypes.local()).intersection(
|
|
909
|
+
set(project.notifiers.notifications.keys())
|
|
910
|
+
)
|
|
911
|
+
)
|
|
912
|
+
notifiers = mlrun.utils.notifications.CustomNotificationPusher(
|
|
913
|
+
local_project_notifiers
|
|
914
|
+
)
|
|
915
|
+
return _KFPRunner.get_run_status(
|
|
916
|
+
project,
|
|
917
|
+
run,
|
|
918
|
+
timeout,
|
|
919
|
+
expected_statuses,
|
|
920
|
+
notifiers=notifiers,
|
|
921
|
+
)
|
|
940
922
|
|
|
941
|
-
|
|
942
|
-
|
|
943
|
-
|
|
923
|
+
elif inner_engine.engine == _LocalRunner.engine:
|
|
924
|
+
mldb = mlrun.db.get_run_db(secrets=project._secrets)
|
|
925
|
+
pipeline_runner_run = mldb.read_run(run.run_id, project=project.name)
|
|
926
|
+
pipeline_runner_run = mlrun.run.RunObject.from_dict(pipeline_runner_run)
|
|
927
|
+
pipeline_runner_run.logs(db=mldb)
|
|
928
|
+
pipeline_runner_run.refresh()
|
|
929
|
+
run._state = mlrun.common.runtimes.constants.RunStates.run_state_to_pipeline_run_status(
|
|
930
|
+
pipeline_runner_run.status.state
|
|
931
|
+
)
|
|
932
|
+
run._exc = pipeline_runner_run.status.error
|
|
933
|
+
|
|
934
|
+
else:
|
|
935
|
+
raise mlrun.errors.MLRunInvalidArgumentError(
|
|
936
|
+
f"Unsupported inner runner engine: {inner_engine.engine}"
|
|
937
|
+
)
|
|
944
938
|
|
|
945
939
|
|
|
946
940
|
def create_pipeline(project, pipeline, functions, secrets=None, handler=None):
|
|
@@ -1099,7 +1093,7 @@ def load_and_run(
|
|
|
1099
1093
|
context.log_result(key="workflow_id", value=run.run_id)
|
|
1100
1094
|
context.log_result(key="engine", value=run._engine.engine, commit=True)
|
|
1101
1095
|
|
|
1102
|
-
if run.state ==
|
|
1096
|
+
if run.state == mlrun_pipelines.common.models.RunStatuses.failed:
|
|
1103
1097
|
raise RuntimeError(f"Workflow {workflow_log_message} failed") from run.exc
|
|
1104
1098
|
|
|
1105
1099
|
if wait_for_completion:
|
|
@@ -1114,7 +1108,7 @@ def load_and_run(
|
|
|
1114
1108
|
|
|
1115
1109
|
pipeline_state, _, _ = project.get_run_status(run)
|
|
1116
1110
|
context.log_result(key="workflow_state", value=pipeline_state, commit=True)
|
|
1117
|
-
if pipeline_state !=
|
|
1111
|
+
if pipeline_state != mlrun_pipelines.common.models.RunStatuses.succeeded:
|
|
1118
1112
|
raise RuntimeError(
|
|
1119
1113
|
f"Workflow {workflow_log_message} failed, state={pipeline_state}"
|
|
1120
1114
|
)
|