mlrun 1.7.0rc4__py3-none-any.whl → 1.7.0rc20__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of mlrun might be problematic. Click here for more details.
- mlrun/__init__.py +11 -1
- mlrun/__main__.py +25 -111
- mlrun/{datastore/helpers.py → alerts/__init__.py} +2 -5
- mlrun/alerts/alert.py +144 -0
- mlrun/api/schemas/__init__.py +4 -3
- mlrun/artifacts/__init__.py +8 -3
- mlrun/artifacts/base.py +38 -254
- mlrun/artifacts/dataset.py +9 -190
- mlrun/artifacts/manager.py +41 -47
- mlrun/artifacts/model.py +30 -158
- mlrun/artifacts/plots.py +23 -380
- mlrun/common/constants.py +68 -0
- mlrun/common/formatters/__init__.py +19 -0
- mlrun/{model_monitoring/stores/models/sqlite.py → common/formatters/artifact.py} +6 -8
- mlrun/common/formatters/base.py +78 -0
- mlrun/common/formatters/function.py +41 -0
- mlrun/common/formatters/pipeline.py +53 -0
- mlrun/common/formatters/project.py +51 -0
- mlrun/{runtimes → common/runtimes}/constants.py +32 -4
- mlrun/common/schemas/__init__.py +25 -4
- mlrun/common/schemas/alert.py +203 -0
- mlrun/common/schemas/api_gateway.py +148 -0
- mlrun/common/schemas/artifact.py +15 -5
- mlrun/common/schemas/auth.py +8 -2
- mlrun/common/schemas/client_spec.py +2 -0
- mlrun/common/schemas/frontend_spec.py +1 -0
- mlrun/common/schemas/function.py +4 -0
- mlrun/common/schemas/hub.py +7 -9
- mlrun/common/schemas/model_monitoring/__init__.py +19 -3
- mlrun/common/schemas/model_monitoring/constants.py +96 -26
- mlrun/common/schemas/model_monitoring/grafana.py +9 -5
- mlrun/common/schemas/model_monitoring/model_endpoints.py +86 -2
- mlrun/{runtimes/mpijob/v1alpha1.py → common/schemas/pagination.py} +10 -13
- mlrun/common/schemas/pipeline.py +0 -9
- mlrun/common/schemas/project.py +22 -21
- mlrun/common/types.py +7 -1
- mlrun/config.py +87 -19
- mlrun/data_types/data_types.py +4 -0
- mlrun/data_types/to_pandas.py +9 -9
- mlrun/datastore/__init__.py +5 -8
- mlrun/datastore/alibaba_oss.py +130 -0
- mlrun/datastore/azure_blob.py +4 -5
- mlrun/datastore/base.py +69 -30
- mlrun/datastore/datastore.py +10 -2
- mlrun/datastore/datastore_profile.py +90 -6
- mlrun/datastore/google_cloud_storage.py +1 -1
- mlrun/datastore/hdfs.py +5 -0
- mlrun/datastore/inmem.py +2 -2
- mlrun/datastore/redis.py +2 -2
- mlrun/datastore/s3.py +5 -0
- mlrun/datastore/snowflake_utils.py +43 -0
- mlrun/datastore/sources.py +172 -44
- mlrun/datastore/store_resources.py +7 -7
- mlrun/datastore/targets.py +285 -41
- mlrun/datastore/utils.py +68 -5
- mlrun/datastore/v3io.py +27 -50
- mlrun/db/auth_utils.py +152 -0
- mlrun/db/base.py +149 -14
- mlrun/db/factory.py +1 -1
- mlrun/db/httpdb.py +608 -178
- mlrun/db/nopdb.py +191 -7
- mlrun/errors.py +11 -0
- mlrun/execution.py +37 -20
- mlrun/feature_store/__init__.py +0 -2
- mlrun/feature_store/api.py +21 -52
- mlrun/feature_store/feature_set.py +48 -23
- mlrun/feature_store/feature_vector.py +2 -1
- mlrun/feature_store/ingestion.py +7 -6
- mlrun/feature_store/retrieval/base.py +9 -4
- mlrun/feature_store/retrieval/conversion.py +9 -9
- mlrun/feature_store/retrieval/dask_merger.py +2 -0
- mlrun/feature_store/retrieval/job.py +9 -3
- mlrun/feature_store/retrieval/local_merger.py +2 -0
- mlrun/feature_store/retrieval/spark_merger.py +34 -24
- mlrun/feature_store/steps.py +30 -19
- mlrun/features.py +4 -13
- mlrun/frameworks/_dl_common/loggers/tensorboard_logger.py +7 -12
- mlrun/frameworks/auto_mlrun/auto_mlrun.py +2 -2
- mlrun/frameworks/lgbm/__init__.py +1 -1
- mlrun/frameworks/lgbm/callbacks/callback.py +2 -4
- mlrun/frameworks/lgbm/model_handler.py +1 -1
- mlrun/frameworks/parallel_coordinates.py +2 -1
- mlrun/frameworks/pytorch/__init__.py +2 -2
- mlrun/frameworks/sklearn/__init__.py +1 -1
- mlrun/frameworks/tf_keras/__init__.py +5 -2
- mlrun/frameworks/tf_keras/callbacks/logging_callback.py +1 -1
- mlrun/frameworks/tf_keras/mlrun_interface.py +2 -2
- mlrun/frameworks/xgboost/__init__.py +1 -1
- mlrun/k8s_utils.py +10 -11
- mlrun/launcher/__init__.py +1 -1
- mlrun/launcher/base.py +6 -5
- mlrun/launcher/client.py +8 -6
- mlrun/launcher/factory.py +1 -1
- mlrun/launcher/local.py +9 -3
- mlrun/launcher/remote.py +9 -3
- mlrun/lists.py +6 -2
- mlrun/model.py +58 -19
- mlrun/model_monitoring/__init__.py +1 -1
- mlrun/model_monitoring/api.py +127 -301
- mlrun/model_monitoring/application.py +5 -296
- mlrun/model_monitoring/applications/__init__.py +11 -0
- mlrun/model_monitoring/applications/_application_steps.py +157 -0
- mlrun/model_monitoring/applications/base.py +282 -0
- mlrun/model_monitoring/applications/context.py +214 -0
- mlrun/model_monitoring/applications/evidently_base.py +211 -0
- mlrun/model_monitoring/applications/histogram_data_drift.py +224 -93
- mlrun/model_monitoring/applications/results.py +99 -0
- mlrun/model_monitoring/controller.py +30 -36
- mlrun/model_monitoring/db/__init__.py +18 -0
- mlrun/model_monitoring/{stores → db/stores}/__init__.py +43 -36
- mlrun/model_monitoring/db/stores/base/__init__.py +15 -0
- mlrun/model_monitoring/{stores/model_endpoint_store.py → db/stores/base/store.py} +58 -32
- mlrun/model_monitoring/db/stores/sqldb/__init__.py +13 -0
- mlrun/model_monitoring/db/stores/sqldb/models/__init__.py +71 -0
- mlrun/model_monitoring/{stores → db/stores/sqldb}/models/base.py +109 -5
- mlrun/model_monitoring/db/stores/sqldb/models/mysql.py +88 -0
- mlrun/model_monitoring/{stores/models/mysql.py → db/stores/sqldb/models/sqlite.py} +19 -13
- mlrun/model_monitoring/db/stores/sqldb/sql_store.py +684 -0
- mlrun/model_monitoring/db/stores/v3io_kv/__init__.py +13 -0
- mlrun/model_monitoring/{stores/kv_model_endpoint_store.py → db/stores/v3io_kv/kv_store.py} +302 -155
- mlrun/model_monitoring/db/tsdb/__init__.py +100 -0
- mlrun/model_monitoring/db/tsdb/base.py +329 -0
- mlrun/model_monitoring/db/tsdb/helpers.py +30 -0
- mlrun/model_monitoring/db/tsdb/tdengine/__init__.py +15 -0
- mlrun/model_monitoring/db/tsdb/tdengine/schemas.py +240 -0
- mlrun/model_monitoring/db/tsdb/tdengine/stream_graph_steps.py +45 -0
- mlrun/model_monitoring/db/tsdb/tdengine/tdengine_connector.py +397 -0
- mlrun/model_monitoring/db/tsdb/v3io/__init__.py +15 -0
- mlrun/model_monitoring/db/tsdb/v3io/stream_graph_steps.py +117 -0
- mlrun/model_monitoring/db/tsdb/v3io/v3io_connector.py +630 -0
- mlrun/model_monitoring/evidently_application.py +6 -118
- mlrun/model_monitoring/features_drift_table.py +34 -22
- mlrun/model_monitoring/helpers.py +100 -7
- mlrun/model_monitoring/model_endpoint.py +3 -2
- mlrun/model_monitoring/stream_processing.py +93 -228
- mlrun/model_monitoring/tracking_policy.py +7 -1
- mlrun/model_monitoring/writer.py +152 -124
- mlrun/package/packagers_manager.py +1 -0
- mlrun/package/utils/_formatter.py +2 -2
- mlrun/platforms/__init__.py +11 -10
- mlrun/platforms/iguazio.py +21 -202
- mlrun/projects/operations.py +30 -16
- mlrun/projects/pipelines.py +92 -99
- mlrun/projects/project.py +757 -268
- mlrun/render.py +15 -14
- mlrun/run.py +160 -162
- mlrun/runtimes/__init__.py +55 -3
- mlrun/runtimes/base.py +33 -19
- mlrun/runtimes/databricks_job/databricks_wrapper.py +1 -1
- mlrun/runtimes/funcdoc.py +0 -28
- mlrun/runtimes/kubejob.py +28 -122
- mlrun/runtimes/local.py +5 -2
- mlrun/runtimes/mpijob/__init__.py +0 -20
- mlrun/runtimes/mpijob/abstract.py +8 -8
- mlrun/runtimes/mpijob/v1.py +1 -1
- mlrun/runtimes/nuclio/__init__.py +1 -0
- mlrun/runtimes/nuclio/api_gateway.py +709 -0
- mlrun/runtimes/nuclio/application/__init__.py +15 -0
- mlrun/runtimes/nuclio/application/application.py +523 -0
- mlrun/runtimes/nuclio/application/reverse_proxy.go +95 -0
- mlrun/runtimes/nuclio/function.py +98 -58
- mlrun/runtimes/nuclio/serving.py +36 -42
- mlrun/runtimes/pod.py +196 -45
- mlrun/runtimes/remotesparkjob.py +1 -1
- mlrun/runtimes/sparkjob/spark3job.py +1 -1
- mlrun/runtimes/utils.py +6 -73
- mlrun/secrets.py +6 -2
- mlrun/serving/remote.py +2 -3
- mlrun/serving/routers.py +7 -4
- mlrun/serving/server.py +7 -8
- mlrun/serving/states.py +73 -43
- mlrun/serving/v2_serving.py +8 -7
- mlrun/track/tracker.py +2 -1
- mlrun/utils/async_http.py +25 -5
- mlrun/utils/helpers.py +141 -75
- mlrun/utils/http.py +1 -1
- mlrun/utils/logger.py +39 -7
- mlrun/utils/notifications/notification/__init__.py +14 -9
- mlrun/utils/notifications/notification/base.py +12 -0
- mlrun/utils/notifications/notification/console.py +2 -0
- mlrun/utils/notifications/notification/git.py +3 -1
- mlrun/utils/notifications/notification/ipython.py +2 -0
- mlrun/utils/notifications/notification/slack.py +101 -21
- mlrun/utils/notifications/notification/webhook.py +11 -1
- mlrun/utils/notifications/notification_pusher.py +147 -16
- mlrun/utils/retryer.py +3 -2
- mlrun/utils/v3io_clients.py +0 -1
- mlrun/utils/version/version.json +2 -2
- {mlrun-1.7.0rc4.dist-info → mlrun-1.7.0rc20.dist-info}/METADATA +33 -18
- mlrun-1.7.0rc20.dist-info/RECORD +353 -0
- {mlrun-1.7.0rc4.dist-info → mlrun-1.7.0rc20.dist-info}/WHEEL +1 -1
- mlrun/kfpops.py +0 -868
- mlrun/model_monitoring/batch.py +0 -974
- mlrun/model_monitoring/stores/models/__init__.py +0 -27
- mlrun/model_monitoring/stores/sql_model_endpoint_store.py +0 -382
- mlrun/platforms/other.py +0 -305
- mlrun-1.7.0rc4.dist-info/RECORD +0 -321
- {mlrun-1.7.0rc4.dist-info → mlrun-1.7.0rc20.dist-info}/LICENSE +0 -0
- {mlrun-1.7.0rc4.dist-info → mlrun-1.7.0rc20.dist-info}/entry_points.txt +0 -0
- {mlrun-1.7.0rc4.dist-info → mlrun-1.7.0rc20.dist-info}/top_level.txt +0 -0
mlrun/projects/operations.py
CHANGED
|
@@ -15,9 +15,10 @@
|
|
|
15
15
|
import warnings
|
|
16
16
|
from typing import Optional, Union
|
|
17
17
|
|
|
18
|
-
import
|
|
18
|
+
from mlrun_pipelines.models import PipelineNodeWrapper
|
|
19
19
|
|
|
20
20
|
import mlrun
|
|
21
|
+
import mlrun.common.constants as mlrun_constants
|
|
21
22
|
from mlrun.utils import hub_prefix
|
|
22
23
|
|
|
23
24
|
from .pipelines import enrich_function_object, pipeline_context
|
|
@@ -76,7 +77,7 @@ def run_function(
|
|
|
76
77
|
notifications: list[mlrun.model.Notification] = None,
|
|
77
78
|
returns: Optional[list[Union[str, dict[str, str]]]] = None,
|
|
78
79
|
builder_env: Optional[list] = None,
|
|
79
|
-
) -> Union[mlrun.model.RunObject,
|
|
80
|
+
) -> Union[mlrun.model.RunObject, PipelineNodeWrapper]:
|
|
80
81
|
"""Run a local or remote task as part of a local/kubeflow pipeline
|
|
81
82
|
|
|
82
83
|
run_function() allow you to execute a function locally, on a remote cluster, or as part of an automated workflow
|
|
@@ -86,7 +87,7 @@ def run_function(
|
|
|
86
87
|
when functions run as part of a workflow/pipeline (project.run()) some attributes can be set at the run level,
|
|
87
88
|
e.g. local=True will run all the functions locally, setting artifact_path will direct all outputs to the same path.
|
|
88
89
|
project runs provide additional notifications/reporting and exception handling.
|
|
89
|
-
inside a Kubeflow pipeline (KFP) run_function() generates KFP
|
|
90
|
+
inside a Kubeflow pipeline (KFP) run_function() generates KFP node (see PipelineNodeWrapper) which forms a DAG
|
|
90
91
|
some behavior may differ between regular runs and deferred KFP runs.
|
|
91
92
|
|
|
92
93
|
example (use with function object)::
|
|
@@ -95,8 +96,11 @@ def run_function(
|
|
|
95
96
|
MODEL_CLASS = "sklearn.ensemble.RandomForestClassifier"
|
|
96
97
|
DATA_PATH = "s3://bigdata/data.parquet"
|
|
97
98
|
function = mlrun.import_function("hub://auto-trainer")
|
|
98
|
-
run1 = run_function(
|
|
99
|
-
|
|
99
|
+
run1 = run_function(
|
|
100
|
+
function,
|
|
101
|
+
params={"label_columns": LABELS, "model_class": MODEL_CLASS},
|
|
102
|
+
inputs={"dataset": DATA_PATH},
|
|
103
|
+
)
|
|
100
104
|
|
|
101
105
|
example (use with project)::
|
|
102
106
|
|
|
@@ -115,8 +119,12 @@ def run_function(
|
|
|
115
119
|
@dsl.pipeline(name="test pipeline", description="test")
|
|
116
120
|
def my_pipe(url=""):
|
|
117
121
|
run1 = run_function("loaddata", params={"url": url}, outputs=["data"])
|
|
118
|
-
run2 = run_function(
|
|
119
|
-
|
|
122
|
+
run2 = run_function(
|
|
123
|
+
"train",
|
|
124
|
+
params={"label_columns": LABELS, "model_class": MODEL_CLASS},
|
|
125
|
+
inputs={"dataset": run1.outputs["data"]},
|
|
126
|
+
)
|
|
127
|
+
|
|
120
128
|
|
|
121
129
|
project.run(workflow_handler=my_pipe, arguments={"param1": 7})
|
|
122
130
|
|
|
@@ -159,7 +167,7 @@ def run_function(
|
|
|
159
167
|
artifact type can be given there. The artifact key must appear in the dictionary as
|
|
160
168
|
"key": "the_key".
|
|
161
169
|
:param builder_env: env vars dict for source archive config/credentials e.g. builder_env={"GIT_TOKEN": token}
|
|
162
|
-
:return: MLRun RunObject or
|
|
170
|
+
:return: MLRun RunObject or PipelineNodeWrapper
|
|
163
171
|
"""
|
|
164
172
|
engine, function = _get_engine_and_function(function, project_object)
|
|
165
173
|
task = mlrun.new_task(
|
|
@@ -183,7 +191,9 @@ def run_function(
|
|
|
183
191
|
local = pipeline_context.is_run_local(local)
|
|
184
192
|
task.metadata.labels = task.metadata.labels or labels or {}
|
|
185
193
|
if pipeline_context.workflow_id:
|
|
186
|
-
task.metadata.labels[
|
|
194
|
+
task.metadata.labels[mlrun_constants.MLRunInternalLabels.workflow] = (
|
|
195
|
+
pipeline_context.workflow_id
|
|
196
|
+
)
|
|
187
197
|
if function.kind == "local":
|
|
188
198
|
command, function = mlrun.run.load_func_code(function)
|
|
189
199
|
function.spec.command = command
|
|
@@ -218,9 +228,9 @@ def run_function(
|
|
|
218
228
|
class BuildStatus:
|
|
219
229
|
"""returned status from build operation"""
|
|
220
230
|
|
|
221
|
-
def __init__(self, ready, outputs=
|
|
231
|
+
def __init__(self, ready, outputs=None, function=None):
|
|
222
232
|
self.ready = ready
|
|
223
|
-
self.outputs = outputs
|
|
233
|
+
self.outputs = outputs or {}
|
|
224
234
|
self.function = function
|
|
225
235
|
|
|
226
236
|
def after(self, step):
|
|
@@ -247,7 +257,7 @@ def build_function(
|
|
|
247
257
|
overwrite_build_params: bool = False,
|
|
248
258
|
extra_args: str = None,
|
|
249
259
|
force_build: bool = False,
|
|
250
|
-
) -> Union[BuildStatus,
|
|
260
|
+
) -> Union[BuildStatus, PipelineNodeWrapper]:
|
|
251
261
|
"""deploy ML function, build container with its dependencies
|
|
252
262
|
|
|
253
263
|
:param function: Name of the function (in the project) or function object
|
|
@@ -287,7 +297,11 @@ def build_function(
|
|
|
287
297
|
if overwrite_build_params:
|
|
288
298
|
function.spec.build.commands = None
|
|
289
299
|
if requirements or requirements_file:
|
|
290
|
-
function.with_requirements(
|
|
300
|
+
function.with_requirements(
|
|
301
|
+
requirements=requirements,
|
|
302
|
+
requirements_file=requirements_file,
|
|
303
|
+
overwrite=True,
|
|
304
|
+
)
|
|
291
305
|
if commands:
|
|
292
306
|
function.with_commands(commands)
|
|
293
307
|
return function.deploy_step(
|
|
@@ -329,9 +343,9 @@ def build_function(
|
|
|
329
343
|
class DeployStatus:
|
|
330
344
|
"""returned status from deploy operation"""
|
|
331
345
|
|
|
332
|
-
def __init__(self, state, outputs=
|
|
346
|
+
def __init__(self, state, outputs=None, function=None):
|
|
333
347
|
self.state = state
|
|
334
|
-
self.outputs = outputs
|
|
348
|
+
self.outputs = outputs or {}
|
|
335
349
|
self.function = function
|
|
336
350
|
|
|
337
351
|
def after(self, step):
|
|
@@ -351,7 +365,7 @@ def deploy_function(
|
|
|
351
365
|
builder_env: dict = None,
|
|
352
366
|
project_object=None,
|
|
353
367
|
mock: bool = None,
|
|
354
|
-
) -> Union[DeployStatus,
|
|
368
|
+
) -> Union[DeployStatus, PipelineNodeWrapper]:
|
|
355
369
|
"""deploy real-time (nuclio based) functions
|
|
356
370
|
|
|
357
371
|
:param function: name of the function (in the project) or function object
|
mlrun/projects/pipelines.py
CHANGED
|
@@ -13,24 +13,26 @@
|
|
|
13
13
|
# limitations under the License.
|
|
14
14
|
import abc
|
|
15
15
|
import builtins
|
|
16
|
+
import http
|
|
16
17
|
import importlib.util as imputil
|
|
17
18
|
import os
|
|
18
19
|
import tempfile
|
|
19
20
|
import typing
|
|
20
21
|
import uuid
|
|
21
22
|
|
|
22
|
-
import
|
|
23
|
-
|
|
23
|
+
import mlrun_pipelines.common.models
|
|
24
|
+
import mlrun_pipelines.patcher
|
|
24
25
|
from kfp.compiler import compiler
|
|
26
|
+
from mlrun_pipelines.helpers import new_pipe_metadata
|
|
25
27
|
|
|
26
28
|
import mlrun
|
|
29
|
+
import mlrun.common.runtimes.constants
|
|
27
30
|
import mlrun.common.schemas
|
|
28
31
|
import mlrun.utils.notifications
|
|
29
32
|
from mlrun.errors import err_to_str
|
|
30
33
|
from mlrun.utils import (
|
|
31
34
|
get_ui_url,
|
|
32
35
|
logger,
|
|
33
|
-
new_pipe_metadata,
|
|
34
36
|
normalize_workflow_name,
|
|
35
37
|
retry_until_successful,
|
|
36
38
|
)
|
|
@@ -300,72 +302,6 @@ def _enrich_kfp_pod_security_context(kfp_pod_template, function):
|
|
|
300
302
|
}
|
|
301
303
|
|
|
302
304
|
|
|
303
|
-
# When we run pipelines, the kfp.compile.Compile.compile() method takes the decorated function with @dsl.pipeline and
|
|
304
|
-
# converts it to a k8s object. As part of the flow in the Compile.compile() method,
|
|
305
|
-
# we call _create_and_write_workflow, which builds a dictionary from the workflow and then writes it to a file.
|
|
306
|
-
# Unfortunately, the kfp sdk does not provide an API for configuring priority_class_name and other attributes.
|
|
307
|
-
# I ran across the following problem when seeking for a method to set the priority_class_name:
|
|
308
|
-
# https://github.com/kubeflow/pipelines/issues/3594
|
|
309
|
-
# When we patch the _create_and_write_workflow, we can eventually obtain the dictionary right before we write it
|
|
310
|
-
# to a file and enrich it with argo compatible fields, make sure you looking for the same argo version we use
|
|
311
|
-
# https://github.com/argoproj/argo-workflows/blob/release-2.7/pkg/apis/workflow/v1alpha1/workflow_types.go
|
|
312
|
-
def _create_enriched_mlrun_workflow(
|
|
313
|
-
self,
|
|
314
|
-
pipeline_func: typing.Callable,
|
|
315
|
-
pipeline_name: typing.Optional[str] = None,
|
|
316
|
-
pipeline_description: typing.Optional[str] = None,
|
|
317
|
-
params_list: typing.Optional[list[dsl.PipelineParam]] = None,
|
|
318
|
-
pipeline_conf: typing.Optional[dsl.PipelineConf] = None,
|
|
319
|
-
):
|
|
320
|
-
"""Call internal implementation of create_workflow and enrich with mlrun functions attributes"""
|
|
321
|
-
workflow = self._original_create_workflow(
|
|
322
|
-
pipeline_func, pipeline_name, pipeline_description, params_list, pipeline_conf
|
|
323
|
-
)
|
|
324
|
-
# We don't want to interrupt the original flow and don't know all the scenarios the function could be called.
|
|
325
|
-
# that's why we have try/except on all the code of the enrichment and also specific try/except for errors that
|
|
326
|
-
# we know can be raised.
|
|
327
|
-
try:
|
|
328
|
-
functions = []
|
|
329
|
-
if pipeline_context.functions:
|
|
330
|
-
try:
|
|
331
|
-
functions = pipeline_context.functions.values()
|
|
332
|
-
except Exception as err:
|
|
333
|
-
logger.debug(
|
|
334
|
-
"Unable to retrieve project functions, not enriching workflow with mlrun",
|
|
335
|
-
error=err_to_str(err),
|
|
336
|
-
)
|
|
337
|
-
return workflow
|
|
338
|
-
|
|
339
|
-
# enrich each pipeline step with your desire k8s attribute
|
|
340
|
-
for kfp_step_template in workflow["spec"]["templates"]:
|
|
341
|
-
if kfp_step_template.get("container"):
|
|
342
|
-
for function_obj in functions:
|
|
343
|
-
# we condition within each function since the comparison between the function and
|
|
344
|
-
# the kfp pod may change depending on the attribute type.
|
|
345
|
-
_set_function_attribute_on_kfp_pod(
|
|
346
|
-
kfp_step_template,
|
|
347
|
-
function_obj,
|
|
348
|
-
"PriorityClassName",
|
|
349
|
-
"priority_class_name",
|
|
350
|
-
)
|
|
351
|
-
_enrich_kfp_pod_security_context(
|
|
352
|
-
kfp_step_template,
|
|
353
|
-
function_obj,
|
|
354
|
-
)
|
|
355
|
-
except mlrun.errors.MLRunInvalidArgumentError:
|
|
356
|
-
raise
|
|
357
|
-
except Exception as err:
|
|
358
|
-
logger.debug(
|
|
359
|
-
"Something in the enrichment of kfp pods failed", error=err_to_str(err)
|
|
360
|
-
)
|
|
361
|
-
return workflow
|
|
362
|
-
|
|
363
|
-
|
|
364
|
-
# patching function as class method
|
|
365
|
-
kfp.compiler.Compiler._original_create_workflow = kfp.compiler.Compiler._create_workflow
|
|
366
|
-
kfp.compiler.Compiler._create_workflow = _create_enriched_mlrun_workflow
|
|
367
|
-
|
|
368
|
-
|
|
369
305
|
def get_db_function(project, key) -> mlrun.runtimes.BaseRuntime:
|
|
370
306
|
project_instance, name, tag, hash_key = parse_versioned_object_uri(
|
|
371
307
|
key, project.metadata.name
|
|
@@ -412,6 +348,11 @@ def enrich_function_object(
|
|
|
412
348
|
if decorator:
|
|
413
349
|
decorator(f)
|
|
414
350
|
|
|
351
|
+
if project.spec.default_function_node_selector:
|
|
352
|
+
f.enrich_runtime_spec(
|
|
353
|
+
project.spec.default_function_node_selector,
|
|
354
|
+
)
|
|
355
|
+
|
|
415
356
|
if try_auto_mount:
|
|
416
357
|
if (
|
|
417
358
|
decorator and AutoMountType.is_auto_modifier(decorator)
|
|
@@ -431,7 +372,7 @@ class _PipelineRunStatus:
|
|
|
431
372
|
engine: type["_PipelineRunner"],
|
|
432
373
|
project: "mlrun.projects.MlrunProject",
|
|
433
374
|
workflow: WorkflowSpec = None,
|
|
434
|
-
state:
|
|
375
|
+
state: mlrun_pipelines.common.models.RunStatuses = "",
|
|
435
376
|
exc: Exception = None,
|
|
436
377
|
):
|
|
437
378
|
"""
|
|
@@ -451,7 +392,10 @@ class _PipelineRunStatus:
|
|
|
451
392
|
|
|
452
393
|
@property
|
|
453
394
|
def state(self):
|
|
454
|
-
if
|
|
395
|
+
if (
|
|
396
|
+
self._state
|
|
397
|
+
not in mlrun_pipelines.common.models.RunStatuses.stable_statuses()
|
|
398
|
+
):
|
|
455
399
|
self._state = self._engine.get_state(self.run_id, self.project)
|
|
456
400
|
return self._state
|
|
457
401
|
|
|
@@ -516,7 +460,7 @@ class _PipelineRunner(abc.ABC):
|
|
|
516
460
|
@staticmethod
|
|
517
461
|
def _get_handler(workflow_handler, workflow_spec, project, secrets):
|
|
518
462
|
if not (workflow_handler and callable(workflow_handler)):
|
|
519
|
-
workflow_file = workflow_spec.get_source_file(project.spec.
|
|
463
|
+
workflow_file = workflow_spec.get_source_file(project.spec.get_code_path())
|
|
520
464
|
workflow_handler = create_pipeline(
|
|
521
465
|
project,
|
|
522
466
|
workflow_file,
|
|
@@ -536,6 +480,7 @@ class _PipelineRunner(abc.ABC):
|
|
|
536
480
|
timeout=None,
|
|
537
481
|
expected_statuses=None,
|
|
538
482
|
notifiers: mlrun.utils.notifications.CustomNotificationPusher = None,
|
|
483
|
+
**kwargs,
|
|
539
484
|
):
|
|
540
485
|
pass
|
|
541
486
|
|
|
@@ -548,7 +493,7 @@ class _KFPRunner(_PipelineRunner):
|
|
|
548
493
|
@classmethod
|
|
549
494
|
def save(cls, project, workflow_spec: WorkflowSpec, target, artifact_path=None):
|
|
550
495
|
pipeline_context.set(project, workflow_spec)
|
|
551
|
-
workflow_file = workflow_spec.get_source_file(project.spec.
|
|
496
|
+
workflow_file = workflow_spec.get_source_file(project.spec.get_code_path())
|
|
552
497
|
functions = FunctionsDict(project)
|
|
553
498
|
pipeline = create_pipeline(
|
|
554
499
|
project,
|
|
@@ -608,6 +553,7 @@ class _KFPRunner(_PipelineRunner):
|
|
|
608
553
|
namespace=namespace,
|
|
609
554
|
artifact_path=artifact_path,
|
|
610
555
|
cleanup_ttl=workflow_spec.cleanup_ttl,
|
|
556
|
+
timeout=int(mlrun.mlconf.workflows.timeouts.kfp),
|
|
611
557
|
)
|
|
612
558
|
|
|
613
559
|
# The user provided workflow code might have made changes to function specs that require cleanup
|
|
@@ -666,6 +612,7 @@ class _KFPRunner(_PipelineRunner):
|
|
|
666
612
|
timeout=None,
|
|
667
613
|
expected_statuses=None,
|
|
668
614
|
notifiers: mlrun.utils.notifications.CustomNotificationPusher = None,
|
|
615
|
+
**kwargs,
|
|
669
616
|
):
|
|
670
617
|
if timeout is None:
|
|
671
618
|
timeout = 60 * 60
|
|
@@ -747,7 +694,7 @@ class _LocalRunner(_PipelineRunner):
|
|
|
747
694
|
err = None
|
|
748
695
|
try:
|
|
749
696
|
workflow_handler(**workflow_spec.args)
|
|
750
|
-
state =
|
|
697
|
+
state = mlrun_pipelines.common.models.RunStatuses.succeeded
|
|
751
698
|
except Exception as exc:
|
|
752
699
|
err = exc
|
|
753
700
|
logger.exception("Workflow run failed")
|
|
@@ -755,7 +702,7 @@ class _LocalRunner(_PipelineRunner):
|
|
|
755
702
|
f":x: Workflow {workflow_id} run failed!, error: {err_to_str(exc)}",
|
|
756
703
|
mlrun.common.schemas.NotificationSeverity.ERROR,
|
|
757
704
|
)
|
|
758
|
-
state =
|
|
705
|
+
state = mlrun_pipelines.common.models.RunStatuses.failed
|
|
759
706
|
mlrun.run.wait_for_runs_completion(pipeline_context.runs_map.values())
|
|
760
707
|
project.notifiers.push_pipeline_run_results(
|
|
761
708
|
pipeline_context.runs_map.values(), state=state
|
|
@@ -789,6 +736,7 @@ class _LocalRunner(_PipelineRunner):
|
|
|
789
736
|
timeout=None,
|
|
790
737
|
expected_statuses=None,
|
|
791
738
|
notifiers: mlrun.utils.notifications.CustomNotificationPusher = None,
|
|
739
|
+
**kwargs,
|
|
792
740
|
):
|
|
793
741
|
pass
|
|
794
742
|
|
|
@@ -865,22 +813,44 @@ class _RemoteRunner(_PipelineRunner):
|
|
|
865
813
|
)
|
|
866
814
|
return
|
|
867
815
|
|
|
816
|
+
get_workflow_id_timeout = max(
|
|
817
|
+
int(mlrun.mlconf.workflows.timeouts.remote),
|
|
818
|
+
int(getattr(mlrun.mlconf.workflows.timeouts, inner_engine.engine)),
|
|
819
|
+
)
|
|
820
|
+
|
|
868
821
|
logger.debug(
|
|
869
822
|
"Workflow submitted, waiting for pipeline run to start",
|
|
870
823
|
workflow_name=workflow_response.name,
|
|
824
|
+
get_workflow_id_timeout=get_workflow_id_timeout,
|
|
871
825
|
)
|
|
872
826
|
|
|
827
|
+
def _get_workflow_id_or_bail():
|
|
828
|
+
try:
|
|
829
|
+
return run_db.get_workflow_id(
|
|
830
|
+
project=project.name,
|
|
831
|
+
name=workflow_response.name,
|
|
832
|
+
run_id=workflow_response.run_id,
|
|
833
|
+
engine=workflow_spec.engine,
|
|
834
|
+
)
|
|
835
|
+
except mlrun.errors.MLRunHTTPStatusError as get_wf_exc:
|
|
836
|
+
# fail fast on specific errors
|
|
837
|
+
if get_wf_exc.error_status_code in [
|
|
838
|
+
http.HTTPStatus.PRECONDITION_FAILED
|
|
839
|
+
]:
|
|
840
|
+
raise mlrun.errors.MLRunFatalFailureError(
|
|
841
|
+
original_exception=get_wf_exc
|
|
842
|
+
)
|
|
843
|
+
|
|
844
|
+
# raise for a retry (on other errors)
|
|
845
|
+
raise
|
|
846
|
+
|
|
873
847
|
# Getting workflow id from run:
|
|
874
848
|
response = retry_until_successful(
|
|
875
849
|
1,
|
|
876
|
-
|
|
850
|
+
get_workflow_id_timeout,
|
|
877
851
|
logger,
|
|
878
852
|
False,
|
|
879
|
-
|
|
880
|
-
project=project.name,
|
|
881
|
-
name=workflow_response.name,
|
|
882
|
-
run_id=workflow_response.run_id,
|
|
883
|
-
engine=workflow_spec.engine,
|
|
853
|
+
_get_workflow_id_or_bail,
|
|
884
854
|
)
|
|
885
855
|
workflow_id = response.workflow_id
|
|
886
856
|
# After fetching the workflow_id the workflow executed successfully
|
|
@@ -892,9 +862,9 @@ class _RemoteRunner(_PipelineRunner):
|
|
|
892
862
|
f":x: Workflow {workflow_name} run failed!, error: {err_to_str(exc)}",
|
|
893
863
|
mlrun.common.schemas.NotificationSeverity.ERROR,
|
|
894
864
|
)
|
|
895
|
-
state =
|
|
865
|
+
state = mlrun_pipelines.common.models.RunStatuses.failed
|
|
896
866
|
else:
|
|
897
|
-
state =
|
|
867
|
+
state = mlrun_pipelines.common.models.RunStatuses.running
|
|
898
868
|
project.notifiers.push_pipeline_start_message(
|
|
899
869
|
project.metadata.name,
|
|
900
870
|
)
|
|
@@ -911,24 +881,47 @@ class _RemoteRunner(_PipelineRunner):
|
|
|
911
881
|
@staticmethod
|
|
912
882
|
def get_run_status(
|
|
913
883
|
project,
|
|
914
|
-
run,
|
|
884
|
+
run: _PipelineRunStatus,
|
|
915
885
|
timeout=None,
|
|
916
886
|
expected_statuses=None,
|
|
917
887
|
notifiers: mlrun.utils.notifications.CustomNotificationPusher = None,
|
|
888
|
+
inner_engine: type[_PipelineRunner] = None,
|
|
918
889
|
):
|
|
919
|
-
|
|
920
|
-
|
|
921
|
-
|
|
922
|
-
|
|
923
|
-
|
|
924
|
-
|
|
925
|
-
|
|
926
|
-
|
|
927
|
-
|
|
890
|
+
inner_engine = inner_engine or _KFPRunner
|
|
891
|
+
if inner_engine.engine == _KFPRunner.engine:
|
|
892
|
+
# ignore notifiers for remote notifications, as they are handled by the remote pipeline notifications,
|
|
893
|
+
# so overriding with CustomNotificationPusher with empty list of notifiers or only local notifiers
|
|
894
|
+
local_project_notifiers = list(
|
|
895
|
+
set(mlrun.utils.notifications.NotificationTypes.local()).intersection(
|
|
896
|
+
set(project.notifiers.notifications.keys())
|
|
897
|
+
)
|
|
898
|
+
)
|
|
899
|
+
notifiers = mlrun.utils.notifications.CustomNotificationPusher(
|
|
900
|
+
local_project_notifiers
|
|
901
|
+
)
|
|
902
|
+
return _KFPRunner.get_run_status(
|
|
903
|
+
project,
|
|
904
|
+
run,
|
|
905
|
+
timeout,
|
|
906
|
+
expected_statuses,
|
|
907
|
+
notifiers=notifiers,
|
|
908
|
+
)
|
|
928
909
|
|
|
929
|
-
|
|
930
|
-
|
|
931
|
-
|
|
910
|
+
elif inner_engine.engine == _LocalRunner.engine:
|
|
911
|
+
mldb = mlrun.db.get_run_db(secrets=project._secrets)
|
|
912
|
+
pipeline_runner_run = mldb.read_run(run.run_id, project=project.name)
|
|
913
|
+
pipeline_runner_run = mlrun.run.RunObject.from_dict(pipeline_runner_run)
|
|
914
|
+
pipeline_runner_run.logs(db=mldb)
|
|
915
|
+
pipeline_runner_run.refresh()
|
|
916
|
+
run._state = mlrun.common.runtimes.constants.RunStates.run_state_to_pipeline_run_status(
|
|
917
|
+
pipeline_runner_run.status.state
|
|
918
|
+
)
|
|
919
|
+
run._exc = pipeline_runner_run.status.error
|
|
920
|
+
|
|
921
|
+
else:
|
|
922
|
+
raise mlrun.errors.MLRunInvalidArgumentError(
|
|
923
|
+
f"Unsupported inner runner engine: {inner_engine.engine}"
|
|
924
|
+
)
|
|
932
925
|
|
|
933
926
|
|
|
934
927
|
def create_pipeline(project, pipeline, functions, secrets=None, handler=None):
|
|
@@ -1059,7 +1052,7 @@ def load_and_run(
|
|
|
1059
1052
|
)
|
|
1060
1053
|
|
|
1061
1054
|
except Exception as exc:
|
|
1062
|
-
logger.error("Failed to send slack notification", exc=exc)
|
|
1055
|
+
logger.error("Failed to send slack notification", exc=err_to_str(exc))
|
|
1063
1056
|
|
|
1064
1057
|
raise error
|
|
1065
1058
|
|
|
@@ -1087,7 +1080,7 @@ def load_and_run(
|
|
|
1087
1080
|
context.log_result(key="workflow_id", value=run.run_id)
|
|
1088
1081
|
context.log_result(key="engine", value=run._engine.engine, commit=True)
|
|
1089
1082
|
|
|
1090
|
-
if run.state ==
|
|
1083
|
+
if run.state == mlrun_pipelines.common.models.RunStatuses.failed:
|
|
1091
1084
|
raise RuntimeError(f"Workflow {workflow_log_message} failed") from run.exc
|
|
1092
1085
|
|
|
1093
1086
|
if wait_for_completion:
|
|
@@ -1102,7 +1095,7 @@ def load_and_run(
|
|
|
1102
1095
|
|
|
1103
1096
|
pipeline_state, _, _ = project.get_run_status(run)
|
|
1104
1097
|
context.log_result(key="workflow_state", value=pipeline_state, commit=True)
|
|
1105
|
-
if pipeline_state !=
|
|
1098
|
+
if pipeline_state != mlrun_pipelines.common.models.RunStatuses.succeeded:
|
|
1106
1099
|
raise RuntimeError(
|
|
1107
1100
|
f"Workflow {workflow_log_message} failed, state={pipeline_state}"
|
|
1108
1101
|
)
|