mlrun 1.7.0rc4__py3-none-any.whl → 1.7.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of mlrun might be problematic. Click here for more details.
- mlrun/__init__.py +11 -1
- mlrun/__main__.py +39 -121
- mlrun/{datastore/helpers.py → alerts/__init__.py} +2 -5
- mlrun/alerts/alert.py +248 -0
- mlrun/api/schemas/__init__.py +4 -3
- mlrun/artifacts/__init__.py +8 -3
- mlrun/artifacts/base.py +39 -254
- mlrun/artifacts/dataset.py +9 -190
- mlrun/artifacts/manager.py +73 -46
- mlrun/artifacts/model.py +30 -158
- mlrun/artifacts/plots.py +23 -380
- mlrun/common/constants.py +73 -1
- mlrun/common/db/sql_session.py +3 -2
- mlrun/common/formatters/__init__.py +21 -0
- mlrun/common/formatters/artifact.py +46 -0
- mlrun/common/formatters/base.py +113 -0
- mlrun/common/formatters/feature_set.py +44 -0
- mlrun/common/formatters/function.py +46 -0
- mlrun/common/formatters/pipeline.py +53 -0
- mlrun/common/formatters/project.py +51 -0
- mlrun/common/formatters/run.py +29 -0
- mlrun/common/helpers.py +11 -1
- mlrun/{runtimes → common/runtimes}/constants.py +32 -4
- mlrun/common/schemas/__init__.py +31 -4
- mlrun/common/schemas/alert.py +202 -0
- mlrun/common/schemas/api_gateway.py +196 -0
- mlrun/common/schemas/artifact.py +28 -1
- mlrun/common/schemas/auth.py +13 -2
- mlrun/common/schemas/client_spec.py +2 -1
- mlrun/common/schemas/common.py +7 -4
- mlrun/common/schemas/constants.py +3 -0
- mlrun/common/schemas/feature_store.py +58 -28
- mlrun/common/schemas/frontend_spec.py +8 -0
- mlrun/common/schemas/function.py +11 -0
- mlrun/common/schemas/hub.py +7 -9
- mlrun/common/schemas/model_monitoring/__init__.py +21 -4
- mlrun/common/schemas/model_monitoring/constants.py +136 -42
- mlrun/common/schemas/model_monitoring/grafana.py +9 -5
- mlrun/common/schemas/model_monitoring/model_endpoints.py +89 -41
- mlrun/common/schemas/notification.py +69 -12
- mlrun/{runtimes/mpijob/v1alpha1.py → common/schemas/pagination.py} +10 -13
- mlrun/common/schemas/pipeline.py +7 -0
- mlrun/common/schemas/project.py +67 -16
- mlrun/common/schemas/runs.py +17 -0
- mlrun/common/schemas/schedule.py +1 -1
- mlrun/common/schemas/workflow.py +10 -2
- mlrun/common/types.py +14 -1
- mlrun/config.py +233 -58
- mlrun/data_types/data_types.py +11 -1
- mlrun/data_types/spark.py +5 -4
- mlrun/data_types/to_pandas.py +75 -34
- mlrun/datastore/__init__.py +8 -10
- mlrun/datastore/alibaba_oss.py +131 -0
- mlrun/datastore/azure_blob.py +131 -43
- mlrun/datastore/base.py +107 -47
- mlrun/datastore/datastore.py +17 -7
- mlrun/datastore/datastore_profile.py +91 -7
- mlrun/datastore/dbfs_store.py +3 -7
- mlrun/datastore/filestore.py +1 -3
- mlrun/datastore/google_cloud_storage.py +92 -32
- mlrun/datastore/hdfs.py +5 -0
- mlrun/datastore/inmem.py +6 -3
- mlrun/datastore/redis.py +3 -2
- mlrun/datastore/s3.py +30 -12
- mlrun/datastore/snowflake_utils.py +45 -0
- mlrun/datastore/sources.py +274 -59
- mlrun/datastore/spark_utils.py +30 -0
- mlrun/datastore/store_resources.py +9 -7
- mlrun/datastore/storeytargets.py +151 -0
- mlrun/datastore/targets.py +387 -119
- mlrun/datastore/utils.py +68 -5
- mlrun/datastore/v3io.py +28 -50
- mlrun/db/auth_utils.py +152 -0
- mlrun/db/base.py +245 -20
- mlrun/db/factory.py +1 -4
- mlrun/db/httpdb.py +909 -231
- mlrun/db/nopdb.py +279 -14
- mlrun/errors.py +35 -5
- mlrun/execution.py +111 -38
- mlrun/feature_store/__init__.py +0 -2
- mlrun/feature_store/api.py +46 -53
- mlrun/feature_store/common.py +6 -11
- mlrun/feature_store/feature_set.py +48 -23
- mlrun/feature_store/feature_vector.py +13 -2
- mlrun/feature_store/ingestion.py +7 -6
- mlrun/feature_store/retrieval/base.py +9 -4
- mlrun/feature_store/retrieval/dask_merger.py +2 -0
- mlrun/feature_store/retrieval/job.py +13 -4
- mlrun/feature_store/retrieval/local_merger.py +2 -0
- mlrun/feature_store/retrieval/spark_merger.py +24 -32
- mlrun/feature_store/steps.py +38 -19
- mlrun/features.py +6 -14
- mlrun/frameworks/_common/plan.py +3 -3
- mlrun/frameworks/_dl_common/loggers/tensorboard_logger.py +7 -12
- mlrun/frameworks/_ml_common/plan.py +1 -1
- mlrun/frameworks/auto_mlrun/auto_mlrun.py +2 -2
- mlrun/frameworks/lgbm/__init__.py +1 -1
- mlrun/frameworks/lgbm/callbacks/callback.py +2 -4
- mlrun/frameworks/lgbm/model_handler.py +1 -1
- mlrun/frameworks/parallel_coordinates.py +4 -4
- mlrun/frameworks/pytorch/__init__.py +2 -2
- mlrun/frameworks/sklearn/__init__.py +1 -1
- mlrun/frameworks/sklearn/mlrun_interface.py +13 -3
- mlrun/frameworks/tf_keras/__init__.py +5 -2
- mlrun/frameworks/tf_keras/callbacks/logging_callback.py +1 -1
- mlrun/frameworks/tf_keras/mlrun_interface.py +2 -2
- mlrun/frameworks/xgboost/__init__.py +1 -1
- mlrun/k8s_utils.py +57 -12
- mlrun/launcher/__init__.py +1 -1
- mlrun/launcher/base.py +6 -5
- mlrun/launcher/client.py +13 -11
- mlrun/launcher/factory.py +1 -1
- mlrun/launcher/local.py +15 -5
- mlrun/launcher/remote.py +10 -3
- mlrun/lists.py +6 -2
- mlrun/model.py +297 -48
- mlrun/model_monitoring/__init__.py +1 -1
- mlrun/model_monitoring/api.py +152 -357
- mlrun/model_monitoring/applications/__init__.py +10 -0
- mlrun/model_monitoring/applications/_application_steps.py +190 -0
- mlrun/model_monitoring/applications/base.py +108 -0
- mlrun/model_monitoring/applications/context.py +341 -0
- mlrun/model_monitoring/{evidently_application.py → applications/evidently_base.py} +27 -22
- mlrun/model_monitoring/applications/histogram_data_drift.py +227 -91
- mlrun/model_monitoring/applications/results.py +99 -0
- mlrun/model_monitoring/controller.py +130 -303
- mlrun/model_monitoring/{stores/models/sqlite.py → db/__init__.py} +5 -10
- mlrun/model_monitoring/db/stores/__init__.py +136 -0
- mlrun/model_monitoring/db/stores/base/__init__.py +15 -0
- mlrun/model_monitoring/db/stores/base/store.py +213 -0
- mlrun/model_monitoring/db/stores/sqldb/__init__.py +13 -0
- mlrun/model_monitoring/db/stores/sqldb/models/__init__.py +71 -0
- mlrun/model_monitoring/db/stores/sqldb/models/base.py +190 -0
- mlrun/model_monitoring/db/stores/sqldb/models/mysql.py +103 -0
- mlrun/model_monitoring/{stores/models/mysql.py → db/stores/sqldb/models/sqlite.py} +19 -13
- mlrun/model_monitoring/db/stores/sqldb/sql_store.py +659 -0
- mlrun/model_monitoring/db/stores/v3io_kv/__init__.py +13 -0
- mlrun/model_monitoring/db/stores/v3io_kv/kv_store.py +726 -0
- mlrun/model_monitoring/db/tsdb/__init__.py +105 -0
- mlrun/model_monitoring/db/tsdb/base.py +448 -0
- mlrun/model_monitoring/db/tsdb/helpers.py +30 -0
- mlrun/model_monitoring/db/tsdb/tdengine/__init__.py +15 -0
- mlrun/model_monitoring/db/tsdb/tdengine/schemas.py +298 -0
- mlrun/model_monitoring/db/tsdb/tdengine/stream_graph_steps.py +42 -0
- mlrun/model_monitoring/db/tsdb/tdengine/tdengine_connector.py +522 -0
- mlrun/model_monitoring/db/tsdb/v3io/__init__.py +15 -0
- mlrun/model_monitoring/db/tsdb/v3io/stream_graph_steps.py +158 -0
- mlrun/model_monitoring/db/tsdb/v3io/v3io_connector.py +849 -0
- mlrun/model_monitoring/features_drift_table.py +34 -22
- mlrun/model_monitoring/helpers.py +177 -39
- mlrun/model_monitoring/model_endpoint.py +3 -2
- mlrun/model_monitoring/stream_processing.py +165 -398
- mlrun/model_monitoring/tracking_policy.py +7 -1
- mlrun/model_monitoring/writer.py +161 -125
- mlrun/package/packagers/default_packager.py +2 -2
- mlrun/package/packagers_manager.py +1 -0
- mlrun/package/utils/_formatter.py +2 -2
- mlrun/platforms/__init__.py +11 -10
- mlrun/platforms/iguazio.py +67 -228
- mlrun/projects/__init__.py +6 -1
- mlrun/projects/operations.py +47 -20
- mlrun/projects/pipelines.py +396 -249
- mlrun/projects/project.py +1176 -406
- mlrun/render.py +28 -22
- mlrun/run.py +208 -181
- mlrun/runtimes/__init__.py +76 -11
- mlrun/runtimes/base.py +54 -24
- mlrun/runtimes/daskjob.py +9 -2
- mlrun/runtimes/databricks_job/databricks_runtime.py +1 -0
- mlrun/runtimes/databricks_job/databricks_wrapper.py +1 -1
- mlrun/runtimes/funcdoc.py +1 -29
- mlrun/runtimes/kubejob.py +34 -128
- mlrun/runtimes/local.py +39 -10
- mlrun/runtimes/mpijob/__init__.py +0 -20
- mlrun/runtimes/mpijob/abstract.py +8 -8
- mlrun/runtimes/mpijob/v1.py +1 -1
- mlrun/runtimes/nuclio/__init__.py +1 -0
- mlrun/runtimes/nuclio/api_gateway.py +769 -0
- mlrun/runtimes/nuclio/application/__init__.py +15 -0
- mlrun/runtimes/nuclio/application/application.py +758 -0
- mlrun/runtimes/nuclio/application/reverse_proxy.go +95 -0
- mlrun/runtimes/nuclio/function.py +188 -68
- mlrun/runtimes/nuclio/serving.py +57 -60
- mlrun/runtimes/pod.py +191 -58
- mlrun/runtimes/remotesparkjob.py +11 -8
- mlrun/runtimes/sparkjob/spark3job.py +17 -18
- mlrun/runtimes/utils.py +40 -73
- mlrun/secrets.py +6 -2
- mlrun/serving/__init__.py +8 -1
- mlrun/serving/remote.py +2 -3
- mlrun/serving/routers.py +89 -64
- mlrun/serving/server.py +54 -26
- mlrun/serving/states.py +187 -56
- mlrun/serving/utils.py +19 -11
- mlrun/serving/v2_serving.py +136 -63
- mlrun/track/tracker.py +2 -1
- mlrun/track/trackers/mlflow_tracker.py +5 -0
- mlrun/utils/async_http.py +26 -6
- mlrun/utils/db.py +18 -0
- mlrun/utils/helpers.py +375 -105
- mlrun/utils/http.py +2 -2
- mlrun/utils/logger.py +75 -9
- mlrun/utils/notifications/notification/__init__.py +14 -10
- mlrun/utils/notifications/notification/base.py +48 -0
- mlrun/utils/notifications/notification/console.py +2 -0
- mlrun/utils/notifications/notification/git.py +24 -1
- mlrun/utils/notifications/notification/ipython.py +2 -0
- mlrun/utils/notifications/notification/slack.py +96 -21
- mlrun/utils/notifications/notification/webhook.py +63 -2
- mlrun/utils/notifications/notification_pusher.py +146 -16
- mlrun/utils/regex.py +9 -0
- mlrun/utils/retryer.py +3 -2
- mlrun/utils/v3io_clients.py +2 -3
- mlrun/utils/version/version.json +2 -2
- mlrun-1.7.2.dist-info/METADATA +390 -0
- mlrun-1.7.2.dist-info/RECORD +351 -0
- {mlrun-1.7.0rc4.dist-info → mlrun-1.7.2.dist-info}/WHEEL +1 -1
- mlrun/feature_store/retrieval/conversion.py +0 -271
- mlrun/kfpops.py +0 -868
- mlrun/model_monitoring/application.py +0 -310
- mlrun/model_monitoring/batch.py +0 -974
- mlrun/model_monitoring/controller_handler.py +0 -37
- mlrun/model_monitoring/prometheus.py +0 -216
- mlrun/model_monitoring/stores/__init__.py +0 -111
- mlrun/model_monitoring/stores/kv_model_endpoint_store.py +0 -574
- mlrun/model_monitoring/stores/model_endpoint_store.py +0 -145
- mlrun/model_monitoring/stores/models/__init__.py +0 -27
- mlrun/model_monitoring/stores/models/base.py +0 -84
- mlrun/model_monitoring/stores/sql_model_endpoint_store.py +0 -382
- mlrun/platforms/other.py +0 -305
- mlrun-1.7.0rc4.dist-info/METADATA +0 -269
- mlrun-1.7.0rc4.dist-info/RECORD +0 -321
- {mlrun-1.7.0rc4.dist-info → mlrun-1.7.2.dist-info}/LICENSE +0 -0
- {mlrun-1.7.0rc4.dist-info → mlrun-1.7.2.dist-info}/entry_points.txt +0 -0
- {mlrun-1.7.0rc4.dist-info → mlrun-1.7.2.dist-info}/top_level.txt +0 -0
mlrun/kfpops.py
DELETED
|
@@ -1,868 +0,0 @@
|
|
|
1
|
-
# Copyright 2023 Iguazio
|
|
2
|
-
#
|
|
3
|
-
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
-
# you may not use this file except in compliance with the License.
|
|
5
|
-
# You may obtain a copy of the License at
|
|
6
|
-
#
|
|
7
|
-
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
-
#
|
|
9
|
-
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
-
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
-
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
-
# See the License for the specific language governing permissions and
|
|
13
|
-
# limitations under the License.
|
|
14
|
-
import json
|
|
15
|
-
import os
|
|
16
|
-
import os.path
|
|
17
|
-
from copy import deepcopy
|
|
18
|
-
from typing import Union
|
|
19
|
-
|
|
20
|
-
import inflection
|
|
21
|
-
from kfp import dsl
|
|
22
|
-
from kubernetes import client as k8s_client
|
|
23
|
-
|
|
24
|
-
import mlrun
|
|
25
|
-
from mlrun.errors import err_to_str
|
|
26
|
-
|
|
27
|
-
from .config import config
|
|
28
|
-
from .model import HyperParamOptions, RunSpec
|
|
29
|
-
from .utils import (
|
|
30
|
-
dict_to_yaml,
|
|
31
|
-
gen_md_table,
|
|
32
|
-
get_artifact_target,
|
|
33
|
-
get_in,
|
|
34
|
-
get_workflow_url,
|
|
35
|
-
is_ipython,
|
|
36
|
-
is_legacy_artifact,
|
|
37
|
-
logger,
|
|
38
|
-
run_keys,
|
|
39
|
-
version,
|
|
40
|
-
)
|
|
41
|
-
|
|
42
|
-
# default KFP artifacts and output (ui metadata, metrics etc.)
|
|
43
|
-
# directories to /tmp to allow running with security context
|
|
44
|
-
KFPMETA_DIR = "/tmp"
|
|
45
|
-
KFP_ARTIFACTS_DIR = "/tmp"
|
|
46
|
-
|
|
47
|
-
project_annotation = "mlrun/project"
|
|
48
|
-
run_annotation = "mlrun/pipeline-step-type"
|
|
49
|
-
function_annotation = "mlrun/function-uri"
|
|
50
|
-
|
|
51
|
-
dsl.ContainerOp._DISABLE_REUSABLE_COMPONENT_WARNING = True
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
class PipelineRunType:
|
|
55
|
-
run = "run"
|
|
56
|
-
build = "build"
|
|
57
|
-
deploy = "deploy"
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
def is_num(v):
|
|
61
|
-
return isinstance(v, (int, float, complex))
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
def write_kfpmeta(struct):
|
|
65
|
-
if "status" not in struct:
|
|
66
|
-
return
|
|
67
|
-
|
|
68
|
-
results = struct["status"].get("results", {})
|
|
69
|
-
metrics = {
|
|
70
|
-
"metrics": [
|
|
71
|
-
{"name": k, "numberValue": v} for k, v in results.items() if is_num(v)
|
|
72
|
-
],
|
|
73
|
-
}
|
|
74
|
-
with open(os.path.join(KFPMETA_DIR, "mlpipeline-metrics.json"), "w") as f:
|
|
75
|
-
json.dump(metrics, f)
|
|
76
|
-
|
|
77
|
-
struct = deepcopy(struct)
|
|
78
|
-
uid = struct["metadata"].get("uid")
|
|
79
|
-
project = struct["metadata"].get("project", config.default_project)
|
|
80
|
-
output_artifacts, out_dict = get_kfp_outputs(
|
|
81
|
-
struct["status"].get(run_keys.artifacts, []),
|
|
82
|
-
struct["metadata"].get("labels", {}),
|
|
83
|
-
project,
|
|
84
|
-
)
|
|
85
|
-
|
|
86
|
-
results["run_id"] = results.get("run_id", "/".join([project, uid]))
|
|
87
|
-
for key in struct["spec"].get(run_keys.outputs, []):
|
|
88
|
-
val = "None"
|
|
89
|
-
if key in out_dict:
|
|
90
|
-
val = out_dict[key]
|
|
91
|
-
elif key in results:
|
|
92
|
-
val = results[key]
|
|
93
|
-
try:
|
|
94
|
-
# NOTE: if key has "../x", it would fail on path traversal
|
|
95
|
-
path = os.path.join(KFP_ARTIFACTS_DIR, key)
|
|
96
|
-
if not mlrun.utils.helpers.is_safe_path(KFP_ARTIFACTS_DIR, path):
|
|
97
|
-
logger.warning(
|
|
98
|
-
"Path traversal is not allowed ignoring", path=path, key=key
|
|
99
|
-
)
|
|
100
|
-
continue
|
|
101
|
-
path = os.path.abspath(path)
|
|
102
|
-
logger.info("Writing artifact output", path=path, val=val)
|
|
103
|
-
with open(path, "w") as fp:
|
|
104
|
-
fp.write(str(val))
|
|
105
|
-
except Exception as exc:
|
|
106
|
-
logger.warning("Failed writing to temp file. Ignoring", exc=repr(exc))
|
|
107
|
-
pass
|
|
108
|
-
|
|
109
|
-
text = "# Run Report\n"
|
|
110
|
-
if "iterations" in struct["status"]:
|
|
111
|
-
del struct["status"]["iterations"]
|
|
112
|
-
|
|
113
|
-
text += "## Metadata\n```yaml\n" + dict_to_yaml(struct) + "```\n"
|
|
114
|
-
|
|
115
|
-
metadata = {
|
|
116
|
-
"outputs": output_artifacts
|
|
117
|
-
+ [{"type": "markdown", "storage": "inline", "source": text}]
|
|
118
|
-
}
|
|
119
|
-
with open(os.path.join(KFPMETA_DIR, "mlpipeline-ui-metadata.json"), "w") as f:
|
|
120
|
-
json.dump(metadata, f)
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
def get_kfp_outputs(artifacts, labels, project):
|
|
124
|
-
outputs = []
|
|
125
|
-
out_dict = {}
|
|
126
|
-
for output in artifacts:
|
|
127
|
-
if is_legacy_artifact(output):
|
|
128
|
-
key = output["key"]
|
|
129
|
-
# The spec in a legacy artifact is contained in the main object, so using this assignment saves us a lot
|
|
130
|
-
# of if/else in the rest of this function.
|
|
131
|
-
output_spec = output
|
|
132
|
-
else:
|
|
133
|
-
key = output.get("metadata")["key"]
|
|
134
|
-
output_spec = output.get("spec", {})
|
|
135
|
-
|
|
136
|
-
target = output_spec.get("target_path", "")
|
|
137
|
-
target = output_spec.get("inline", target)
|
|
138
|
-
|
|
139
|
-
out_dict[key] = get_artifact_target(output, project=project)
|
|
140
|
-
|
|
141
|
-
if target.startswith("v3io:///"):
|
|
142
|
-
target = target.replace("v3io:///", "http://v3io-webapi:8081/")
|
|
143
|
-
|
|
144
|
-
user = labels.get("v3io_user", "") or os.environ.get("V3IO_USERNAME", "")
|
|
145
|
-
if target.startswith("/User/"):
|
|
146
|
-
user = user or "admin"
|
|
147
|
-
target = "http://v3io-webapi:8081/users/" + user + target[5:]
|
|
148
|
-
|
|
149
|
-
viewer = output_spec.get("viewer", "")
|
|
150
|
-
if viewer in ["web-app", "chart"]:
|
|
151
|
-
meta = {"type": "web-app", "source": target}
|
|
152
|
-
outputs += [meta]
|
|
153
|
-
|
|
154
|
-
elif viewer == "table":
|
|
155
|
-
header = output_spec.get("header", None)
|
|
156
|
-
if header and target.endswith(".csv"):
|
|
157
|
-
meta = {
|
|
158
|
-
"type": "table",
|
|
159
|
-
"format": "csv",
|
|
160
|
-
"header": header,
|
|
161
|
-
"source": target,
|
|
162
|
-
}
|
|
163
|
-
outputs += [meta]
|
|
164
|
-
|
|
165
|
-
elif output.get("kind") == "dataset":
|
|
166
|
-
header = output_spec.get("header")
|
|
167
|
-
preview = output_spec.get("preview")
|
|
168
|
-
if preview:
|
|
169
|
-
tbl_md = gen_md_table(header, preview)
|
|
170
|
-
text = f"## Dataset: {key} \n\n" + tbl_md
|
|
171
|
-
del output_spec["preview"]
|
|
172
|
-
|
|
173
|
-
meta = {"type": "markdown", "storage": "inline", "source": text}
|
|
174
|
-
outputs += [meta]
|
|
175
|
-
|
|
176
|
-
return outputs, out_dict
|
|
177
|
-
|
|
178
|
-
|
|
179
|
-
def mlrun_op(
|
|
180
|
-
name: str = "",
|
|
181
|
-
project: str = "",
|
|
182
|
-
function=None,
|
|
183
|
-
func_url=None,
|
|
184
|
-
image: str = "",
|
|
185
|
-
runobj=None,
|
|
186
|
-
command: str = "",
|
|
187
|
-
secrets: list = None,
|
|
188
|
-
params: dict = None,
|
|
189
|
-
job_image=None,
|
|
190
|
-
hyperparams: dict = None,
|
|
191
|
-
param_file: str = "",
|
|
192
|
-
labels: dict = None,
|
|
193
|
-
selector: str = "",
|
|
194
|
-
inputs: dict = None,
|
|
195
|
-
outputs: list = None,
|
|
196
|
-
in_path: str = "",
|
|
197
|
-
out_path: str = "",
|
|
198
|
-
rundb: str = "",
|
|
199
|
-
mode: str = "",
|
|
200
|
-
handler: str = "",
|
|
201
|
-
more_args: list = None,
|
|
202
|
-
hyper_param_options=None,
|
|
203
|
-
verbose=None,
|
|
204
|
-
scrape_metrics=False,
|
|
205
|
-
returns: list[Union[str, dict[str, str]]] = None,
|
|
206
|
-
auto_build: bool = False,
|
|
207
|
-
):
|
|
208
|
-
"""mlrun KubeFlow pipelines operator, use to form pipeline steps
|
|
209
|
-
|
|
210
|
-
when using kubeflow pipelines, each step is wrapped in an mlrun_op
|
|
211
|
-
one step can pass state and data to the next step, see example below.
|
|
212
|
-
|
|
213
|
-
:param name: name used for the step
|
|
214
|
-
:param project: optional, project name
|
|
215
|
-
:param image: optional, run container image (will be executing the step)
|
|
216
|
-
the container should host all required packages + code
|
|
217
|
-
for the run, alternatively user can mount packages/code via
|
|
218
|
-
shared file volumes like v3io (see example below)
|
|
219
|
-
:param function: optional, function object
|
|
220
|
-
:param func_url: optional, function object url
|
|
221
|
-
:param command: exec command (or URL for functions)
|
|
222
|
-
:param secrets: extra secrets specs, will be injected into the runtime
|
|
223
|
-
e.g. ['file=<filename>', 'env=ENV_KEY1,ENV_KEY2']
|
|
224
|
-
:param params: dictionary of run parameters and values
|
|
225
|
-
:param hyperparams: dictionary of hyper parameters and list values, each
|
|
226
|
-
hyperparam holds a list of values, the run will be
|
|
227
|
-
executed for every parameter combination (GridSearch)
|
|
228
|
-
:param param_file: a csv/json file with parameter combinations, first csv row hold
|
|
229
|
-
the parameter names, following rows hold param values
|
|
230
|
-
:param selector: selection criteria for hyperparams e.g. "max.accuracy"
|
|
231
|
-
:param hyper_param_options: hyper param options class, see: :py:class:`~mlrun.model.HyperParamOptions`
|
|
232
|
-
:param labels: labels to tag the job/run with ({key:val, ..})
|
|
233
|
-
:param inputs: dictionary of input objects + optional paths (if path is
|
|
234
|
-
omitted the path will be the in_path/key.
|
|
235
|
-
:param outputs: dictionary of output objects + optional paths (if path is
|
|
236
|
-
omitted the path will be the out_path/key.
|
|
237
|
-
:param in_path: default input path/url (prefix) for inputs
|
|
238
|
-
:param out_path: default output path/url (prefix) for artifacts
|
|
239
|
-
:param rundb: path for rundb (or use 'MLRUN_DBPATH' env instead)
|
|
240
|
-
:param mode: run mode, e.g. 'pass' for using the command without mlrun wrapper
|
|
241
|
-
:param handler code entry-point/handler name
|
|
242
|
-
:param job_image name of the image user for the job
|
|
243
|
-
:param verbose: add verbose prints/logs
|
|
244
|
-
:param scrape_metrics: whether to add the `mlrun/scrape-metrics` label to this run's resources
|
|
245
|
-
:param returns: List of configurations for how to log the returning values from the handler's run (as artifacts or
|
|
246
|
-
results). The list's length must be equal to the amount of returning objects. A configuration may be
|
|
247
|
-
given as:
|
|
248
|
-
|
|
249
|
-
* A string of the key to use to log the returning value as result or as an artifact. To specify
|
|
250
|
-
The artifact type, it is possible to pass a string in the following structure:
|
|
251
|
-
"<key> : <type>". Available artifact types can be seen in `mlrun.ArtifactType`. If no artifact
|
|
252
|
-
type is specified, the object's default artifact type will be used.
|
|
253
|
-
* A dictionary of configurations to use when logging. Further info per object type and artifact
|
|
254
|
-
type can be given there. The artifact key must appear in the dictionary as "key": "the_key".
|
|
255
|
-
:param auto_build: when set to True and the function require build it will be built on the first
|
|
256
|
-
function run, use only if you dont plan on changing the build config between runs
|
|
257
|
-
|
|
258
|
-
:returns: KFP step operation
|
|
259
|
-
|
|
260
|
-
Example:
|
|
261
|
-
from kfp import dsl
|
|
262
|
-
from mlrun import mlrun_op
|
|
263
|
-
from mlrun.platforms import mount_v3io
|
|
264
|
-
|
|
265
|
-
def mlrun_train(p1, p2):
|
|
266
|
-
return mlrun_op('training',
|
|
267
|
-
command = '/User/kubeflow/training.py',
|
|
268
|
-
params = {'p1':p1, 'p2':p2},
|
|
269
|
-
outputs = {'model.txt':'', 'dataset.csv':''},
|
|
270
|
-
out_path ='v3io:///projects/my-proj/mlrun/{{workflow.uid}}/',
|
|
271
|
-
rundb = '/User/kubeflow')
|
|
272
|
-
|
|
273
|
-
# use data from the first step
|
|
274
|
-
def mlrun_validate(modelfile):
|
|
275
|
-
return mlrun_op('validation',
|
|
276
|
-
command = '/User/kubeflow/validation.py',
|
|
277
|
-
inputs = {'model.txt':modelfile},
|
|
278
|
-
out_path ='v3io:///projects/my-proj/{{workflow.uid}}/',
|
|
279
|
-
rundb = '/User/kubeflow')
|
|
280
|
-
|
|
281
|
-
@dsl.pipeline(
|
|
282
|
-
name='My MLRUN pipeline', description='Shows how to use mlrun.'
|
|
283
|
-
)
|
|
284
|
-
def mlrun_pipeline(
|
|
285
|
-
p1 = 5 , p2 = '"text"'
|
|
286
|
-
):
|
|
287
|
-
# run training, mount_v3io will mount "/User" into the pipeline step
|
|
288
|
-
train = mlrun_train(p1, p2).apply(mount_v3io())
|
|
289
|
-
|
|
290
|
-
# feed 1st step results into the second step
|
|
291
|
-
validate = mlrun_validate(
|
|
292
|
-
train.outputs['model-txt']).apply(mount_v3io())
|
|
293
|
-
|
|
294
|
-
"""
|
|
295
|
-
secrets = [] if secrets is None else secrets
|
|
296
|
-
params = {} if params is None else params
|
|
297
|
-
hyperparams = {} if hyperparams is None else hyperparams
|
|
298
|
-
if hyper_param_options and isinstance(hyper_param_options, dict):
|
|
299
|
-
hyper_param_options = HyperParamOptions.from_dict(hyper_param_options)
|
|
300
|
-
inputs = {} if inputs is None else inputs
|
|
301
|
-
returns = [] if returns is None else returns
|
|
302
|
-
outputs = [] if outputs is None else outputs
|
|
303
|
-
labels = {} if labels is None else labels
|
|
304
|
-
|
|
305
|
-
rundb = rundb or mlrun.db.get_or_set_dburl()
|
|
306
|
-
cmd = [
|
|
307
|
-
"python",
|
|
308
|
-
"-m",
|
|
309
|
-
"mlrun",
|
|
310
|
-
"run",
|
|
311
|
-
"--kfp",
|
|
312
|
-
"--from-env",
|
|
313
|
-
"--workflow",
|
|
314
|
-
"{{workflow.uid}}",
|
|
315
|
-
]
|
|
316
|
-
file_outputs = {}
|
|
317
|
-
|
|
318
|
-
runtime = None
|
|
319
|
-
code_env = None
|
|
320
|
-
function_name = ""
|
|
321
|
-
if function:
|
|
322
|
-
if not func_url:
|
|
323
|
-
if function.kind in ["", "local"]:
|
|
324
|
-
image = image or function.spec.image
|
|
325
|
-
command = command or function.spec.command
|
|
326
|
-
more_args = more_args or function.spec.args
|
|
327
|
-
mode = mode or function.spec.mode
|
|
328
|
-
rundb = rundb or function.spec.rundb
|
|
329
|
-
code_env = str(function.spec.build.functionSourceCode)
|
|
330
|
-
else:
|
|
331
|
-
runtime = str(function.to_dict())
|
|
332
|
-
|
|
333
|
-
function_name = function.metadata.name
|
|
334
|
-
if function.kind == "dask":
|
|
335
|
-
image = image or function.spec.kfp_image or config.dask_kfp_image
|
|
336
|
-
|
|
337
|
-
image = image or config.kfp_image
|
|
338
|
-
|
|
339
|
-
if runobj:
|
|
340
|
-
handler = handler or runobj.spec.handler_name
|
|
341
|
-
params = params or runobj.spec.parameters
|
|
342
|
-
hyperparams = hyperparams or runobj.spec.hyperparams
|
|
343
|
-
param_file = (
|
|
344
|
-
param_file
|
|
345
|
-
or runobj.spec.param_file
|
|
346
|
-
or runobj.spec.hyper_param_options.param_file
|
|
347
|
-
)
|
|
348
|
-
hyper_param_options = hyper_param_options or runobj.spec.hyper_param_options
|
|
349
|
-
selector = (
|
|
350
|
-
selector or runobj.spec.selector or runobj.spec.hyper_param_options.selector
|
|
351
|
-
)
|
|
352
|
-
inputs = inputs or runobj.spec.inputs
|
|
353
|
-
returns = returns or runobj.spec.returns
|
|
354
|
-
outputs = outputs or runobj.spec.outputs
|
|
355
|
-
in_path = in_path or runobj.spec.input_path
|
|
356
|
-
out_path = out_path or runobj.spec.output_path
|
|
357
|
-
secrets = secrets or runobj.spec.secret_sources
|
|
358
|
-
project = project or runobj.metadata.project
|
|
359
|
-
labels = runobj.metadata.labels or labels
|
|
360
|
-
verbose = verbose or runobj.spec.verbose
|
|
361
|
-
scrape_metrics = scrape_metrics or runobj.spec.scrape_metrics
|
|
362
|
-
|
|
363
|
-
outputs = RunSpec.join_outputs_and_returns(outputs=outputs, returns=returns)
|
|
364
|
-
|
|
365
|
-
if not name:
|
|
366
|
-
if not function_name:
|
|
367
|
-
raise ValueError("name or function object must be specified")
|
|
368
|
-
name = function_name
|
|
369
|
-
if handler:
|
|
370
|
-
short_name = handler
|
|
371
|
-
for separator in ["#", "::", "."]:
|
|
372
|
-
# drop paths, module or class name from short name
|
|
373
|
-
if separator in short_name:
|
|
374
|
-
short_name = short_name.split(separator)[-1]
|
|
375
|
-
name += "-" + short_name
|
|
376
|
-
|
|
377
|
-
if hyperparams or param_file:
|
|
378
|
-
outputs.append("iteration_results")
|
|
379
|
-
if "run_id" not in outputs:
|
|
380
|
-
outputs.append("run_id")
|
|
381
|
-
|
|
382
|
-
params = params or {}
|
|
383
|
-
hyperparams = hyperparams or {}
|
|
384
|
-
inputs = inputs or {}
|
|
385
|
-
returns = returns or []
|
|
386
|
-
secrets = secrets or []
|
|
387
|
-
|
|
388
|
-
mlrun.runtimes.utils.enrich_run_labels(labels)
|
|
389
|
-
|
|
390
|
-
if name:
|
|
391
|
-
cmd += ["--name", name]
|
|
392
|
-
if func_url:
|
|
393
|
-
cmd += ["-f", func_url]
|
|
394
|
-
for secret in secrets:
|
|
395
|
-
cmd += ["-s", f"{secret['kind']}={secret['source']}"]
|
|
396
|
-
for param, val in params.items():
|
|
397
|
-
cmd += ["-p", f"{param}={val}"]
|
|
398
|
-
for xpram, val in hyperparams.items():
|
|
399
|
-
cmd += ["-x", f"{xpram}={val}"]
|
|
400
|
-
for input_param, val in inputs.items():
|
|
401
|
-
cmd += ["-i", f"{input_param}={val}"]
|
|
402
|
-
for log_hint in returns:
|
|
403
|
-
cmd += [
|
|
404
|
-
"--returns",
|
|
405
|
-
json.dumps(log_hint) if isinstance(log_hint, dict) else log_hint,
|
|
406
|
-
]
|
|
407
|
-
for label, val in labels.items():
|
|
408
|
-
cmd += ["--label", f"{label}={val}"]
|
|
409
|
-
for output in outputs:
|
|
410
|
-
cmd += ["-o", str(output)]
|
|
411
|
-
file_outputs[output.replace(".", "_")] = (
|
|
412
|
-
f"/tmp/{output}" # not using path.join to avoid windows "\"
|
|
413
|
-
)
|
|
414
|
-
if project:
|
|
415
|
-
cmd += ["--project", project]
|
|
416
|
-
if handler:
|
|
417
|
-
cmd += ["--handler", handler]
|
|
418
|
-
if runtime:
|
|
419
|
-
cmd += ["--runtime", runtime]
|
|
420
|
-
if in_path:
|
|
421
|
-
cmd += ["--in-path", in_path]
|
|
422
|
-
if out_path:
|
|
423
|
-
cmd += ["--out-path", out_path]
|
|
424
|
-
if param_file:
|
|
425
|
-
cmd += ["--param-file", param_file]
|
|
426
|
-
if hyper_param_options:
|
|
427
|
-
cmd += ["--hyper-param-options", hyper_param_options.to_json()]
|
|
428
|
-
if selector:
|
|
429
|
-
cmd += ["--selector", selector]
|
|
430
|
-
if job_image:
|
|
431
|
-
cmd += ["--image", job_image]
|
|
432
|
-
if mode:
|
|
433
|
-
cmd += ["--mode", mode]
|
|
434
|
-
if verbose:
|
|
435
|
-
cmd += ["--verbose"]
|
|
436
|
-
if scrape_metrics:
|
|
437
|
-
cmd += ["--scrape-metrics"]
|
|
438
|
-
if auto_build:
|
|
439
|
-
cmd += ["--auto-build"]
|
|
440
|
-
if more_args:
|
|
441
|
-
cmd += more_args
|
|
442
|
-
|
|
443
|
-
registry = get_default_reg()
|
|
444
|
-
if image and image.startswith("."):
|
|
445
|
-
if registry:
|
|
446
|
-
image = f"{registry}/{image[1:]}"
|
|
447
|
-
else:
|
|
448
|
-
raise ValueError("local image registry env not found")
|
|
449
|
-
|
|
450
|
-
image = mlrun.utils.enrich_image_url(
|
|
451
|
-
image, mlrun.get_version(), str(version.Version().get_python_version())
|
|
452
|
-
)
|
|
453
|
-
|
|
454
|
-
cop = dsl.ContainerOp(
|
|
455
|
-
name=name,
|
|
456
|
-
image=image,
|
|
457
|
-
command=cmd + [command],
|
|
458
|
-
file_outputs=file_outputs,
|
|
459
|
-
output_artifact_paths={
|
|
460
|
-
"mlpipeline-ui-metadata": os.path.join(
|
|
461
|
-
KFPMETA_DIR, "mlpipeline-ui-metadata.json"
|
|
462
|
-
),
|
|
463
|
-
"mlpipeline-metrics": os.path.join(KFPMETA_DIR, "mlpipeline-metrics.json"),
|
|
464
|
-
},
|
|
465
|
-
)
|
|
466
|
-
cop = add_default_function_resources(cop)
|
|
467
|
-
cop = add_function_node_selection_attributes(container_op=cop, function=function)
|
|
468
|
-
|
|
469
|
-
add_annotations(cop, PipelineRunType.run, function, func_url, project)
|
|
470
|
-
add_labels(cop, function, scrape_metrics)
|
|
471
|
-
if code_env:
|
|
472
|
-
cop.container.add_env_variable(
|
|
473
|
-
k8s_client.V1EnvVar(name="MLRUN_EXEC_CODE", value=code_env)
|
|
474
|
-
)
|
|
475
|
-
if registry:
|
|
476
|
-
cop.container.add_env_variable(
|
|
477
|
-
k8s_client.V1EnvVar(
|
|
478
|
-
name="MLRUN_HTTPDB__BUILDER__DOCKER_REGISTRY", value=registry
|
|
479
|
-
)
|
|
480
|
-
)
|
|
481
|
-
|
|
482
|
-
add_default_env(k8s_client, cop)
|
|
483
|
-
|
|
484
|
-
return cop
|
|
485
|
-
|
|
486
|
-
|
|
487
|
-
def deploy_op(
    name,
    function,
    func_url=None,
    source="",
    project="",
    models: list = None,
    env: dict = None,
    tag="",
    verbose=False,
):
    """Create a KFP container op that runs ``python -m mlrun deploy``.

    :param name:      name given to the container op
    :param function:  function runtime object; its ``kind`` selects how models are
                      encoded, and its dict form is appended to the command when
                      ``func_url`` is not set
    :param func_url:  function url, passed with ``-f`` instead of the function dict
    :param source:    passed with ``-s`` when set
    :param project:   passed with ``-p`` when set
    :param models:    list of model dicts, each passed with ``-m``
    :param env:       dict whose items are passed as ``--env key=value``
    :param tag:       passed with ``--tag`` when set
    :param verbose:   adds ``--verbose`` to the command when true
    :return: the configured container op
    """
    cmd = ["python", "-m", "mlrun", "deploy"]
    if source:
        cmd += ["-s", source]
    if tag:
        cmd += ["--tag", tag]
    if verbose:
        cmd += ["--verbose"]
    if project:
        cmd += ["-p", project]

    if models:
        stringified_keys = ("key", "model_path", "model_url", "class_name")
        is_serving = function.kind == mlrun.runtimes.RuntimeKinds.serving
        for m in models:
            # work on a shallow copy so the caller's model dicts are not modified
            model = dict(m)
            for key in stringified_keys:
                if key in model:
                    model[key] = str(model[key])  # verify we stringify pipeline params
            if is_serving:
                cmd += ["-m", json.dumps(model)]
            else:
                cmd += ["-m", f"{model['key']}={model['model_path']}"]

    if env:
        for key, val in env.items():
            cmd += ["--env", f"{key}={val}"]

    if func_url:
        cmd += ["-f", func_url]
    else:
        runtime = f"{function.to_dict()}"
        cmd += [runtime]

    cop = dsl.ContainerOp(
        name=name,
        image=config.kfp_image,
        command=cmd,
        file_outputs={"endpoint": "/tmp/output", "name": "/tmp/name"},
    )
    cop = add_default_function_resources(cop)
    cop = add_function_node_selection_attributes(container_op=cop, function=function)

    add_annotations(cop, PipelineRunType.deploy, function, func_url)
    add_default_env(k8s_client, cop)
    return cop
|
|
540
|
-
|
|
541
|
-
|
|
542
|
-
def add_env(env=None):
    """
    Build a modifier that sets the given env vars on a task

    Usage:
        train = train_op(...)
        train.apply(add_env({'MY_ENV':'123'}))

    :param env: dict of env var names to values (None means no env vars)
    :return: a function that takes a task, adds the env vars to it and returns it
    """
    if env is None:
        env = {}

    def _add_env(task):
        for var_name, var_value in env.items():
            env_var = k8s_client.V1EnvVar(name=var_name, value=var_value)
            task.add_env_variable(env_var)
        return task

    return _add_env
|
|
558
|
-
|
|
559
|
-
|
|
560
|
-
def build_op(
    name,
    function=None,
    func_url=None,
    image=None,
    base_image=None,
    commands: list = None,
    secret_name="",
    with_mlrun=True,
    skip_deployed=False,
):
    """Create a KFP container op that runs ``python -m mlrun build --kfp``.

    :param name:          name given to the container op
    :param function:      function runtime object (must have ``to_dict``); its dict
                          form is passed with ``-r``
    :param func_url:      function url, appended to the command when no function
                          object is given
    :param image:         passed with ``-i`` when set
    :param base_image:    passed with ``-b`` when set
    :param commands:      list of build commands, each passed with ``-c``
    :param secret_name:   passed with ``--secret-name`` when set
    :param with_mlrun:    adds ``--with-mlrun`` when true
    :param skip_deployed: adds ``--skip`` when true
    :return: the configured container op
    :raises ValueError: if ``function`` has no ``to_dict`` or if neither
                        ``function`` nor ``func_url`` is given
    """

    cmd = ["python", "-m", "mlrun", "build", "--kfp"]
    if function:
        if not hasattr(function, "to_dict"):
            raise ValueError("function must specify a function runtime object")
        cmd += ["-r", str(function.to_dict())]
    elif not func_url:
        raise ValueError("function object or func_url must be specified")

    commands = commands or []
    if image:
        cmd += ["-i", image]
    if base_image:
        cmd += ["-b", base_image]
    if secret_name:
        cmd += ["--secret-name", secret_name]
    if with_mlrun:
        cmd += ["--with-mlrun"]
    if skip_deployed:
        cmd += ["--skip"]
    for c in commands:
        cmd += ["-c", c]
    if func_url and not function:
        cmd += [func_url]

    cop = dsl.ContainerOp(
        name=name,
        image=config.kfp_image,
        command=cmd,
        file_outputs={"state": "/tmp/state", "image": "/tmp/image"},
    )
    cop = add_default_function_resources(cop)
    if function:
        # node selection is read from the function spec, so it only applies
        # when a function object was given (not for the func_url-only form)
        cop = add_function_node_selection_attributes(
            container_op=cop, function=function
        )

    add_annotations(cop, PipelineRunType.build, function, func_url)
    if config.httpdb.builder.docker_registry:
        cop.container.add_env_variable(
            k8s_client.V1EnvVar(
                name="MLRUN_HTTPDB__BUILDER__DOCKER_REGISTRY",
                value=config.httpdb.builder.docker_registry,
            )
        )
    if "IGZ_NAMESPACE_DOMAIN" in os.environ:
        cop.container.add_env_variable(
            k8s_client.V1EnvVar(
                name="IGZ_NAMESPACE_DOMAIN",
                value=os.environ.get("IGZ_NAMESPACE_DOMAIN"),
            )
        )

    # the build source is only known when a function object was given
    build_source = function.spec.build.source if function else None
    is_v3io = bool(build_source) and build_source.startswith("v3io")
    if "V3IO_ACCESS_KEY" in os.environ and is_v3io:
        cop.container.add_env_variable(
            k8s_client.V1EnvVar(
                name="V3IO_ACCESS_KEY", value=os.environ.get("V3IO_ACCESS_KEY")
            )
        )

    add_default_env(k8s_client, cop)

    return cop
|
|
635
|
-
|
|
636
|
-
|
|
637
|
-
def add_default_env(k8s_client, cop):
    """Add the default env vars to the container of the given op.

    Always sets MLRUN_NAMESPACE from the pod's ``metadata.namespace`` field, and
    conditionally sets MLRUN_DBPATH, MLRUN_MPIJOB_CRD_VERSION and the auth session
    var according to the mlrun config and the current process environment.

    :param k8s_client: module/object providing the V1EnvVar* model classes
    :param cop:        container op whose container receives the env vars
    """
    container = cop.container

    namespace_source = k8s_client.V1EnvVarSource(
        field_ref=k8s_client.V1ObjectFieldSelector(field_path="metadata.namespace")
    )
    container.add_env_variable(
        k8s_client.V1EnvVar("MLRUN_NAMESPACE", value_from=namespace_source)
    )

    api_url = config.httpdb.api_url
    if api_url:
        container.add_env_variable(
            k8s_client.V1EnvVar(name="MLRUN_DBPATH", value=api_url)
        )

    crd_version = config.mpijob_crd_version
    if crd_version:
        container.add_env_variable(
            k8s_client.V1EnvVar(name="MLRUN_MPIJOB_CRD_VERSION", value=crd_version)
        )

    auth_env_var = mlrun.runtimes.constants.FunctionEnvironmentVariables.auth_session
    access_key_var = "V3IO_ACCESS_KEY"
    if any(var in os.environ for var in (auth_env_var, access_key_var)):
        # the auth session value wins, the v3io access key is the fallback
        auth_value = os.environ.get(auth_env_var) or os.environ.get(access_key_var)
        container.add_env_variable(
            k8s_client.V1EnvVar(name=auth_env_var, value=auth_value)
        )
|
|
669
|
-
|
|
670
|
-
|
|
671
|
-
def get_default_reg():
    """Resolve the default docker registry.

    :return: the registry set in ``config.httpdb.builder.docker_registry`` if any,
             else ``docker-registry.<IGZ_NAMESPACE_DOMAIN>:80`` when that env var
             is set, else an empty string
    """
    configured_registry = config.httpdb.builder.docker_registry
    if configured_registry:
        return configured_registry

    namespace_domain = os.environ.get("IGZ_NAMESPACE_DOMAIN")
    if namespace_domain is None:
        return ""
    return f"docker-registry.{namespace_domain}:80"
|
|
678
|
-
|
|
679
|
-
|
|
680
|
-
def add_annotations(cop, kind, function, func_url=None, project=None):
    """Add the run-type, project and function pod annotations to an op.

    :param cop:       op to annotate (via ``add_pod_annotation``)
    :param kind:      value stored under the run annotation
    :param function:  function object, used as the fallback source for the project
                      (``metadata.project``) and the function reference (``uri``);
                      may be None when ``func_url`` identifies the function
    :param func_url:  function url; a leading ``db://`` is stripped before use
    :param project:   project name, takes precedence over the function's project
    """
    if func_url and func_url.startswith("db://"):
        func_url = func_url[len("db://") :]

    # only fall back to the function object when one was actually supplied
    if not project and function is not None:
        project = function.metadata.project
    function_ref = func_url
    if not function_ref and function is not None:
        function_ref = function.uri

    cop.add_pod_annotation(run_annotation, kind)
    cop.add_pod_annotation(project_annotation, project)
    cop.add_pod_annotation(function_annotation, function_ref)
|
|
686
|
-
|
|
687
|
-
|
|
688
|
-
def add_labels(cop, function, scrape_metrics=False):
    """Add the mlrun pod labels (class, function, name, project, tag, scrape-metrics).

    :param cop:            op to label (via ``add_pod_label``)
    :param function:       function object supplying kind and metadata values
    :param scrape_metrics: stored as the string "True" or "False"
    """
    prefix = mlrun.runtimes.utils.mlrun_key
    metadata = function.metadata
    labels = (
        ("class", function.kind),
        ("function", metadata.name),
        ("name", cop.human_name),
        ("project", metadata.project),
        ("tag", metadata.tag or "latest"),
        ("scrape-metrics", "True" if scrape_metrics else "False"),
    )
    for suffix, label_value in labels:
        cop.add_pod_label(prefix + suffix, label_value)
|
|
696
|
-
|
|
697
|
-
|
|
698
|
-
def generate_kfp_dag_and_resolve_project(run, project=None):
    """Build a DAG dict from a KFP run's workflow manifest and resolve its project.

    :param run:     KFP run dict; the manifest is read from
                    ``run["pipeline_runtime"]["workflow_manifest"]`` (JSON string)
    :param project: project name; when empty it is taken from the first template
                    that carries a non-empty project annotation
    :return: ``(dag, project, message)`` where dag maps node id to a node record,
             or ``(None, project, None)`` when the run has no workflow manifest
    """
    manifest = run.get("pipeline_runtime", {}).get("workflow_manifest")
    if not manifest:
        return None, project, None
    workflow = json.loads(manifest)

    def _annotation(template, annotation_name):
        return get_in(template, ["metadata", "annotations", annotation_name], "")

    templates = {}
    for template in workflow["spec"]["templates"]:
        if not project:
            project = _annotation(template, project_annotation)
        templates[template["name"]] = {
            "run_type": _annotation(template, run_annotation),
            "function": _annotation(template, function_annotation),
        }

    status = workflow["status"]
    kfp_fields = ("phase", "startedAt", "finishedAt", "type", "id")
    dag = {}
    for node in status.get("nodes", {}).values():
        display_name = node["displayName"]
        record = {field: node[field] for field in kfp_fields}

        # add snake_case aliases to follow the mlrun convention, while keeping
        # the original camelCase keys for compatibility
        record.update(
            {inflection.underscore(field): value for field, value in record.items()}
        )

        record["parent"] = node.get("boundaryID", "")
        record["name"] = display_name
        record["children"] = node.get("children", [])

        template_info = templates.get(display_name)
        if template_info is not None:
            record["function"] = template_info.get("function")
            record["run_type"] = template_info.get("run_type")
        dag[node["id"]] = record

    return dag, project, status.get("message", "")
|
|
743
|
-
|
|
744
|
-
|
|
745
|
-
def format_summary_from_kfp_run(
    kfp_run, project=None, run_db: "mlrun.db.RunDBInterface" = None
):
    """Format a KFP run into a short summary with its graph and run details.

    :param kfp_run: KFP run dict (must contain a ``run`` entry)
    :param project: project name; empty or ``"*"`` means resolve it from the workflow
    :param run_db:  run db to query for the workflow's mlrun runs; when not given
                    the default run db is used
    :return: dict with ``graph`` (the DAG, or None when the run has no workflow
             manifest) and ``run`` (formatted run with ``project`` and ``message``)
    """
    override_project = project if project and project != "*" else None
    dag, project, message = generate_kfp_dag_and_resolve_project(
        kfp_run, override_project
    )
    run_id = get_in(kfp_run, "run.id")
    logger.debug("Formatting summary from KFP run", run_id=run_id, project=project)

    # run db parameter allows us to use the same db session for the whole flow and avoid session isolation issues
    if not run_db:
        run_db = mlrun.db.get_run_db()

    # enrich DAG with mlrun run info
    runs = run_db.list_runs(project=project, labels=f"workflow={run_id}")

    for run in runs:
        step = get_in(run, ["metadata", "labels", "mlrun/runner-pod"])
        # dag is None when the run has no workflow manifest, in which case there
        # is nothing to enrich (and a membership test on None would raise)
        if not (step and dag and step in dag):
            continue
        dag[step]["run_uid"] = get_in(run, "metadata.uid")
        dag[step]["kind"] = get_in(run, "metadata.labels.kind")
        error = get_in(run, "status.error")
        if error:
            dag[step]["error"] = error

    short_run = {
        "graph": dag,
        "run": mlrun.utils.helpers.format_run(kfp_run["run"]),
    }
    short_run["run"]["project"] = project
    short_run["run"]["message"] = message
    logger.debug("Completed summary formatting", run_id=run_id, project=project)
    return short_run
|
|
779
|
-
|
|
780
|
-
|
|
781
|
-
def show_kfp_run(run, clear_output=False):
    """Display a KFP run summary graph in an IPython environment.

    Does nothing when the run has no ``graph``, when not running under IPython,
    or when ``graphviz`` cannot be imported. Any failure while plotting is logged
    as a warning rather than raised.

    :param run:          run summary dict with ``graph`` and ``run`` entries
    :param clear_output: clear the current cell output before displaying
    """
    phase_to_color = {
        mlrun.run.RunStatuses.failed: "red",
        mlrun.run.RunStatuses.succeeded: "green",
        mlrun.run.RunStatuses.skipped: "white",
    }
    runtype_to_shape = {
        PipelineRunType.run: "ellipse",
        PipelineRunType.build: "box",
        PipelineRunType.deploy: "box3d",
    }
    if not run or "graph" not in run:
        return
    if not is_ipython:
        return

    try:
        from graphviz import Digraph
    except ImportError:
        return

    try:
        graph = run["graph"]
        dag = Digraph("kfp", format="svg")
        dag.attr(compound="true")

        for key, node in graph.items():
            # the top-level DAG node (type DAG without a parent) is not drawn
            if node["type"] == "DAG" and not node["parent"]:
                continue

            run_type = node.get("run_type")
            if run_type:
                shape = runtype_to_shape.get(run_type, None)
            elif node["phase"] == "Skipped" or (
                node["type"] == "DAG" and node["name"].startswith("condition-")
            ):
                shape = "diamond"
            else:
                shape = "ellipse"

            dag.node(
                key,
                label=node["name"],
                fillcolor=phase_to_color.get(node["phase"], None),
                style="filled",
                shape=shape,
                tooltip=node.get("error", None),
            )
            for child in node.get("children") or []:
                dag.edge(key, child)

        import IPython

        if clear_output:
            IPython.display.clear_output(wait=True)

        run_info = run["run"]
        run_id = run_info["id"]
        url = get_workflow_url(run_info["project"], run_id)
        href = f'<a href="{url}" target="_blank"><b>click here</b></a>'
        html = IPython.display.HTML(
            f"<div>Pipeline running (id={run_id}), {href} to view the details in MLRun UI</div>"
        )
        IPython.display.display(html, dag)
    except Exception as exc:
        logger.warning(f"failed to plot graph, {err_to_str(exc)}")
|
|
839
|
-
|
|
840
|
-
|
|
841
|
-
def add_default_function_resources(
    container_op: dsl.ContainerOp,
) -> dsl.ContainerOp:
    """Apply the configured default pod resources to the op's container.

    Every non-empty value under ``requests`` is added as a resource request and
    every non-empty value under ``limits`` as a resource limit.

    :param container_op: op whose container receives the resources
    :return: the same op
    """
    default_resources = config.get_default_function_pod_resources()
    container = container_op.container
    appliers = (
        ("requests", container.add_resource_request),
        ("limits", container.add_resource_limit),
    )
    for section, apply_resource in appliers:
        for resource_name, resource_value in default_resources[section].items():
            if not resource_value:
                continue
            apply_resource(resource_name, resource_value)
    return container_op
|
|
853
|
-
|
|
854
|
-
|
|
855
|
-
def add_function_node_selection_attributes(
    function, container_op: dsl.ContainerOp
) -> dsl.ContainerOp:
    """Copy node selection settings from the function spec onto the op.

    For non-local runtimes, each of ``node_selector``, ``tolerations`` and
    ``affinity`` is copied when the function spec has a non-empty value for it.
    Spec objects that do not define one of these attributes are tolerated.

    :param function:     function object (may be None, in which case the op is
                         returned unchanged)
    :param container_op: op to update
    :return: the same op
    """
    if function is None:
        return container_op

    if not mlrun.runtimes.RuntimeKinds.is_local_runtime(function.kind):
        for attribute in ("node_selector", "tolerations", "affinity"):
            # default to None so specs without the attribute are simply skipped
            value = getattr(function.spec, attribute, None)
            if value:
                setattr(container_op, attribute, value)

    return container_op
|