ob-metaflow 2.15.13.1__py2.py3-none-any.whl → 2.19.7.1rc0__py2.py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- metaflow/__init__.py +10 -3
- metaflow/_vendor/imghdr/__init__.py +186 -0
- metaflow/_vendor/yaml/__init__.py +427 -0
- metaflow/_vendor/yaml/composer.py +139 -0
- metaflow/_vendor/yaml/constructor.py +748 -0
- metaflow/_vendor/yaml/cyaml.py +101 -0
- metaflow/_vendor/yaml/dumper.py +62 -0
- metaflow/_vendor/yaml/emitter.py +1137 -0
- metaflow/_vendor/yaml/error.py +75 -0
- metaflow/_vendor/yaml/events.py +86 -0
- metaflow/_vendor/yaml/loader.py +63 -0
- metaflow/_vendor/yaml/nodes.py +49 -0
- metaflow/_vendor/yaml/parser.py +589 -0
- metaflow/_vendor/yaml/reader.py +185 -0
- metaflow/_vendor/yaml/representer.py +389 -0
- metaflow/_vendor/yaml/resolver.py +227 -0
- metaflow/_vendor/yaml/scanner.py +1435 -0
- metaflow/_vendor/yaml/serializer.py +111 -0
- metaflow/_vendor/yaml/tokens.py +104 -0
- metaflow/cards.py +4 -0
- metaflow/cli.py +125 -21
- metaflow/cli_components/init_cmd.py +1 -0
- metaflow/cli_components/run_cmds.py +204 -40
- metaflow/cli_components/step_cmd.py +160 -4
- metaflow/client/__init__.py +1 -0
- metaflow/client/core.py +198 -130
- metaflow/client/filecache.py +59 -32
- metaflow/cmd/code/__init__.py +2 -1
- metaflow/cmd/develop/stub_generator.py +49 -18
- metaflow/cmd/develop/stubs.py +9 -27
- metaflow/cmd/make_wrapper.py +30 -0
- metaflow/datastore/__init__.py +1 -0
- metaflow/datastore/content_addressed_store.py +40 -9
- metaflow/datastore/datastore_set.py +10 -1
- metaflow/datastore/flow_datastore.py +124 -4
- metaflow/datastore/spin_datastore.py +91 -0
- metaflow/datastore/task_datastore.py +92 -6
- metaflow/debug.py +5 -0
- metaflow/decorators.py +331 -82
- metaflow/extension_support/__init__.py +414 -356
- metaflow/extension_support/_empty_file.py +2 -2
- metaflow/flowspec.py +322 -82
- metaflow/graph.py +178 -15
- metaflow/includefile.py +25 -3
- metaflow/lint.py +94 -3
- metaflow/meta_files.py +13 -0
- metaflow/metadata_provider/metadata.py +13 -2
- metaflow/metaflow_config.py +66 -4
- metaflow/metaflow_environment.py +91 -25
- metaflow/metaflow_profile.py +18 -0
- metaflow/metaflow_version.py +16 -1
- metaflow/package/__init__.py +673 -0
- metaflow/packaging_sys/__init__.py +880 -0
- metaflow/packaging_sys/backend.py +128 -0
- metaflow/packaging_sys/distribution_support.py +153 -0
- metaflow/packaging_sys/tar_backend.py +99 -0
- metaflow/packaging_sys/utils.py +54 -0
- metaflow/packaging_sys/v1.py +527 -0
- metaflow/parameters.py +6 -2
- metaflow/plugins/__init__.py +6 -0
- metaflow/plugins/airflow/airflow.py +11 -1
- metaflow/plugins/airflow/airflow_cli.py +16 -5
- metaflow/plugins/argo/argo_client.py +42 -20
- metaflow/plugins/argo/argo_events.py +6 -6
- metaflow/plugins/argo/argo_workflows.py +1023 -344
- metaflow/plugins/argo/argo_workflows_cli.py +396 -94
- metaflow/plugins/argo/argo_workflows_decorator.py +9 -0
- metaflow/plugins/argo/argo_workflows_deployer_objects.py +75 -49
- metaflow/plugins/argo/capture_error.py +5 -2
- metaflow/plugins/argo/conditional_input_paths.py +35 -0
- metaflow/plugins/argo/exit_hooks.py +209 -0
- metaflow/plugins/argo/param_val.py +19 -0
- metaflow/plugins/aws/aws_client.py +6 -0
- metaflow/plugins/aws/aws_utils.py +33 -1
- metaflow/plugins/aws/batch/batch.py +72 -5
- metaflow/plugins/aws/batch/batch_cli.py +24 -3
- metaflow/plugins/aws/batch/batch_decorator.py +57 -6
- metaflow/plugins/aws/step_functions/step_functions.py +28 -3
- metaflow/plugins/aws/step_functions/step_functions_cli.py +49 -4
- metaflow/plugins/aws/step_functions/step_functions_deployer.py +3 -0
- metaflow/plugins/aws/step_functions/step_functions_deployer_objects.py +30 -0
- metaflow/plugins/cards/card_cli.py +20 -1
- metaflow/plugins/cards/card_creator.py +24 -1
- metaflow/plugins/cards/card_datastore.py +21 -49
- metaflow/plugins/cards/card_decorator.py +58 -6
- metaflow/plugins/cards/card_modules/basic.py +38 -9
- metaflow/plugins/cards/card_modules/bundle.css +1 -1
- metaflow/plugins/cards/card_modules/chevron/renderer.py +1 -1
- metaflow/plugins/cards/card_modules/components.py +592 -3
- metaflow/plugins/cards/card_modules/convert_to_native_type.py +34 -5
- metaflow/plugins/cards/card_modules/json_viewer.py +232 -0
- metaflow/plugins/cards/card_modules/main.css +1 -0
- metaflow/plugins/cards/card_modules/main.js +56 -41
- metaflow/plugins/cards/card_modules/test_cards.py +22 -6
- metaflow/plugins/cards/component_serializer.py +1 -8
- metaflow/plugins/cards/metadata.py +22 -0
- metaflow/plugins/catch_decorator.py +9 -0
- metaflow/plugins/datastores/local_storage.py +12 -6
- metaflow/plugins/datastores/spin_storage.py +12 -0
- metaflow/plugins/datatools/s3/s3.py +49 -17
- metaflow/plugins/datatools/s3/s3op.py +113 -66
- metaflow/plugins/env_escape/client_modules.py +102 -72
- metaflow/plugins/events_decorator.py +127 -121
- metaflow/plugins/exit_hook/__init__.py +0 -0
- metaflow/plugins/exit_hook/exit_hook_decorator.py +46 -0
- metaflow/plugins/exit_hook/exit_hook_script.py +52 -0
- metaflow/plugins/kubernetes/kubernetes.py +12 -1
- metaflow/plugins/kubernetes/kubernetes_cli.py +11 -0
- metaflow/plugins/kubernetes/kubernetes_decorator.py +25 -6
- metaflow/plugins/kubernetes/kubernetes_job.py +12 -4
- metaflow/plugins/kubernetes/kubernetes_jobsets.py +31 -30
- metaflow/plugins/metadata_providers/local.py +76 -82
- metaflow/plugins/metadata_providers/service.py +13 -9
- metaflow/plugins/metadata_providers/spin.py +16 -0
- metaflow/plugins/package_cli.py +36 -24
- metaflow/plugins/parallel_decorator.py +11 -2
- metaflow/plugins/parsers.py +16 -0
- metaflow/plugins/pypi/bootstrap.py +7 -1
- metaflow/plugins/pypi/conda_decorator.py +41 -82
- metaflow/plugins/pypi/conda_environment.py +14 -6
- metaflow/plugins/pypi/micromamba.py +9 -1
- metaflow/plugins/pypi/pip.py +41 -5
- metaflow/plugins/pypi/pypi_decorator.py +4 -4
- metaflow/plugins/pypi/utils.py +22 -0
- metaflow/plugins/secrets/__init__.py +3 -0
- metaflow/plugins/secrets/secrets_decorator.py +14 -178
- metaflow/plugins/secrets/secrets_func.py +49 -0
- metaflow/plugins/secrets/secrets_spec.py +101 -0
- metaflow/plugins/secrets/utils.py +74 -0
- metaflow/plugins/test_unbounded_foreach_decorator.py +2 -2
- metaflow/plugins/timeout_decorator.py +0 -1
- metaflow/plugins/uv/bootstrap.py +29 -1
- metaflow/plugins/uv/uv_environment.py +5 -3
- metaflow/pylint_wrapper.py +5 -1
- metaflow/runner/click_api.py +79 -26
- metaflow/runner/deployer.py +208 -6
- metaflow/runner/deployer_impl.py +32 -12
- metaflow/runner/metaflow_runner.py +266 -33
- metaflow/runner/subprocess_manager.py +21 -1
- metaflow/runner/utils.py +27 -16
- metaflow/runtime.py +660 -66
- metaflow/task.py +255 -26
- metaflow/user_configs/config_options.py +33 -21
- metaflow/user_configs/config_parameters.py +220 -58
- metaflow/user_decorators/__init__.py +0 -0
- metaflow/user_decorators/common.py +144 -0
- metaflow/user_decorators/mutable_flow.py +512 -0
- metaflow/user_decorators/mutable_step.py +424 -0
- metaflow/user_decorators/user_flow_decorator.py +264 -0
- metaflow/user_decorators/user_step_decorator.py +749 -0
- metaflow/util.py +197 -7
- metaflow/vendor.py +23 -7
- metaflow/version.py +1 -1
- {ob_metaflow-2.15.13.1.data → ob_metaflow-2.19.7.1rc0.data}/data/share/metaflow/devtools/Makefile +13 -2
- {ob_metaflow-2.15.13.1.data → ob_metaflow-2.19.7.1rc0.data}/data/share/metaflow/devtools/Tiltfile +107 -7
- {ob_metaflow-2.15.13.1.data → ob_metaflow-2.19.7.1rc0.data}/data/share/metaflow/devtools/pick_services.sh +1 -0
- {ob_metaflow-2.15.13.1.dist-info → ob_metaflow-2.19.7.1rc0.dist-info}/METADATA +2 -3
- {ob_metaflow-2.15.13.1.dist-info → ob_metaflow-2.19.7.1rc0.dist-info}/RECORD +162 -121
- {ob_metaflow-2.15.13.1.dist-info → ob_metaflow-2.19.7.1rc0.dist-info}/WHEEL +1 -1
- metaflow/_vendor/v3_5/__init__.py +0 -1
- metaflow/_vendor/v3_5/importlib_metadata/__init__.py +0 -644
- metaflow/_vendor/v3_5/importlib_metadata/_compat.py +0 -152
- metaflow/_vendor/v3_5/zipp.py +0 -329
- metaflow/info_file.py +0 -25
- metaflow/package.py +0 -203
- metaflow/user_configs/config_decorators.py +0 -568
- {ob_metaflow-2.15.13.1.dist-info → ob_metaflow-2.19.7.1rc0.dist-info}/entry_points.txt +0 -0
- {ob_metaflow-2.15.13.1.dist-info → ob_metaflow-2.19.7.1rc0.dist-info}/licenses/LICENSE +0 -0
- {ob_metaflow-2.15.13.1.dist-info → ob_metaflow-2.19.7.1rc0.dist-info}/top_level.txt +0 -0
|
@@ -90,6 +90,7 @@ class Kubernetes(object):
|
|
|
90
90
|
step_name,
|
|
91
91
|
task_id,
|
|
92
92
|
attempt,
|
|
93
|
+
code_package_metadata,
|
|
93
94
|
code_package_url,
|
|
94
95
|
step_cmds,
|
|
95
96
|
):
|
|
@@ -104,7 +105,7 @@ class Kubernetes(object):
|
|
|
104
105
|
stderr_path=STDERR_PATH,
|
|
105
106
|
)
|
|
106
107
|
init_cmds = self._environment.get_package_commands(
|
|
107
|
-
code_package_url, self._datastore.TYPE
|
|
108
|
+
code_package_url, self._datastore.TYPE, code_package_metadata
|
|
108
109
|
)
|
|
109
110
|
init_expr = " && ".join(init_cmds)
|
|
110
111
|
step_expr = bash_capture_logs(
|
|
@@ -165,11 +166,13 @@ class Kubernetes(object):
|
|
|
165
166
|
task_id,
|
|
166
167
|
attempt,
|
|
167
168
|
user,
|
|
169
|
+
code_package_metadata,
|
|
168
170
|
code_package_sha,
|
|
169
171
|
code_package_url,
|
|
170
172
|
code_package_ds,
|
|
171
173
|
docker_image,
|
|
172
174
|
docker_image_pull_policy,
|
|
175
|
+
image_pull_secrets=None,
|
|
173
176
|
step_cli=None,
|
|
174
177
|
service_account=None,
|
|
175
178
|
secrets=None,
|
|
@@ -206,6 +209,7 @@ class Kubernetes(object):
|
|
|
206
209
|
node_selector=node_selector,
|
|
207
210
|
image=docker_image,
|
|
208
211
|
image_pull_policy=docker_image_pull_policy,
|
|
212
|
+
image_pull_secrets=image_pull_secrets,
|
|
209
213
|
cpu=cpu,
|
|
210
214
|
memory=memory,
|
|
211
215
|
disk=disk,
|
|
@@ -230,6 +234,7 @@ class Kubernetes(object):
|
|
|
230
234
|
qos=qos,
|
|
231
235
|
security_context=security_context,
|
|
232
236
|
)
|
|
237
|
+
.environment_variable("METAFLOW_CODE_METADATA", code_package_metadata)
|
|
233
238
|
.environment_variable("METAFLOW_CODE_SHA", code_package_sha)
|
|
234
239
|
.environment_variable("METAFLOW_CODE_URL", code_package_url)
|
|
235
240
|
.environment_variable("METAFLOW_CODE_DS", code_package_ds)
|
|
@@ -429,6 +434,7 @@ class Kubernetes(object):
|
|
|
429
434
|
step_name=step_name,
|
|
430
435
|
task_id=_tskid,
|
|
431
436
|
attempt=attempt,
|
|
437
|
+
code_package_metadata=code_package_metadata,
|
|
432
438
|
code_package_url=code_package_url,
|
|
433
439
|
step_cmds=[
|
|
434
440
|
step_cli.replace(
|
|
@@ -477,12 +483,14 @@ class Kubernetes(object):
|
|
|
477
483
|
task_id,
|
|
478
484
|
attempt,
|
|
479
485
|
user,
|
|
486
|
+
code_package_metadata,
|
|
480
487
|
code_package_sha,
|
|
481
488
|
code_package_url,
|
|
482
489
|
code_package_ds,
|
|
483
490
|
step_cli,
|
|
484
491
|
docker_image,
|
|
485
492
|
docker_image_pull_policy,
|
|
493
|
+
image_pull_secrets=None,
|
|
486
494
|
service_account=None,
|
|
487
495
|
secrets=None,
|
|
488
496
|
node_selector=None,
|
|
@@ -524,11 +532,13 @@ class Kubernetes(object):
|
|
|
524
532
|
step_name=step_name,
|
|
525
533
|
task_id=task_id,
|
|
526
534
|
attempt=attempt,
|
|
535
|
+
code_package_metadata=code_package_metadata,
|
|
527
536
|
code_package_url=code_package_url,
|
|
528
537
|
step_cmds=[step_cli],
|
|
529
538
|
),
|
|
530
539
|
image=docker_image,
|
|
531
540
|
image_pull_policy=docker_image_pull_policy,
|
|
541
|
+
image_pull_secrets=image_pull_secrets,
|
|
532
542
|
cpu=cpu,
|
|
533
543
|
memory=memory,
|
|
534
544
|
disk=disk,
|
|
@@ -551,6 +561,7 @@ class Kubernetes(object):
|
|
|
551
561
|
qos=qos,
|
|
552
562
|
security_context=security_context,
|
|
553
563
|
)
|
|
564
|
+
.environment_variable("METAFLOW_CODE_METADATA", code_package_metadata)
|
|
554
565
|
.environment_variable("METAFLOW_CODE_SHA", code_package_sha)
|
|
555
566
|
.environment_variable("METAFLOW_CODE_URL", code_package_url)
|
|
556
567
|
.environment_variable("METAFLOW_CODE_DS", code_package_ds)
|
|
@@ -41,6 +41,7 @@ def kubernetes():
|
|
|
41
41
|
)
|
|
42
42
|
@tracing.cli("kubernetes/step")
|
|
43
43
|
@click.argument("step-name")
|
|
44
|
+
@click.argument("code-package-metadata")
|
|
44
45
|
@click.argument("code-package-sha")
|
|
45
46
|
@click.argument("code-package-url")
|
|
46
47
|
@click.option(
|
|
@@ -53,6 +54,12 @@ def kubernetes():
|
|
|
53
54
|
default=None,
|
|
54
55
|
help="Optional Docker Image Pull Policy for Kubernetes pod.",
|
|
55
56
|
)
|
|
57
|
+
@click.option(
|
|
58
|
+
"--image-pull-secrets",
|
|
59
|
+
default=None,
|
|
60
|
+
type=JSONTypeClass(),
|
|
61
|
+
multiple=False,
|
|
62
|
+
)
|
|
56
63
|
@click.option(
|
|
57
64
|
"--service-account",
|
|
58
65
|
help="IRSA requirement for Kubernetes pod.",
|
|
@@ -155,11 +162,13 @@ def kubernetes():
|
|
|
155
162
|
def step(
|
|
156
163
|
ctx,
|
|
157
164
|
step_name,
|
|
165
|
+
code_package_metadata,
|
|
158
166
|
code_package_sha,
|
|
159
167
|
code_package_url,
|
|
160
168
|
executable=None,
|
|
161
169
|
image=None,
|
|
162
170
|
image_pull_policy=None,
|
|
171
|
+
image_pull_secrets=None,
|
|
163
172
|
service_account=None,
|
|
164
173
|
secrets=None,
|
|
165
174
|
node_selector=None,
|
|
@@ -297,12 +306,14 @@ def step(
|
|
|
297
306
|
task_id=task_id,
|
|
298
307
|
attempt=str(retry_count),
|
|
299
308
|
user=util.get_username(),
|
|
309
|
+
code_package_metadata=code_package_metadata,
|
|
300
310
|
code_package_sha=code_package_sha,
|
|
301
311
|
code_package_url=code_package_url,
|
|
302
312
|
code_package_ds=ctx.obj.flow_datastore.TYPE,
|
|
303
313
|
step_cli=step_cli,
|
|
304
314
|
docker_image=image,
|
|
305
315
|
docker_image_pull_policy=image_pull_policy,
|
|
316
|
+
image_pull_secrets=image_pull_secrets,
|
|
306
317
|
service_account=service_account,
|
|
307
318
|
secrets=secrets,
|
|
308
319
|
node_selector=node_selector,
|
|
@@ -11,6 +11,7 @@ from metaflow.metadata_provider import MetaDatum
|
|
|
11
11
|
from metaflow.metadata_provider.util import sync_local_metadata_to_datastore
|
|
12
12
|
from metaflow.metaflow_config import (
|
|
13
13
|
DATASTORE_LOCAL_DIR,
|
|
14
|
+
FEAT_ALWAYS_UPLOAD_CODE_PACKAGE,
|
|
14
15
|
KUBERNETES_CONTAINER_IMAGE,
|
|
15
16
|
KUBERNETES_CONTAINER_REGISTRY,
|
|
16
17
|
KUBERNETES_CPU,
|
|
@@ -18,6 +19,7 @@ from metaflow.metaflow_config import (
|
|
|
18
19
|
KUBERNETES_FETCH_EC2_METADATA,
|
|
19
20
|
KUBERNETES_GPU_VENDOR,
|
|
20
21
|
KUBERNETES_IMAGE_PULL_POLICY,
|
|
22
|
+
KUBERNETES_IMAGE_PULL_SECRETS,
|
|
21
23
|
KUBERNETES_MEMORY,
|
|
22
24
|
KUBERNETES_LABELS,
|
|
23
25
|
KUBERNETES_ANNOTATIONS,
|
|
@@ -74,6 +76,10 @@ class KubernetesDecorator(StepDecorator):
|
|
|
74
76
|
not, a default Docker image mapping to the current version of Python is used.
|
|
75
77
|
image_pull_policy: str, default KUBERNETES_IMAGE_PULL_POLICY
|
|
76
78
|
If given, the imagePullPolicy to be applied to the Docker image of the step.
|
|
79
|
+
image_pull_secrets: List[str], default []
|
|
80
|
+
The default is extracted from METAFLOW_KUBERNETES_IMAGE_PULL_SECRETS.
|
|
81
|
+
Kubernetes image pull secrets to use when pulling container images
|
|
82
|
+
in Kubernetes.
|
|
77
83
|
service_account : str, default METAFLOW_KUBERNETES_SERVICE_ACCOUNT
|
|
78
84
|
Kubernetes service account to use when launching pod in Kubernetes.
|
|
79
85
|
secrets : List[str], optional, default None
|
|
@@ -92,7 +98,7 @@ class KubernetesDecorator(StepDecorator):
|
|
|
92
98
|
the scheduled node should not have GPUs.
|
|
93
99
|
gpu_vendor : str, default KUBERNETES_GPU_VENDOR
|
|
94
100
|
The vendor of the GPUs to be used for this step.
|
|
95
|
-
tolerations : List[str], default []
|
|
101
|
+
tolerations : List[Dict[str,str]], default []
|
|
96
102
|
The default is extracted from METAFLOW_KUBERNETES_TOLERATIONS.
|
|
97
103
|
Kubernetes tolerations to use when launching pod in Kubernetes.
|
|
98
104
|
labels: Dict[str, str], default: METAFLOW_KUBERNETES_LABELS
|
|
@@ -141,6 +147,7 @@ class KubernetesDecorator(StepDecorator):
|
|
|
141
147
|
"disk": "10240",
|
|
142
148
|
"image": None,
|
|
143
149
|
"image_pull_policy": None,
|
|
150
|
+
"image_pull_secrets": None, # e.g., ["regcred"]
|
|
144
151
|
"service_account": None,
|
|
145
152
|
"secrets": None, # e.g., mysecret
|
|
146
153
|
"node_selector": None, # e.g., kubernetes.io/os=linux
|
|
@@ -164,6 +171,7 @@ class KubernetesDecorator(StepDecorator):
|
|
|
164
171
|
"qos": KUBERNETES_QOS,
|
|
165
172
|
"security_context": None,
|
|
166
173
|
}
|
|
174
|
+
package_metadata = None
|
|
167
175
|
package_url = None
|
|
168
176
|
package_sha = None
|
|
169
177
|
run_time_limit = None
|
|
@@ -173,8 +181,6 @@ class KubernetesDecorator(StepDecorator):
|
|
|
173
181
|
target_platform = KUBERNETES_CONDA_ARCH or "linux-64"
|
|
174
182
|
|
|
175
183
|
def init(self):
|
|
176
|
-
super(KubernetesDecorator, self).init()
|
|
177
|
-
|
|
178
184
|
if not self.attributes["namespace"]:
|
|
179
185
|
self.attributes["namespace"] = KUBERNETES_NAMESPACE
|
|
180
186
|
if not self.attributes["service_account"]:
|
|
@@ -194,6 +200,10 @@ class KubernetesDecorator(StepDecorator):
|
|
|
194
200
|
)
|
|
195
201
|
if not self.attributes["image_pull_policy"] and KUBERNETES_IMAGE_PULL_POLICY:
|
|
196
202
|
self.attributes["image_pull_policy"] = KUBERNETES_IMAGE_PULL_POLICY
|
|
203
|
+
if not self.attributes["image_pull_secrets"] and KUBERNETES_IMAGE_PULL_SECRETS:
|
|
204
|
+
self.attributes["image_pull_secrets"] = json.loads(
|
|
205
|
+
KUBERNETES_IMAGE_PULL_SECRETS
|
|
206
|
+
)
|
|
197
207
|
|
|
198
208
|
if isinstance(self.attributes["node_selector"], str):
|
|
199
209
|
self.attributes["node_selector"] = parse_kube_keyvalue_list(
|
|
@@ -476,6 +486,7 @@ class KubernetesDecorator(StepDecorator):
|
|
|
476
486
|
# to execute on Kubernetes anymore. We can execute possible fallback
|
|
477
487
|
# code locally.
|
|
478
488
|
cli_args.commands = ["kubernetes", "step"]
|
|
489
|
+
cli_args.command_args.append(self.package_metadata)
|
|
479
490
|
cli_args.command_args.append(self.package_sha)
|
|
480
491
|
cli_args.command_args.append(self.package_url)
|
|
481
492
|
|
|
@@ -494,6 +505,7 @@ class KubernetesDecorator(StepDecorator):
|
|
|
494
505
|
for key, val in v.items()
|
|
495
506
|
]
|
|
496
507
|
elif k in [
|
|
508
|
+
"image_pull_secrets",
|
|
497
509
|
"tolerations",
|
|
498
510
|
"persistent_volume_claims",
|
|
499
511
|
"labels",
|
|
@@ -646,9 +658,16 @@ class KubernetesDecorator(StepDecorator):
|
|
|
646
658
|
@classmethod
|
|
647
659
|
def _save_package_once(cls, flow_datastore, package):
|
|
648
660
|
if cls.package_url is None:
|
|
649
|
-
|
|
650
|
-
|
|
651
|
-
|
|
661
|
+
if not FEAT_ALWAYS_UPLOAD_CODE_PACKAGE:
|
|
662
|
+
cls.package_url, cls.package_sha = flow_datastore.save_data(
|
|
663
|
+
[package.blob], len_hint=1
|
|
664
|
+
)[0]
|
|
665
|
+
cls.package_metadata = package.package_metadata
|
|
666
|
+
else:
|
|
667
|
+
# Blocks until the package is uploaded
|
|
668
|
+
cls.package_url = package.package_url()
|
|
669
|
+
cls.package_sha = package.package_sha()
|
|
670
|
+
cls.package_metadata = package.package_metadata
|
|
652
671
|
|
|
653
672
|
|
|
654
673
|
# TODO: Unify this method with the multi-node setup in @batch
|
|
@@ -235,8 +235,10 @@ class KubernetesJob(object):
|
|
|
235
235
|
)
|
|
236
236
|
],
|
|
237
237
|
node_selector=self._kwargs.get("node_selector"),
|
|
238
|
-
|
|
239
|
-
|
|
238
|
+
image_pull_secrets=[
|
|
239
|
+
client.V1LocalObjectReference(secret)
|
|
240
|
+
for secret in self._kwargs.get("image_pull_secrets") or []
|
|
241
|
+
],
|
|
240
242
|
# TODO (savin): Support preemption policies
|
|
241
243
|
# preemption_policy=?,
|
|
242
244
|
#
|
|
@@ -520,12 +522,10 @@ class RunningJob(object):
|
|
|
520
522
|
# 3. If the pod object hasn't shown up yet, we set the parallelism to 0
|
|
521
523
|
# to preempt it.
|
|
522
524
|
client = self._client.get()
|
|
523
|
-
|
|
524
525
|
if not self.is_done:
|
|
525
526
|
if self.is_running:
|
|
526
527
|
# Case 1.
|
|
527
528
|
from kubernetes.stream import stream
|
|
528
|
-
|
|
529
529
|
api_instance = client.CoreV1Api
|
|
530
530
|
try:
|
|
531
531
|
# TODO: stream opens a web-socket connection. It may
|
|
@@ -591,6 +591,10 @@ class RunningJob(object):
|
|
|
591
591
|
return self.id
|
|
592
592
|
return "job %s" % self._name
|
|
593
593
|
|
|
594
|
+
@property
|
|
595
|
+
def is_unschedulable(self):
|
|
596
|
+
return self._job["metadata"]["annotations"].get("metaflow/job_status", "") == "Unsatisfiable_Resource_Request"
|
|
597
|
+
|
|
594
598
|
@property
|
|
595
599
|
def is_done(self):
|
|
596
600
|
# Check if the container is done. As a side effect, also refreshes self._job and
|
|
@@ -604,6 +608,7 @@ class RunningJob(object):
|
|
|
604
608
|
or bool(self._job["status"].get("failed"))
|
|
605
609
|
or self._are_pod_containers_done
|
|
606
610
|
or (self._job["spec"]["parallelism"] == 0)
|
|
611
|
+
or self.is_unschedulable
|
|
607
612
|
)
|
|
608
613
|
|
|
609
614
|
if not done():
|
|
@@ -661,6 +666,7 @@ class RunningJob(object):
|
|
|
661
666
|
bool(self._job["status"].get("failed"))
|
|
662
667
|
or self._has_any_container_failed
|
|
663
668
|
or (self._job["spec"]["parallelism"] == 0)
|
|
669
|
+
or self.is_unschedulable
|
|
664
670
|
)
|
|
665
671
|
return retval
|
|
666
672
|
|
|
@@ -758,6 +764,8 @@ class RunningJob(object):
|
|
|
758
764
|
return 0, None
|
|
759
765
|
# Best effort since Pod object can disappear on us at anytime
|
|
760
766
|
else:
|
|
767
|
+
if self.is_unschedulable:
|
|
768
|
+
return 1, self._job["metadata"]["annotations"].get("metaflow/job_status_reason", "")
|
|
761
769
|
if self._pod.get("status", {}).get("phase") not in (
|
|
762
770
|
"Succeeded",
|
|
763
771
|
"Failed",
|
|
@@ -6,6 +6,7 @@ from collections import namedtuple
|
|
|
6
6
|
from metaflow.exception import MetaflowException
|
|
7
7
|
from metaflow.metaflow_config import KUBERNETES_JOBSET_GROUP, KUBERNETES_JOBSET_VERSION
|
|
8
8
|
from metaflow.tracing import inject_tracing_vars
|
|
9
|
+
from metaflow._vendor import yaml
|
|
9
10
|
|
|
10
11
|
from .kube_utils import qos_requests_and_limits
|
|
11
12
|
|
|
@@ -718,8 +719,11 @@ class JobSetSpec(object):
|
|
|
718
719
|
)
|
|
719
720
|
],
|
|
720
721
|
node_selector=self._kwargs.get("node_selector"),
|
|
721
|
-
|
|
722
|
-
|
|
722
|
+
image_pull_secrets=[
|
|
723
|
+
client.V1LocalObjectReference(secret)
|
|
724
|
+
for secret in self._kwargs.get("image_pull_secrets")
|
|
725
|
+
or []
|
|
726
|
+
],
|
|
723
727
|
# TODO (savin): Support preemption policies
|
|
724
728
|
# preemption_policy=?,
|
|
725
729
|
#
|
|
@@ -1022,34 +1026,32 @@ class KubernetesArgoJobSet(object):
|
|
|
1022
1026
|
|
|
1023
1027
|
def dump(self):
|
|
1024
1028
|
client = self._kubernetes_sdk
|
|
1025
|
-
|
|
1026
|
-
|
|
1027
|
-
|
|
1028
|
-
|
|
1029
|
-
|
|
1030
|
-
|
|
1031
|
-
|
|
1032
|
-
|
|
1033
|
-
|
|
1034
|
-
|
|
1035
|
-
|
|
1036
|
-
|
|
1037
|
-
),
|
|
1038
|
-
|
|
1039
|
-
|
|
1040
|
-
|
|
1041
|
-
|
|
1042
|
-
|
|
1043
|
-
|
|
1044
|
-
|
|
1045
|
-
|
|
1046
|
-
|
|
1047
|
-
|
|
1048
|
-
),
|
|
1049
|
-
status=None,
|
|
1050
|
-
)
|
|
1029
|
+
js_dict = client.ApiClient().sanitize_for_serialization(
|
|
1030
|
+
dict(
|
|
1031
|
+
apiVersion=self._group + "/" + self._version,
|
|
1032
|
+
kind="JobSet",
|
|
1033
|
+
metadata=client.api_client.ApiClient().sanitize_for_serialization(
|
|
1034
|
+
client.V1ObjectMeta(
|
|
1035
|
+
name=self.name,
|
|
1036
|
+
labels=self._labels,
|
|
1037
|
+
annotations=self._annotations,
|
|
1038
|
+
)
|
|
1039
|
+
),
|
|
1040
|
+
spec=dict(
|
|
1041
|
+
replicatedJobs=[self.control.dump(), self.worker.dump()],
|
|
1042
|
+
suspend=False,
|
|
1043
|
+
startupPolicy=None,
|
|
1044
|
+
successPolicy=None,
|
|
1045
|
+
# The Failure Policy helps setting the number of retries for the jobset.
|
|
1046
|
+
# but we don't rely on it and instead rely on either the local scheduler
|
|
1047
|
+
# or the Argo Workflows to handle retries.
|
|
1048
|
+
failurePolicy=None,
|
|
1049
|
+
network=None,
|
|
1050
|
+
),
|
|
1051
|
+
status=None,
|
|
1051
1052
|
)
|
|
1052
1053
|
)
|
|
1054
|
+
data = yaml.dump(js_dict, default_flow_style=False, indent=2)
|
|
1053
1055
|
# The values we populate in the Jobset manifest (for Argo Workflows) piggybacks on the Argo Workflow's templating engine.
|
|
1054
1056
|
# Even though Argo Workflows's templating helps us constructing all the necessary IDs and populating the fields
|
|
1055
1057
|
# required by Metaflow, we run into one glitch. When we construct JSON/YAML serializable objects,
|
|
@@ -1064,7 +1066,6 @@ class KubernetesArgoJobSet(object):
|
|
|
1064
1066
|
# Since the value of `num_parallel` can be dynamic and can change from run to run, we need to ensure that the
|
|
1065
1067
|
# value can be passed-down dynamically and is **explicitly set as a integer** in the Jobset Manifest submitted as a
|
|
1066
1068
|
# part of the Argo Workflow
|
|
1067
|
-
|
|
1068
|
-
quoted_substring = '"{{=asInt(inputs.parameters.workerCount)}}"'
|
|
1069
|
+
quoted_substring = "'{{=asInt(inputs.parameters.workerCount)}}'"
|
|
1069
1070
|
unquoted_substring = "{{=asInt(inputs.parameters.workerCount)}}"
|
|
1070
1071
|
return data.replace(quoted_substring, unquoted_substring)
|