metaflow 2.13__py2.py3-none-any.whl → 2.13.1__py2.py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- metaflow/metadata_provider/heartbeat.py +23 -8
- metaflow/metaflow_config.py +2 -0
- metaflow/plugins/argo/argo_client.py +0 -2
- metaflow/plugins/argo/argo_workflows.py +86 -104
- metaflow/plugins/argo/argo_workflows_cli.py +0 -1
- metaflow/plugins/argo/argo_workflows_decorator.py +2 -4
- metaflow/plugins/argo/jobset_input_paths.py +0 -1
- metaflow/plugins/aws/aws_utils.py +6 -1
- metaflow/plugins/aws/batch/batch_client.py +1 -3
- metaflow/plugins/aws/batch/batch_decorator.py +11 -11
- metaflow/plugins/aws/step_functions/dynamo_db_client.py +0 -3
- metaflow/plugins/aws/step_functions/production_token.py +1 -1
- metaflow/plugins/aws/step_functions/step_functions.py +1 -1
- metaflow/plugins/aws/step_functions/step_functions_cli.py +0 -1
- metaflow/plugins/aws/step_functions/step_functions_decorator.py +0 -1
- metaflow/plugins/aws/step_functions/step_functions_deployer_objects.py +0 -1
- metaflow/plugins/kubernetes/kube_utils.py +55 -1
- metaflow/plugins/kubernetes/kubernetes.py +33 -80
- metaflow/plugins/kubernetes/kubernetes_cli.py +22 -5
- metaflow/plugins/kubernetes/kubernetes_decorator.py +49 -2
- metaflow/plugins/kubernetes/kubernetes_job.py +3 -6
- metaflow/plugins/kubernetes/kubernetes_jobsets.py +22 -5
- metaflow/plugins/pypi/bootstrap.py +87 -54
- metaflow/plugins/pypi/conda_environment.py +7 -6
- metaflow/plugins/pypi/micromamba.py +35 -21
- metaflow/plugins/pypi/pip.py +2 -4
- metaflow/plugins/pypi/utils.py +4 -2
- metaflow/version.py +1 -1
- {metaflow-2.13.dist-info → metaflow-2.13.1.dist-info}/METADATA +2 -2
- {metaflow-2.13.dist-info → metaflow-2.13.1.dist-info}/RECORD +34 -34
- {metaflow-2.13.dist-info → metaflow-2.13.1.dist-info}/WHEEL +1 -1
- {metaflow-2.13.dist-info → metaflow-2.13.1.dist-info}/LICENSE +0 -0
- {metaflow-2.13.dist-info → metaflow-2.13.1.dist-info}/entry_points.txt +0 -0
- {metaflow-2.13.dist-info → metaflow-2.13.1.dist-info}/top_level.txt +0 -0
@@ -1,7 +1,14 @@
|
|
1
|
-
|
1
|
+
import re
|
2
|
+
from typing import Dict, List, Optional
|
3
|
+
from metaflow.exception import CommandException, MetaflowException
|
2
4
|
from metaflow.util import get_username, get_latest_run_id
|
3
5
|
|
4
6
|
|
7
|
+
# Avoid a circular import by keeping the exception class in this module.
class KubernetesException(MetaflowException):
    """Error raised by the Kubernetes plugin helpers in this module.

    Subclasses ``MetaflowException``; ``headline`` is the short title
    attached to the error.
    """

    headline = "Kubernetes error"
|
10
|
+
|
11
|
+
|
5
12
|
def parse_cli_options(flow_name, run_id, user, my_runs, echo):
|
6
13
|
if user and my_runs:
|
7
14
|
raise CommandException("--user and --my-runs are mutually exclusive.")
|
@@ -52,3 +59,50 @@ def qos_requests_and_limits(qos: str, cpu: int, memory: int, storage: int):
|
|
52
59
|
# TODO: Add support for BestEffort once there is a use case for it.
|
53
60
|
# BestEffort - no limit or requests for cpu/memory
|
54
61
|
return qos_requests, qos_limits
|
62
|
+
|
63
|
+
|
64
|
+
def validate_kube_labels(
    labels: Optional[Dict[str, Optional[str]]],
) -> bool:
    """Validate Kubernetes label values.

    Only the label *values* are checked; keys are not validated. Ideally,
    keys should be static, and the validation rules for keys are more
    complex than those for values. For the full validation rules, see:

    https://kubernetes.io/docs/concepts/overview/working-with-objects/labels/#syntax-and-character-set

    Parameters
    ----------
    labels : Dict[str, Optional[str]], optional
        Mapping of label key to label value. ``None`` or an empty mapping is
        accepted, and so are ``None`` or empty-string values.

    Returns
    -------
    bool
        True when every value is acceptable.

    Raises
    ------
    KubernetesException
        For the first value that is not a valid label value.
    """

    def validate_label(s: Optional[str]):
        regex_match = r"^(([A-Za-z0-9][-A-Za-z0-9_.]{0,61})?[A-Za-z0-9])?$"
        if not s:
            # allow empty label
            return True
        # fullmatch rather than search: with search, the trailing `$` also
        # matches just before a final newline, so "value\n" would slip
        # through validation.
        if not re.fullmatch(regex_match, s):
            raise KubernetesException(
                'Invalid value: "%s"\n'
                "A valid label must be an empty string or one that\n"
                "  - Consist of alphanumeric, '-', '_' or '.' characters\n"
                "  - Begins and ends with an alphanumeric character\n"
                "  - Is at most 63 characters" % s
            )
        return True

    # A generator is enough here: an invalid value raises, it never yields
    # False, so there is no need to materialise a list first.
    return all(validate_label(v) for v in labels.values()) if labels else True
|
92
|
+
|
93
|
+
|
94
|
+
def parse_kube_keyvalue_list(items: List[str], requires_both: bool = True):
    """Parse a list of ``key=value`` strings into a dictionary.

    Each item is split on the first ``=`` only, so a value may itself
    contain ``=`` characters.

    Parameters
    ----------
    items : List[str]
        Strings of the form ``key=value`` (or a bare ``key`` when
        `requires_both` is False).
    requires_both : bool, default True
        When True, every item must contain ``=``. When False, an item
        without ``=`` is stored with a value of ``None``.

    Returns
    -------
    Dict[str, Optional[str]]
        Mapping of key to value, in the order the items were given.

    Raises
    ------
    KubernetesException
        If a key occurs more than once, if an item is not a string, or if
        an item lacks ``=`` while `requires_both` is True.
    """
    parsed = {}
    for item_str in items:
        try:
            parts = item_str.split("=", 1)
        except AttributeError as err:
            # Item is not a string (no .split); report the whole input.
            # `(items,)` keeps the %-formatting correct when `items` is a
            # tuple, which would otherwise be unpacked as format arguments.
            raise KubernetesException(
                "Unable to parse kubernetes list: %s" % (items,)
            ) from err

        has_value = len(parts) > 1
        if requires_both and not has_value:
            raise KubernetesException(
                "Unable to parse kubernetes list: %s" % (items,)
            )

        key = str(parts[0])
        if key in parsed:
            raise KubernetesException("Duplicate key found: %s" % key)
        parsed[key] = str(parts[1]) if has_value else None
    return parsed
|
@@ -1,11 +1,8 @@
|
|
1
|
-
import copy
|
2
1
|
import json
|
3
2
|
import math
|
4
3
|
import os
|
5
|
-
import re
|
6
4
|
import shlex
|
7
5
|
import time
|
8
|
-
from typing import Dict, List, Optional
|
9
6
|
from uuid import uuid4
|
10
7
|
|
11
8
|
from metaflow import current, util
|
@@ -35,7 +32,6 @@ from metaflow.metaflow_config import (
|
|
35
32
|
DEFAULT_SECRETS_BACKEND_TYPE,
|
36
33
|
GCP_SECRET_MANAGER_PREFIX,
|
37
34
|
KUBERNETES_FETCH_EC2_METADATA,
|
38
|
-
KUBERNETES_LABELS,
|
39
35
|
KUBERNETES_SANDBOX_INIT_SCRIPT,
|
40
36
|
OTEL_ENDPOINT,
|
41
37
|
S3_ENDPOINT_URL,
|
@@ -193,6 +189,7 @@ class Kubernetes(object):
|
|
193
189
|
persistent_volume_claims=None,
|
194
190
|
tolerations=None,
|
195
191
|
labels=None,
|
192
|
+
annotations=None,
|
196
193
|
shared_memory=None,
|
197
194
|
port=None,
|
198
195
|
num_parallel=None,
|
@@ -304,10 +301,6 @@ class Kubernetes(object):
|
|
304
301
|
# see get_datastore_root_from_config in datastore/local.py).
|
305
302
|
)
|
306
303
|
|
307
|
-
_labels = self._get_labels(labels)
|
308
|
-
for k, v in _labels.items():
|
309
|
-
jobset.label(k, v)
|
310
|
-
|
311
304
|
for k in list(
|
312
305
|
[] if not secrets else [secrets] if isinstance(secrets, str) else secrets
|
313
306
|
) + KUBERNETES_SECRETS.split(","):
|
@@ -379,13 +372,16 @@ class Kubernetes(object):
|
|
379
372
|
for name, value in env.items():
|
380
373
|
jobset.environment_variable(name, value)
|
381
374
|
|
382
|
-
|
375
|
+
system_annotations = {
|
383
376
|
"metaflow/user": user,
|
384
377
|
"metaflow/flow_name": flow_name,
|
385
378
|
"metaflow/control-task-id": task_id,
|
379
|
+
"metaflow/run_id": run_id,
|
380
|
+
"metaflow/step_name": step_name,
|
381
|
+
"metaflow/attempt": attempt,
|
386
382
|
}
|
387
383
|
if current.get("project_name"):
|
388
|
-
|
384
|
+
system_annotations.update(
|
389
385
|
{
|
390
386
|
"metaflow/project_name": current.project_name,
|
391
387
|
"metaflow/branch_name": current.branch_name,
|
@@ -393,15 +389,15 @@ class Kubernetes(object):
|
|
393
389
|
}
|
394
390
|
)
|
395
391
|
|
396
|
-
|
397
|
-
|
392
|
+
system_labels = {
|
393
|
+
"app.kubernetes.io/name": "metaflow-task",
|
394
|
+
"app.kubernetes.io/part-of": "metaflow",
|
395
|
+
}
|
398
396
|
|
399
|
-
(
|
400
|
-
|
401
|
-
|
402
|
-
|
403
|
-
.label("app.kubernetes.io/name", "metaflow-task")
|
404
|
-
.label("app.kubernetes.io/part-of", "metaflow")
|
397
|
+
jobset.labels({**({} if not labels else labels), **system_labels})
|
398
|
+
|
399
|
+
jobset.annotations(
|
400
|
+
{**({} if not annotations else annotations), **system_annotations}
|
405
401
|
)
|
406
402
|
# We need this task-id set so that all the nodes are aware of the control
|
407
403
|
# task's task-id. These "MF_" variables populate the `current.parallel` namedtuple
|
@@ -491,6 +487,7 @@ class Kubernetes(object):
|
|
491
487
|
port=None,
|
492
488
|
name_pattern=None,
|
493
489
|
qos=None,
|
490
|
+
annotations=None,
|
494
491
|
):
|
495
492
|
if env is None:
|
496
493
|
env = {}
|
@@ -523,7 +520,8 @@ class Kubernetes(object):
|
|
523
520
|
retries=0,
|
524
521
|
step_name=step_name,
|
525
522
|
tolerations=tolerations,
|
526
|
-
labels=
|
523
|
+
labels=labels,
|
524
|
+
annotations=annotations,
|
527
525
|
use_tmpfs=use_tmpfs,
|
528
526
|
tmpfs_tempdir=tmpfs_tempdir,
|
529
527
|
tmpfs_size=tmpfs_size,
|
@@ -642,13 +640,25 @@ class Kubernetes(object):
|
|
642
640
|
|
643
641
|
for name, value in env.items():
|
644
642
|
job.environment_variable(name, value)
|
643
|
+
# Add job specific labels
|
644
|
+
system_labels = {
|
645
|
+
"app.kubernetes.io/name": "metaflow-task",
|
646
|
+
"app.kubernetes.io/part-of": "metaflow",
|
647
|
+
}
|
648
|
+
for name, value in system_labels.items():
|
649
|
+
job.label(name, value)
|
645
650
|
|
646
|
-
annotations
|
647
|
-
|
651
|
+
# Add job specific annotations not set in the decorator.
|
652
|
+
system_annotations = {
|
648
653
|
"metaflow/flow_name": flow_name,
|
654
|
+
"metaflow/run_id": run_id,
|
655
|
+
"metaflow/step_name": step_name,
|
656
|
+
"metaflow/task_id": task_id,
|
657
|
+
"metaflow/attempt": attempt,
|
658
|
+
"metaflow/user": user,
|
649
659
|
}
|
650
660
|
if current.get("project_name"):
|
651
|
-
|
661
|
+
system_annotations.update(
|
652
662
|
{
|
653
663
|
"metaflow/project_name": current.project_name,
|
654
664
|
"metaflow/branch_name": current.branch_name,
|
@@ -656,7 +666,7 @@ class Kubernetes(object):
|
|
656
666
|
}
|
657
667
|
)
|
658
668
|
|
659
|
-
for name, value in
|
669
|
+
for name, value in system_annotations.items():
|
660
670
|
job.annotation(name, value)
|
661
671
|
|
662
672
|
(
|
@@ -775,60 +785,3 @@ class Kubernetes(object):
|
|
775
785
|
"stderr",
|
776
786
|
job_id=self._job.id,
|
777
787
|
)
|
778
|
-
|
779
|
-
@staticmethod
|
780
|
-
def _get_labels(extra_labels=None):
|
781
|
-
if extra_labels is None:
|
782
|
-
extra_labels = {}
|
783
|
-
env_labels = KUBERNETES_LABELS.split(",") if KUBERNETES_LABELS else []
|
784
|
-
env_labels = parse_kube_keyvalue_list(env_labels, False)
|
785
|
-
labels = {**env_labels, **extra_labels}
|
786
|
-
validate_kube_labels(labels)
|
787
|
-
return labels
|
788
|
-
|
789
|
-
|
790
|
-
def validate_kube_labels(
|
791
|
-
labels: Optional[Dict[str, Optional[str]]],
|
792
|
-
) -> bool:
|
793
|
-
"""Validate label values.
|
794
|
-
|
795
|
-
This validates the kubernetes label values. It does not validate the keys.
|
796
|
-
Ideally, keys should be static and also the validation rules for keys are
|
797
|
-
more complex than those for values. For full validation rules, see:
|
798
|
-
|
799
|
-
https://kubernetes.io/docs/concepts/overview/working-with-objects/labels/#syntax-and-character-set
|
800
|
-
"""
|
801
|
-
|
802
|
-
def validate_label(s: Optional[str]):
|
803
|
-
regex_match = r"^(([A-Za-z0-9][-A-Za-z0-9_.]{0,61})?[A-Za-z0-9])?$"
|
804
|
-
if not s:
|
805
|
-
# allow empty label
|
806
|
-
return True
|
807
|
-
if not re.search(regex_match, s):
|
808
|
-
raise KubernetesException(
|
809
|
-
'Invalid value: "%s"\n'
|
810
|
-
"A valid label must be an empty string or one that\n"
|
811
|
-
" - Consist of alphanumeric, '-', '_' or '.' characters\n"
|
812
|
-
" - Begins and ends with an alphanumeric character\n"
|
813
|
-
" - Is at most 63 characters" % s
|
814
|
-
)
|
815
|
-
return True
|
816
|
-
|
817
|
-
return all([validate_label(v) for v in labels.values()]) if labels else True
|
818
|
-
|
819
|
-
|
820
|
-
def parse_kube_keyvalue_list(items: List[str], requires_both: bool = True):
|
821
|
-
try:
|
822
|
-
ret = {}
|
823
|
-
for item_str in items:
|
824
|
-
item = item_str.split("=", 1)
|
825
|
-
if requires_both:
|
826
|
-
item[1] # raise IndexError
|
827
|
-
if str(item[0]) in ret:
|
828
|
-
raise KubernetesException("Duplicate key found: %s" % str(item[0]))
|
829
|
-
ret[str(item[0])] = str(item[1]) if len(item) > 1 else None
|
830
|
-
return ret
|
831
|
-
except KubernetesException as e:
|
832
|
-
raise e
|
833
|
-
except (AttributeError, IndexError):
|
834
|
-
raise KubernetesException("Unable to parse kubernetes list: %s" % items)
|
@@ -3,14 +3,17 @@ import sys
|
|
3
3
|
import time
|
4
4
|
import traceback
|
5
5
|
|
6
|
-
from metaflow.plugins.kubernetes.kube_utils import
|
6
|
+
from metaflow.plugins.kubernetes.kube_utils import (
|
7
|
+
parse_cli_options,
|
8
|
+
parse_kube_keyvalue_list,
|
9
|
+
)
|
7
10
|
from metaflow.plugins.kubernetes.kubernetes_client import KubernetesClient
|
8
11
|
import metaflow.tracing as tracing
|
9
12
|
from metaflow import JSONTypeClass, util
|
10
13
|
from metaflow._vendor import click
|
11
14
|
from metaflow.exception import METAFLOW_EXIT_DISALLOW_RETRY, MetaflowException
|
12
15
|
from metaflow.metadata_provider.util import sync_local_metadata_from_datastore
|
13
|
-
from metaflow.metaflow_config import DATASTORE_LOCAL_DIR
|
16
|
+
from metaflow.metaflow_config import DATASTORE_LOCAL_DIR
|
14
17
|
from metaflow.mflog import TASK_LOG_SOURCE
|
15
18
|
from metaflow.unbounded_foreach import UBF_CONTROL, UBF_TASK
|
16
19
|
|
@@ -18,9 +21,7 @@ from .kubernetes import (
|
|
18
21
|
Kubernetes,
|
19
22
|
KubernetesException,
|
20
23
|
KubernetesKilledException,
|
21
|
-
parse_kube_keyvalue_list,
|
22
24
|
)
|
23
|
-
from .kubernetes_decorator import KubernetesDecorator
|
24
25
|
|
25
26
|
|
26
27
|
@click.group()
|
@@ -132,6 +133,18 @@ def kubernetes():
|
|
132
133
|
type=str,
|
133
134
|
help="Quality of Service class for the Kubernetes pod",
|
134
135
|
)
|
136
|
+
@click.option(
|
137
|
+
"--labels",
|
138
|
+
default=None,
|
139
|
+
type=JSONTypeClass(),
|
140
|
+
multiple=False,
|
141
|
+
)
|
142
|
+
@click.option(
|
143
|
+
"--annotations",
|
144
|
+
default=None,
|
145
|
+
type=JSONTypeClass(),
|
146
|
+
multiple=False,
|
147
|
+
)
|
135
148
|
@click.pass_context
|
136
149
|
def step(
|
137
150
|
ctx,
|
@@ -161,6 +174,8 @@ def step(
|
|
161
174
|
port=None,
|
162
175
|
num_parallel=None,
|
163
176
|
qos=None,
|
177
|
+
labels=None,
|
178
|
+
annotations=None,
|
164
179
|
**kwargs
|
165
180
|
):
|
166
181
|
def echo(msg, stream="stderr", job_id=None, **kwargs):
|
@@ -302,8 +317,10 @@ def step(
|
|
302
317
|
port=port,
|
303
318
|
num_parallel=num_parallel,
|
304
319
|
qos=qos,
|
320
|
+
labels=labels,
|
321
|
+
annotations=annotations,
|
305
322
|
)
|
306
|
-
except Exception
|
323
|
+
except Exception:
|
307
324
|
traceback.print_exc(chain=False)
|
308
325
|
_sync_metadata()
|
309
326
|
sys.exit(METAFLOW_EXIT_DISALLOW_RETRY)
|
@@ -19,6 +19,8 @@ from metaflow.metaflow_config import (
|
|
19
19
|
KUBERNETES_GPU_VENDOR,
|
20
20
|
KUBERNETES_IMAGE_PULL_POLICY,
|
21
21
|
KUBERNETES_MEMORY,
|
22
|
+
KUBERNETES_LABELS,
|
23
|
+
KUBERNETES_ANNOTATIONS,
|
22
24
|
KUBERNETES_NAMESPACE,
|
23
25
|
KUBERNETES_NODE_SELECTOR,
|
24
26
|
KUBERNETES_PERSISTENT_VOLUME_CLAIMS,
|
@@ -34,7 +36,8 @@ from metaflow.sidecar import Sidecar
|
|
34
36
|
from metaflow.unbounded_foreach import UBF_CONTROL
|
35
37
|
|
36
38
|
from ..aws.aws_utils import get_docker_registry, get_ec2_instance_metadata
|
37
|
-
from .kubernetes import KubernetesException
|
39
|
+
from .kubernetes import KubernetesException
|
40
|
+
from .kube_utils import validate_kube_labels, parse_kube_keyvalue_list
|
38
41
|
|
39
42
|
try:
|
40
43
|
unicode
|
@@ -89,6 +92,10 @@ class KubernetesDecorator(StepDecorator):
|
|
89
92
|
tolerations : List[str], default []
|
90
93
|
The default is extracted from METAFLOW_KUBERNETES_TOLERATIONS.
|
91
94
|
Kubernetes tolerations to use when launching pod in Kubernetes.
|
95
|
+
labels: Dict[str, str], default: METAFLOW_KUBERNETES_LABELS
|
96
|
+
Kubernetes labels to use when launching pod in Kubernetes.
|
97
|
+
annotations: Dict[str, str], default: METAFLOW_KUBERNETES_ANNOTATIONS
|
98
|
+
Kubernetes annotations to use when launching pod in Kubernetes.
|
92
99
|
use_tmpfs : bool, default False
|
93
100
|
This enables an explicit tmpfs mount for this step.
|
94
101
|
tmpfs_tempdir : bool, default True
|
@@ -131,6 +138,8 @@ class KubernetesDecorator(StepDecorator):
|
|
131
138
|
"gpu_vendor": None,
|
132
139
|
"tolerations": None, # e.g., [{"key": "arch", "operator": "Equal", "value": "amd"},
|
133
140
|
# {"key": "foo", "operator": "Equal", "value": "bar"}]
|
141
|
+
"labels": None, # e.g. {"test-label": "value", "another-label":"value2"}
|
142
|
+
"annotations": None, # e.g. {"note": "value", "another-note": "value2"}
|
134
143
|
"use_tmpfs": None,
|
135
144
|
"tmpfs_tempdir": True,
|
136
145
|
"tmpfs_size": None,
|
@@ -217,6 +226,36 @@ class KubernetesDecorator(StepDecorator):
|
|
217
226
|
self.attributes["memory"] = KUBERNETES_MEMORY
|
218
227
|
if self.attributes["disk"] == self.defaults["disk"] and KUBERNETES_DISK:
|
219
228
|
self.attributes["disk"] = KUBERNETES_DISK
|
229
|
+
# Label source precedence (decreasing):
|
230
|
+
# - System labels (set outside of decorator)
|
231
|
+
# - Decorator labels: @kubernetes(labels={})
|
232
|
+
# - Environment variable labels: METAFLOW_KUBERNETES_LABELS=
|
233
|
+
deco_labels = {}
|
234
|
+
if self.attributes["labels"] is not None:
|
235
|
+
deco_labels = self.attributes["labels"]
|
236
|
+
|
237
|
+
env_labels = {}
|
238
|
+
if KUBERNETES_LABELS:
|
239
|
+
env_labels = parse_kube_keyvalue_list(KUBERNETES_LABELS.split(","), False)
|
240
|
+
|
241
|
+
self.attributes["labels"] = {**env_labels, **deco_labels}
|
242
|
+
|
243
|
+
# Annotations
|
244
|
+
# annotation precedence (decreasing):
|
245
|
+
# - System annotations (set outside of decorator)
|
246
|
+
# - Decorator annotations: @kubernetes(annotations={})
|
247
|
+
# - Environment annotations: METAFLOW_KUBERNETES_ANNOTATIONS=
|
248
|
+
deco_annotations = {}
|
249
|
+
if self.attributes["annotations"] is not None:
|
250
|
+
deco_annotations = self.attributes["annotations"]
|
251
|
+
|
252
|
+
env_annotations = {}
|
253
|
+
if KUBERNETES_ANNOTATIONS:
|
254
|
+
env_annotations = parse_kube_keyvalue_list(
|
255
|
+
KUBERNETES_ANNOTATIONS.split(","), False
|
256
|
+
)
|
257
|
+
|
258
|
+
self.attributes["annotations"] = {**env_annotations, **deco_annotations}
|
220
259
|
|
221
260
|
# If no docker image is explicitly specified, impute a default image.
|
222
261
|
if not self.attributes["image"]:
|
@@ -371,6 +410,9 @@ class KubernetesDecorator(StepDecorator):
|
|
371
410
|
)
|
372
411
|
)
|
373
412
|
|
413
|
+
validate_kube_labels(self.attributes["labels"])
|
414
|
+
# TODO: add validation to annotations as well?
|
415
|
+
|
374
416
|
def package_init(self, flow, step_name, environment):
|
375
417
|
try:
|
376
418
|
# Kubernetes is a soft dependency.
|
@@ -426,7 +468,12 @@ class KubernetesDecorator(StepDecorator):
|
|
426
468
|
"=".join([key, str(val)]) if val else key
|
427
469
|
for key, val in v.items()
|
428
470
|
]
|
429
|
-
elif k in [
|
471
|
+
elif k in [
|
472
|
+
"tolerations",
|
473
|
+
"persistent_volume_claims",
|
474
|
+
"labels",
|
475
|
+
"annotations",
|
476
|
+
]:
|
430
477
|
cli_args.command_options[k] = json.dumps(v)
|
431
478
|
else:
|
432
479
|
cli_args.command_options[k] = v
|
@@ -1,22 +1,19 @@
|
|
1
|
-
import copy
|
2
1
|
import json
|
3
2
|
import math
|
4
3
|
import random
|
5
|
-
import sys
|
6
4
|
import time
|
7
5
|
|
8
6
|
from metaflow.exception import MetaflowException
|
9
7
|
from metaflow.metaflow_config import KUBERNETES_SECRETS
|
10
8
|
from metaflow.tracing import inject_tracing_vars
|
11
|
-
from metaflow.unbounded_foreach import UBF_CONTROL, UBF_TASK
|
12
9
|
|
13
10
|
CLIENT_REFRESH_INTERVAL_SECONDS = 300
|
11
|
+
|
12
|
+
from .kube_utils import qos_requests_and_limits
|
14
13
|
from .kubernetes_jobsets import (
|
15
14
|
KubernetesJobSet,
|
16
15
|
) # We need this import for Kubernetes Client.
|
17
16
|
|
18
|
-
from .kube_utils import qos_requests_and_limits
|
19
|
-
|
20
17
|
|
21
18
|
class KubernetesJobException(MetaflowException):
|
22
19
|
headline = "Kubernetes job error"
|
@@ -430,7 +427,7 @@ class RunningJob(object):
|
|
430
427
|
def best_effort_kill():
|
431
428
|
try:
|
432
429
|
self.kill()
|
433
|
-
except Exception
|
430
|
+
except Exception:
|
434
431
|
pass
|
435
432
|
|
436
433
|
atexit.register(best_effort_kill)
|
@@ -1,4 +1,3 @@
|
|
1
|
-
import copy
|
2
1
|
import json
|
3
2
|
import math
|
4
3
|
import random
|
@@ -7,7 +6,6 @@ from collections import namedtuple
|
|
7
6
|
from metaflow.exception import MetaflowException
|
8
7
|
from metaflow.metaflow_config import KUBERNETES_JOBSET_GROUP, KUBERNETES_JOBSET_VERSION
|
9
8
|
from metaflow.tracing import inject_tracing_vars
|
10
|
-
from metaflow.metaflow_config import KUBERNETES_SECRETS
|
11
9
|
|
12
10
|
from .kube_utils import qos_requests_and_limits
|
13
11
|
|
@@ -257,7 +255,7 @@ class RunningJobSet(object):
|
|
257
255
|
def best_effort_kill():
|
258
256
|
try:
|
259
257
|
self.kill()
|
260
|
-
except Exception
|
258
|
+
except Exception:
|
261
259
|
pass
|
262
260
|
|
263
261
|
atexit.register(best_effort_kill)
|
@@ -342,7 +340,7 @@ class RunningJobSet(object):
|
|
342
340
|
stdout=True,
|
343
341
|
tty=False,
|
344
342
|
)
|
345
|
-
except Exception
|
343
|
+
except Exception:
|
346
344
|
with client.ApiClient() as api_client:
|
347
345
|
# If we are unable to kill the control pod then
|
348
346
|
# Delete the jobset to kill the subsequent pods.
|
@@ -862,6 +860,16 @@ class KubernetesJobSet(object):
|
|
862
860
|
self._annotations = dict(self._annotations, **{name: value})
|
863
861
|
return self
|
864
862
|
|
863
|
+
def labels(self, labels):
    """Apply several labels in one call.

    Calls ``self.label(key, value)`` for each entry of `labels`, in the
    mapping's iteration order.

    Parameters
    ----------
    labels : Dict[str, str], optional
        Labels to add. ``None`` or an empty mapping adds nothing.

    Returns
    -------
    The object itself, so calls can be chained.
    """
    # `or {}` lets callers pass None for "no labels" instead of failing
    # on None.items().
    for name, value in (labels or {}).items():
        self.label(name, value)
    return self
|
867
|
+
|
868
|
+
def annotations(self, annotations):
    """Apply several annotations in one call.

    Calls ``self.annotation(key, value)`` for each entry of `annotations`,
    in the mapping's iteration order.

    Parameters
    ----------
    annotations : Dict[str, str], optional
        Annotations to add. ``None`` or an empty mapping adds nothing.

    Returns
    -------
    The object itself, so calls can be chained.
    """
    # `or {}` lets callers pass None for "no annotations" instead of
    # failing on None.items().
    for name, value in (annotations or {}).items():
        self.annotation(name, value)
    return self
|
872
|
+
|
865
873
|
def secret(self, name):
|
866
874
|
self.worker.secret(name)
|
867
875
|
self.control.secret(name)
|
@@ -987,15 +995,24 @@ class KubernetesArgoJobSet(object):
|
|
987
995
|
self._labels = dict(self._labels, **{name: value})
|
988
996
|
return self
|
989
997
|
|
998
|
+
def labels(self, labels):
    """Apply several labels in one call.

    Calls ``self.label(key, value)`` for each entry of `labels`, in the
    mapping's iteration order.

    Parameters
    ----------
    labels : Dict[str, str], optional
        Labels to add. ``None`` or an empty mapping adds nothing.

    Returns
    -------
    The object itself, so calls can be chained.
    """
    # `or {}` lets callers pass None for "no labels" instead of failing
    # on None.items().
    for name, value in (labels or {}).items():
        self.label(name, value)
    return self
|
1002
|
+
|
990
1003
|
def annotation(self, name, value):
|
991
1004
|
self.worker.annotation(name, value)
|
992
1005
|
self.control.annotation(name, value)
|
993
1006
|
self._annotations = dict(self._annotations, **{name: value})
|
994
1007
|
return self
|
995
1008
|
|
1009
|
+
def annotations(self, annotations):
    """Apply several annotations in one call.

    Calls ``self.annotation(key, value)`` for each entry of `annotations`,
    in the mapping's iteration order.

    Parameters
    ----------
    annotations : Dict[str, str], optional
        Annotations to add. ``None`` or an empty mapping adds nothing.

    Returns
    -------
    The object itself, so calls can be chained.
    """
    # `or {}` lets callers pass None for "no annotations" instead of
    # failing on None.items().
    for name, value in (annotations or {}).items():
        self.annotation(name, value)
    return self
|
1013
|
+
|
996
1014
|
def dump(self):
|
997
1015
|
client = self._kubernetes_sdk
|
998
|
-
import json
|
999
1016
|
|
1000
1017
|
data = json.dumps(
|
1001
1018
|
client.ApiClient().sanitize_for_serialization(
|