ob-metaflow 2.12.39.1__py2.py3-none-any.whl → 2.13.1.1__py2.py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of ob-metaflow might be problematic.
- metaflow/__init__.py +1 -1
- metaflow/cli.py +111 -36
- metaflow/cli_args.py +2 -2
- metaflow/cli_components/run_cmds.py +3 -1
- metaflow/datastore/flow_datastore.py +2 -2
- metaflow/exception.py +8 -2
- metaflow/flowspec.py +48 -36
- metaflow/graph.py +28 -27
- metaflow/includefile.py +2 -2
- metaflow/lint.py +35 -20
- metaflow/metadata_provider/heartbeat.py +23 -8
- metaflow/metaflow_config.py +7 -0
- metaflow/parameters.py +11 -4
- metaflow/plugins/argo/argo_client.py +0 -2
- metaflow/plugins/argo/argo_workflows.py +86 -104
- metaflow/plugins/argo/argo_workflows_cli.py +0 -1
- metaflow/plugins/argo/argo_workflows_decorator.py +2 -4
- metaflow/plugins/argo/jobset_input_paths.py +0 -1
- metaflow/plugins/aws/aws_utils.py +6 -1
- metaflow/plugins/aws/batch/batch_client.py +1 -3
- metaflow/plugins/aws/batch/batch_decorator.py +11 -11
- metaflow/plugins/aws/secrets_manager/aws_secrets_manager_secrets_provider.py +13 -10
- metaflow/plugins/aws/step_functions/dynamo_db_client.py +0 -3
- metaflow/plugins/aws/step_functions/production_token.py +1 -1
- metaflow/plugins/aws/step_functions/step_functions.py +1 -1
- metaflow/plugins/aws/step_functions/step_functions_cli.py +0 -1
- metaflow/plugins/aws/step_functions/step_functions_decorator.py +0 -1
- metaflow/plugins/aws/step_functions/step_functions_deployer_objects.py +0 -1
- metaflow/plugins/cards/card_creator.py +1 -0
- metaflow/plugins/cards/card_decorator.py +46 -8
- metaflow/plugins/kubernetes/kube_utils.py +55 -1
- metaflow/plugins/kubernetes/kubernetes.py +33 -80
- metaflow/plugins/kubernetes/kubernetes_cli.py +22 -5
- metaflow/plugins/kubernetes/kubernetes_decorator.py +49 -2
- metaflow/plugins/kubernetes/kubernetes_job.py +3 -6
- metaflow/plugins/kubernetes/kubernetes_jobsets.py +22 -5
- metaflow/plugins/pypi/bootstrap.py +87 -54
- metaflow/plugins/pypi/conda_environment.py +7 -6
- metaflow/plugins/pypi/micromamba.py +35 -21
- metaflow/plugins/pypi/pip.py +2 -4
- metaflow/plugins/pypi/utils.py +4 -2
- metaflow/runner/click_api.py +175 -39
- metaflow/runner/deployer_impl.py +6 -1
- metaflow/runner/metaflow_runner.py +6 -1
- metaflow/user_configs/config_options.py +87 -34
- metaflow/user_configs/config_parameters.py +44 -25
- metaflow/util.py +2 -2
- metaflow/version.py +1 -1
- {ob_metaflow-2.12.39.1.dist-info → ob_metaflow-2.13.1.1.dist-info}/METADATA +2 -2
- {ob_metaflow-2.12.39.1.dist-info → ob_metaflow-2.13.1.1.dist-info}/RECORD +54 -54
- {ob_metaflow-2.12.39.1.dist-info → ob_metaflow-2.13.1.1.dist-info}/WHEEL +1 -1
- {ob_metaflow-2.12.39.1.dist-info → ob_metaflow-2.13.1.1.dist-info}/LICENSE +0 -0
- {ob_metaflow-2.12.39.1.dist-info → ob_metaflow-2.13.1.1.dist-info}/entry_points.txt +0 -0
- {ob_metaflow-2.12.39.1.dist-info → ob_metaflow-2.13.1.1.dist-info}/top_level.txt +0 -0
metaflow/plugins/aws/aws_utils.py
@@ -1,5 +1,4 @@
 import re
-import requests
 
 from metaflow.exception import MetaflowException
 from metaflow.metaflow_config import MAX_MEMORY_PER_TASK, MAX_CPU_PER_TASK

@@ -31,6 +30,10 @@ def get_ec2_instance_metadata():
     - ec2-region
     - ec2-availability-zone
 """
+
+# TODO: Remove dependency on requests
+import requests
+
 meta = {}
 # Capture AWS instance identity metadata. This is best-effort only since
 # access to this end-point might be blocked on AWS and not available

@@ -160,6 +163,8 @@ def compute_resource_attributes(decos, compute_deco, step_name, resource_default
 # Here we don't have ints, so we compare the value and raise
 # an exception if not equal
 if my_val != v:
+    # TODO: Throw a better exception since the user has no
+    # knowledge of 'compute' decorator
     raise MetaflowException(
         "'resources' and compute decorator have conflicting "
         "values for '%s'. Please use consistent values or "
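The aws_utils.py change moves `import requests` from module scope into get_ec2_instance_metadata(), so importing the module no longer pulls in `requests` unless the EC2 metadata lookup actually runs. A minimal sketch of that deferred-import pattern (not Metaflow code; the endpoint URL and timeout are illustrative):

def read_instance_identity():
    # Deferred import: the optional dependency is only needed on this code path.
    import requests

    url = "http://169.254.169.254/latest/dynamic/instance-identity/document"
    try:
        # Best-effort, mirroring the comment in the hunk above: the endpoint
        # may be blocked or unavailable outside EC2.
        return requests.get(url, timeout=1).json()
    except Exception:
        return {}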
metaflow/plugins/aws/batch/batch_decorator.py
@@ -1,34 +1,30 @@
 import os
-import sys
 import platform
-import
+import sys
 import time
 
-from metaflow import util
 from metaflow import R, current
-
 from metaflow.decorators import StepDecorator
-from metaflow.plugins.resources_decorator import ResourcesDecorator
-from metaflow.plugins.timeout_decorator import get_run_time_limit_for_task
 from metaflow.metadata_provider import MetaDatum
 from metaflow.metadata_provider.util import sync_local_metadata_to_datastore
 from metaflow.metaflow_config import (
-    ECS_S3_ACCESS_IAM_ROLE,
-    BATCH_JOB_QUEUE,
     BATCH_CONTAINER_IMAGE,
     BATCH_CONTAINER_REGISTRY,
-
+    BATCH_JOB_QUEUE,
     DATASTORE_LOCAL_DIR,
+    ECS_FARGATE_EXECUTION_ROLE,
+    ECS_S3_ACCESS_IAM_ROLE,
 )
+from metaflow.plugins.timeout_decorator import get_run_time_limit_for_task
 from metaflow.sidecar import Sidecar
 from metaflow.unbounded_foreach import UBF_CONTROL
 
-from .batch import BatchException
 from ..aws_utils import (
     compute_resource_attributes,
     get_docker_registry,
     get_ec2_instance_metadata,
 )
+from .batch import BatchException
 
 
 class BatchDecorator(StepDecorator):

@@ -280,6 +276,10 @@ class BatchDecorator(StepDecorator):
 # Metaflow would be running the container agent compatible with
 # version V4.
 # https://docs.aws.amazon.com/AmazonECS/latest/developerguide/task-metadata-endpoint.html
+
+# TODO: Remove dependency on requests
+import requests
+
 try:
     logs_meta = (
         requests.get(url=os.environ["ECS_CONTAINER_METADATA_URI_V4"])

@@ -386,7 +386,7 @@ class BatchDecorator(StepDecorator):
 len(flow._control_mapper_tasks),
 )
 )
-except Exception
+except Exception:
     pass
 raise Exception(
     "Batch secondary workers did not finish in %s seconds" % TIMEOUT
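batch_decorator.py gets the same treatment: `requests` is now imported inside the method that queries the ECS task-metadata endpoint, and a bare `except Exception` gains its missing colon. A rough sketch of the metadata call being wrapped, assuming only that ECS_CONTAINER_METADATA_URI_V4 is set inside the container (the response handling is illustrative):

import os

def fetch_ecs_container_metadata():
    # Deferred import, as in the TODO above.
    import requests

    url = os.environ.get("ECS_CONTAINER_METADATA_URI_V4")
    if not url:
        return {}
    try:
        return requests.get(url, timeout=1).json()
    except Exception:
        # Best-effort: the endpoint is only reachable from inside an ECS task.
        return {}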
metaflow/plugins/aws/secrets_manager/aws_secrets_manager_secrets_provider.py
@@ -50,24 +50,27 @@ class AwsSecretsManagerSecretsProvider(SecretsProvider):
 The secret payload from AWS is EITHER a string OR a binary blob.
 
 If the secret contains a string payload ("SecretString"):
-- if the `
+- if the `json` option is True (default):
   {SecretString} will be parsed as a JSON. If successfully parsed, AND the JSON contains a
   top-level object, each entry K/V in the object will also be converted to an entry in the result. V will
   always be casted to a string (if not already a string).
-- If `
-  {SecretString} will be returned as a single entry in the result,
+- If `json` option is False:
+  {SecretString} will be returned as a single entry in the result, where the key is either:
+  - the `secret_id`, OR
+  - the value set by `options={"env_var_name": custom_env_var_name}`.
 
-Otherwise, the secret contains a binary blob payload ("SecretBinary")
-- The result
+Otherwise, if the secret contains a binary blob payload ("SecretBinary"):
+- The result dict contains '{SecretName}': '{SecretBinary}', where {SecretBinary} is a base64-encoded string.
 
-All keys in the result are sanitized to be more valid environment variable names. This is done on a best
+All keys in the result are sanitized to be more valid environment variable names. This is done on a best-effort
 basis. Further validation is expected to be done by the invoking @secrets decorator itself.
 
-:param secret_id: ARN or friendly name of the secret
-:param options:
-:param role: AWS IAM Role ARN to assume before reading the secret
-:return:
+:param secret_id: ARN or friendly name of the secret.
+:param options: Dictionary of additional options. E.g., `options={"env_var_name": custom_env_var_name}`.
+:param role: AWS IAM Role ARN to assume before reading the secret.
+:return: Dictionary of environment variables. All keys and values are strings.
 """
+
 import botocore
 from metaflow.plugins.aws.aws_client import get_aws_client
 
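The rewritten docstring pins down the shape of the returned dictionary. A small sketch of that contract — not the provider's implementation — using hypothetical secret names and values:

import base64
import json

# SecretString holding a JSON object, with the `json` option True (default):
# each top-level key becomes one entry, values cast to str.
secret_string = '{"db_user": "admin", "db_port": 5432}'
result = {k: str(v) for k, v in json.loads(secret_string).items()}
assert result == {"db_user": "admin", "db_port": "5432"}

# SecretString with `json` False: one entry, keyed by the secret_id or by
# options={"env_var_name": ...} when given.
result = {"my-secret": secret_string}

# SecretBinary: one entry whose value is the base64-encoded blob.
result = {"my-binary-secret": base64.b64encode(b"\x00\x01").decode("ascii")}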
metaflow/plugins/aws/step_functions/step_functions.py
@@ -236,7 +236,7 @@ class StepFunctions(object):
     return parameters.get("metaflow.owner"), parameters.get(
         "metaflow.production_token"
     )
-except KeyError
+except KeyError:
     raise StepFunctionsException(
         "An existing non-metaflow "
         "workflow with the same name as "
metaflow/plugins/aws/step_functions/step_functions_cli.py
@@ -4,7 +4,6 @@ import re
 from hashlib import sha1
 
 from metaflow import JSONType, current, decorators, parameters
-from metaflow.client.core import get_metadata
 from metaflow._vendor import click
 from metaflow.exception import MetaflowException, MetaflowInternalError
 from metaflow.metaflow_config import (
metaflow/plugins/cards/card_decorator.py
@@ -1,13 +1,16 @@
+import json
+import os
+import re
+import tempfile
+
 from metaflow.decorators import StepDecorator
 from metaflow.metaflow_current import current
+from metaflow.user_configs.config_options import ConfigInput
+from metaflow.user_configs.config_parameters import dump_config_values
 from metaflow.util import to_unicode
+
 from .component_serializer import CardComponentCollector, get_card_class
 from .card_creator import CardCreator
-
-
-# from metaflow import get_metadata
-import re
-
 from .exception import CARD_ID_PATTERN, TYPE_CHECK_REGEX
 
 ASYNC_TIMEOUT = 30

@@ -111,6 +114,14 @@ class CardDecorator(StepDecorator):
 self._logger = logger
 self.card_options = None
 
+# We check for configuration options. We do this here before they are
+# converted to properties.
+self._config_values = [
+    (config.name, ConfigInput.make_key_name(config.name))
+    for _, config in flow._get_parameters()
+    if config.IS_CONFIG_PARAMETER
+]
+
 self.card_options = self.attributes["options"]
 
 evt_name = "step-init"

@@ -146,6 +157,18 @@ class CardDecorator(StepDecorator):
 self._task_datastore = task_datastore
 self._metadata = metadata
 
+# If we have configs, we need to dump them to a file so we can re-use them
+# when calling the card creation subprocess.
+if self._config_values:
+    with tempfile.NamedTemporaryFile(
+        mode="w", encoding="utf-8", delete=False
+    ) as config_file:
+        config_value = dump_config_values(flow)
+        json.dump(config_value, config_file)
+        self._config_file_name = config_file.name
+else:
+    self._config_file_name = None
+
 card_type = self.attributes["type"]
 card_class = get_card_class(card_type)
 

@@ -179,7 +202,7 @@ class CardDecorator(StepDecorator):
 # we need to ensure that `current.card` has `CardComponentCollector` instantiated only once.
 if not self._is_event_registered("pre-step"):
     self._register_event("pre-step")
-    self._set_card_creator(CardCreator(self._create_top_level_args()))
+    self._set_card_creator(CardCreator(self._create_top_level_args(flow)))
 
 current._update_env(
     {"card": CardComponentCollector(self._logger, self.card_creator)}

@@ -223,6 +246,13 @@ class CardDecorator(StepDecorator):
 self.card_creator.create(mode="render", final=True, **create_options)
 self.card_creator.create(mode="refresh", final=True, **create_options)
 
+# Unlink the config file if it exists
+if self._config_file_name:
+    try:
+        os.unlink(self._config_file_name)
+    except Exception as e:
+        pass
+
 @staticmethod
 def _options(mapping):
     for k, v in mapping.items():

@@ -232,9 +262,13 @@ class CardDecorator(StepDecorator):
 for value in v:
     yield "--%s" % k
     if not isinstance(value, bool):
-
+        if isinstance(value, tuple):
+            for val in value:
+                yield to_unicode(val)
+        else:
+            yield to_unicode(value)
 
-def _create_top_level_args(self):
+def _create_top_level_args(self, flow):
     top_level_options = {
         "quiet": True,
         "metadata": self._metadata.TYPE,

@@ -247,4 +281,8 @@ class CardDecorator(StepDecorator):
     # We don't provide --with as all execution is taking place in
     # the context of the main process
 }
+if self._config_values:
+    top_level_options["config-value"] = self._config_values
+    top_level_options["local-config-file"] = self._config_file_name
+
 return list(self._options(top_level_options))
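The decorator now snapshots config values into a temporary JSON file so the card-rendering subprocess can reload them, and unlinks that file in the cleanup path. A minimal sketch of the same write/hand-off/unlink lifecycle (names are hypothetical, not the decorator's API):

import json
import os
import tempfile

def write_config_snapshot(values):
    # delete=False so the path outlives the `with` block and can be handed to
    # a subprocess; the caller is responsible for removing it afterwards.
    with tempfile.NamedTemporaryFile(mode="w", encoding="utf-8", delete=False) as f:
        json.dump(values, f)
        return f.name

path = write_config_snapshot({"train": {"epochs": 10}})
try:
    print(path)  # e.g. handed to the subprocess as a local config file path
finally:
    try:
        os.unlink(path)
    except OSError:
        pass  # best effort, as in the hunk above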
metaflow/plugins/kubernetes/kube_utils.py
@@ -1,7 +1,14 @@
-
+import re
+from typing import Dict, List, Optional
+from metaflow.exception import CommandException, MetaflowException
 from metaflow.util import get_username, get_latest_run_id
 
 
+# avoid circular import by having the exception class contained here
+class KubernetesException(MetaflowException):
+    headline = "Kubernetes error"
+
+
 def parse_cli_options(flow_name, run_id, user, my_runs, echo):
     if user and my_runs:
         raise CommandException("--user and --my-runs are mutually exclusive.")

@@ -52,3 +59,50 @@ def qos_requests_and_limits(qos: str, cpu: int, memory: int, storage: int):
     # TODO: Add support for BestEffort once there is a use case for it.
     # BestEffort - no limit or requests for cpu/memory
     return qos_requests, qos_limits
+
+
+def validate_kube_labels(
+    labels: Optional[Dict[str, Optional[str]]],
+) -> bool:
+    """Validate label values.
+
+    This validates the kubernetes label values. It does not validate the keys.
+    Ideally, keys should be static and also the validation rules for keys are
+    more complex than those for values. For full validation rules, see:
+
+    https://kubernetes.io/docs/concepts/overview/working-with-objects/labels/#syntax-and-character-set
+    """
+
+    def validate_label(s: Optional[str]):
+        regex_match = r"^(([A-Za-z0-9][-A-Za-z0-9_.]{0,61})?[A-Za-z0-9])?$"
+        if not s:
+            # allow empty label
+            return True
+        if not re.search(regex_match, s):
+            raise KubernetesException(
+                'Invalid value: "%s"\n'
+                "A valid label must be an empty string or one that\n"
+                " - Consist of alphanumeric, '-', '_' or '.' characters\n"
+                " - Begins and ends with an alphanumeric character\n"
+                " - Is at most 63 characters" % s
+            )
+        return True
+
+    return all([validate_label(v) for v in labels.values()]) if labels else True
+
+
+def parse_kube_keyvalue_list(items: List[str], requires_both: bool = True):
+    try:
+        ret = {}
+        for item_str in items:
+            item = item_str.split("=", 1)
+            if requires_both:
+                item[1]  # raise IndexError
+            if str(item[0]) in ret:
+                raise KubernetesException("Duplicate key found: %s" % str(item[0]))
+            ret[str(item[0])] = str(item[1]) if len(item) > 1 else None
+        return ret
+    except KubernetesException as e:
+        raise e
+    except (AttributeError, IndexError):
+        raise KubernetesException("Unable to parse kubernetes list: %s" % items)
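With this change both helpers live in kube_utils.py (and are deleted from kubernetes.py below). A quick usage sketch based on the code shown above; the import path matches the one used by kubernetes_cli.py:

from metaflow.plugins.kubernetes.kube_utils import (
    parse_kube_keyvalue_list,
    validate_kube_labels,
)

# "key=value" items become a dict; with requires_both=False a bare key is
# allowed and maps to None, and a duplicate key raises KubernetesException.
assert parse_kube_keyvalue_list(["team=ml", "env=prod"]) == {"team": "ml", "env": "prod"}
assert parse_kube_keyvalue_list(["team"], requires_both=False) == {"team": None}

# Only label *values* are validated; an empty value is allowed, an invalid one
# raises KubernetesException.
assert validate_kube_labels({"app": "metaflow-task", "empty-ok": ""}) is True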
metaflow/plugins/kubernetes/kubernetes.py
@@ -1,11 +1,8 @@
-import copy
 import json
 import math
 import os
-import re
 import shlex
 import time
-from typing import Dict, List, Optional
 from uuid import uuid4
 
 from metaflow import current, util

@@ -35,7 +32,6 @@ from metaflow.metaflow_config import (
     DEFAULT_SECRETS_BACKEND_TYPE,
     GCP_SECRET_MANAGER_PREFIX,
     KUBERNETES_FETCH_EC2_METADATA,
-    KUBERNETES_LABELS,
     KUBERNETES_SANDBOX_INIT_SCRIPT,
     OTEL_ENDPOINT,
     S3_ENDPOINT_URL,

@@ -193,6 +189,7 @@ class Kubernetes(object):
     persistent_volume_claims=None,
     tolerations=None,
     labels=None,
+    annotations=None,
     shared_memory=None,
     port=None,
     num_parallel=None,

@@ -304,10 +301,6 @@ class Kubernetes(object):
     # see get_datastore_root_from_config in datastore/local.py).
 )
 
-_labels = self._get_labels(labels)
-for k, v in _labels.items():
-    jobset.label(k, v)
-
 for k in list(
     [] if not secrets else [secrets] if isinstance(secrets, str) else secrets
 ) + KUBERNETES_SECRETS.split(","):

@@ -395,13 +388,16 @@ class Kubernetes(object):
 for name, value in env.items():
     jobset.environment_variable(name, value)
 
-
+system_annotations = {
     "metaflow/user": user,
     "metaflow/flow_name": flow_name,
     "metaflow/control-task-id": task_id,
+    "metaflow/run_id": run_id,
+    "metaflow/step_name": step_name,
+    "metaflow/attempt": attempt,
 }
 if current.get("project_name"):
-
+    system_annotations.update(
         {
             "metaflow/project_name": current.project_name,
             "metaflow/branch_name": current.branch_name,

@@ -409,15 +405,15 @@ class Kubernetes(object):
         }
     )
 
-
-
+system_labels = {
+    "app.kubernetes.io/name": "metaflow-task",
+    "app.kubernetes.io/part-of": "metaflow",
+}
 
-(
-
-
-
-    .label("app.kubernetes.io/name", "metaflow-task")
-    .label("app.kubernetes.io/part-of", "metaflow")
+jobset.labels({**({} if not labels else labels), **system_labels})
+
+jobset.annotations(
+    {**({} if not annotations else annotations), **system_annotations}
 )
 # We need this task-id set so that all the nodes are aware of the control
 # task's task-id. These "MF_" variables populate the `current.parallel` namedtuple

@@ -507,6 +503,7 @@ class Kubernetes(object):
     port=None,
     name_pattern=None,
     qos=None,
+    annotations=None,
 ):
     if env is None:
         env = {}

@@ -539,7 +536,8 @@ class Kubernetes(object):
     retries=0,
     step_name=step_name,
     tolerations=tolerations,
-    labels=
+    labels=labels,
+    annotations=annotations,
     use_tmpfs=use_tmpfs,
     tmpfs_tempdir=tmpfs_tempdir,
     tmpfs_size=tmpfs_size,

@@ -658,13 +656,25 @@ class Kubernetes(object):
 
 for name, value in env.items():
     job.environment_variable(name, value)
+# Add job specific labels
+system_labels = {
+    "app.kubernetes.io/name": "metaflow-task",
+    "app.kubernetes.io/part-of": "metaflow",
+}
+for name, value in system_labels.items():
+    job.label(name, value)
 
-annotations
-
+# Add job specific annotations not set in the decorator.
+system_annotations = {
     "metaflow/flow_name": flow_name,
+    "metaflow/run_id": run_id,
+    "metaflow/step_name": step_name,
+    "metaflow/task_id": task_id,
+    "metaflow/attempt": attempt,
+    "metaflow/user": user,
 }
 if current.get("project_name"):
-
+    system_annotations.update(
         {
             "metaflow/project_name": current.project_name,
             "metaflow/branch_name": current.branch_name,

@@ -672,7 +682,7 @@ class Kubernetes(object):
         }
     )
 
-for name, value in
+for name, value in system_annotations.items():
     job.annotation(name, value)
 
 (

@@ -791,60 +801,3 @@ class Kubernetes(object):
     "stderr",
     job_id=self._job.id,
 )
-
-@staticmethod
-def _get_labels(extra_labels=None):
-    if extra_labels is None:
-        extra_labels = {}
-    env_labels = KUBERNETES_LABELS.split(",") if KUBERNETES_LABELS else []
-    env_labels = parse_kube_keyvalue_list(env_labels, False)
-    labels = {**env_labels, **extra_labels}
-    validate_kube_labels(labels)
-    return labels
-
-
-def validate_kube_labels(
-    labels: Optional[Dict[str, Optional[str]]],
-) -> bool:
-    """Validate label values.
-
-    This validates the kubernetes label values. It does not validate the keys.
-    Ideally, keys should be static and also the validation rules for keys are
-    more complex than those for values. For full validation rules, see:
-
-    https://kubernetes.io/docs/concepts/overview/working-with-objects/labels/#syntax-and-character-set
-    """
-
-    def validate_label(s: Optional[str]):
-        regex_match = r"^(([A-Za-z0-9][-A-Za-z0-9_.]{0,61})?[A-Za-z0-9])?$"
-        if not s:
-            # allow empty label
-            return True
-        if not re.search(regex_match, s):
-            raise KubernetesException(
-                'Invalid value: "%s"\n'
-                "A valid label must be an empty string or one that\n"
-                " - Consist of alphanumeric, '-', '_' or '.' characters\n"
-                " - Begins and ends with an alphanumeric character\n"
-                " - Is at most 63 characters" % s
-            )
-        return True
-
-    return all([validate_label(v) for v in labels.values()]) if labels else True
-
-
-def parse_kube_keyvalue_list(items: List[str], requires_both: bool = True):
-    try:
-        ret = {}
-        for item_str in items:
-            item = item_str.split("=", 1)
-            if requires_both:
-                item[1]  # raise IndexError
-            if str(item[0]) in ret:
-                raise KubernetesException("Duplicate key found: %s" % str(item[0]))
-            ret[str(item[0])] = str(item[1]) if len(item) > 1 else None
-        return ret
-    except KubernetesException as e:
-        raise e
-    except (AttributeError, IndexError):
-        raise KubernetesException("Unable to parse kubernetes list: %s" % items)
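The labels/annotations handling above merges user-supplied values with the system-provided ones, writing the Metaflow-managed entries last so they win on a key collision. A short illustration of that precedence (values are hypothetical):

user_labels = {"team": "ml", "app.kubernetes.io/name": "my-override"}
system_labels = {
    "app.kubernetes.io/name": "metaflow-task",
    "app.kubernetes.io/part-of": "metaflow",
}

# In a dict merge the later source wins, so the system value overrides the user's.
merged = {**user_labels, **system_labels}
assert merged["app.kubernetes.io/name"] == "metaflow-task"
assert merged["team"] == "ml"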
metaflow/plugins/kubernetes/kubernetes_cli.py
@@ -3,14 +3,17 @@ import sys
 import time
 import traceback
 
-from metaflow.plugins.kubernetes.kube_utils import
+from metaflow.plugins.kubernetes.kube_utils import (
+    parse_cli_options,
+    parse_kube_keyvalue_list,
+)
 from metaflow.plugins.kubernetes.kubernetes_client import KubernetesClient
 import metaflow.tracing as tracing
 from metaflow import JSONTypeClass, util
 from metaflow._vendor import click
 from metaflow.exception import METAFLOW_EXIT_DISALLOW_RETRY, MetaflowException
 from metaflow.metadata_provider.util import sync_local_metadata_from_datastore
-from metaflow.metaflow_config import DATASTORE_LOCAL_DIR
+from metaflow.metaflow_config import DATASTORE_LOCAL_DIR
 from metaflow.mflog import TASK_LOG_SOURCE
 from metaflow.unbounded_foreach import UBF_CONTROL, UBF_TASK
 

@@ -18,9 +21,7 @@ from .kubernetes import (
     Kubernetes,
     KubernetesException,
     KubernetesKilledException,
-    parse_kube_keyvalue_list,
 )
-from .kubernetes_decorator import KubernetesDecorator
 
 
 @click.group()

@@ -132,6 +133,18 @@ def kubernetes():
     type=str,
     help="Quality of Service class for the Kubernetes pod",
 )
+@click.option(
+    "--labels",
+    default=None,
+    type=JSONTypeClass(),
+    multiple=False,
+)
+@click.option(
+    "--annotations",
+    default=None,
+    type=JSONTypeClass(),
+    multiple=False,
+)
 @click.pass_context
 def step(
     ctx,

@@ -161,6 +174,8 @@ def step(
     port=None,
     num_parallel=None,
     qos=None,
+    labels=None,
+    annotations=None,
     **kwargs
 ):
     def echo(msg, stream="stderr", job_id=None, **kwargs):

@@ -302,8 +317,10 @@ def step(
         port=port,
         num_parallel=num_parallel,
         qos=qos,
+        labels=labels,
+        annotations=annotations,
     )
-except Exception
+except Exception:
     traceback.print_exc(chain=False)
     _sync_metadata()
     sys.exit(METAFLOW_EXIT_DISALLOW_RETRY)
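The new --labels and --annotations options are declared with JSONTypeClass(), so the internal step command receives them as JSON documents rather than repeated key=value pairs. A rough sketch of what such a value carries (the exact parsing is done by Metaflow's JSONTypeClass; the flag value below is only an example):

import json

raw = '{"team": "ml", "cost-center": "1234"}'  # e.g. the string passed to --labels
labels = json.loads(raw)
assert labels == {"team": "ml", "cost-center": "1234"}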