metaflow 2.12.39__py2.py3-none-any.whl → 2.13.1__py2.py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- metaflow/__init__.py +1 -1
- metaflow/cli.py +111 -36
- metaflow/cli_args.py +2 -2
- metaflow/cli_components/run_cmds.py +3 -1
- metaflow/datastore/flow_datastore.py +2 -2
- metaflow/exception.py +8 -2
- metaflow/flowspec.py +48 -36
- metaflow/graph.py +28 -27
- metaflow/includefile.py +2 -2
- metaflow/lint.py +35 -20
- metaflow/metadata_provider/heartbeat.py +23 -8
- metaflow/metaflow_config.py +7 -0
- metaflow/parameters.py +11 -4
- metaflow/plugins/argo/argo_client.py +0 -2
- metaflow/plugins/argo/argo_workflows.py +86 -104
- metaflow/plugins/argo/argo_workflows_cli.py +0 -1
- metaflow/plugins/argo/argo_workflows_decorator.py +2 -4
- metaflow/plugins/argo/argo_workflows_deployer_objects.py +42 -0
- metaflow/plugins/argo/jobset_input_paths.py +0 -1
- metaflow/plugins/aws/aws_utils.py +6 -1
- metaflow/plugins/aws/batch/batch_client.py +1 -3
- metaflow/plugins/aws/batch/batch_decorator.py +11 -11
- metaflow/plugins/aws/secrets_manager/aws_secrets_manager_secrets_provider.py +13 -10
- metaflow/plugins/aws/step_functions/dynamo_db_client.py +0 -3
- metaflow/plugins/aws/step_functions/production_token.py +1 -1
- metaflow/plugins/aws/step_functions/step_functions.py +1 -1
- metaflow/plugins/aws/step_functions/step_functions_cli.py +0 -1
- metaflow/plugins/aws/step_functions/step_functions_decorator.py +0 -1
- metaflow/plugins/aws/step_functions/step_functions_deployer_objects.py +0 -1
- metaflow/plugins/cards/card_creator.py +1 -0
- metaflow/plugins/cards/card_decorator.py +46 -8
- metaflow/plugins/kubernetes/kube_utils.py +55 -1
- metaflow/plugins/kubernetes/kubernetes.py +33 -80
- metaflow/plugins/kubernetes/kubernetes_cli.py +22 -5
- metaflow/plugins/kubernetes/kubernetes_decorator.py +49 -2
- metaflow/plugins/kubernetes/kubernetes_job.py +3 -6
- metaflow/plugins/kubernetes/kubernetes_jobsets.py +22 -5
- metaflow/plugins/pypi/bootstrap.py +249 -81
- metaflow/plugins/pypi/conda_environment.py +83 -27
- metaflow/plugins/pypi/micromamba.py +77 -36
- metaflow/plugins/pypi/pip.py +9 -6
- metaflow/plugins/pypi/utils.py +4 -2
- metaflow/runner/click_api.py +175 -39
- metaflow/runner/deployer_impl.py +6 -1
- metaflow/runner/metaflow_runner.py +6 -1
- metaflow/runner/utils.py +5 -0
- metaflow/user_configs/config_options.py +87 -34
- metaflow/user_configs/config_parameters.py +44 -25
- metaflow/util.py +2 -2
- metaflow/version.py +1 -1
- {metaflow-2.12.39.dist-info → metaflow-2.13.1.dist-info}/METADATA +2 -2
- {metaflow-2.12.39.dist-info → metaflow-2.13.1.dist-info}/RECORD +56 -56
- {metaflow-2.12.39.dist-info → metaflow-2.13.1.dist-info}/WHEEL +1 -1
- {metaflow-2.12.39.dist-info → metaflow-2.13.1.dist-info}/LICENSE +0 -0
- {metaflow-2.12.39.dist-info → metaflow-2.13.1.dist-info}/entry_points.txt +0 -0
- {metaflow-2.12.39.dist-info → metaflow-2.13.1.dist-info}/top_level.txt +0 -0
@@ -1,5 +1,6 @@
|
|
1
1
|
import sys
|
2
2
|
import json
|
3
|
+
import time
|
3
4
|
import tempfile
|
4
5
|
from typing import ClassVar, Optional
|
5
6
|
|
@@ -170,6 +171,47 @@ class ArgoWorkflowsTriggeredRun(TriggeredRun):
|
|
170
171
|
command_obj.sync_wait()
|
171
172
|
return command_obj.process.returncode == 0
|
172
173
|
|
174
|
+
def wait_for_completion(self, timeout: Optional[int] = None):
    """
    Wait for the workflow to complete or timeout.

    The workflow is polled through `self.is_running` every few seconds until
    it reports that it is no longer running.

    Parameters
    ----------
    timeout : int, optional, default None
        Maximum time in seconds to wait for workflow completion.
        If None, waits indefinitely.

    Raises
    ------
    TimeoutError
        If the workflow does not complete within the specified timeout period.
    """
    check_interval = 5
    # A monotonic clock is used so that wall-clock adjustments (NTP, DST,
    # manual changes) can neither shorten nor extend the wait.
    deadline = None if timeout is None else time.monotonic() + timeout

    while self.is_running:
        if deadline is None:
            time.sleep(check_interval)
            continue

        remaining = deadline - time.monotonic()
        if remaining <= 0:
            raise TimeoutError("Workflow did not complete within specified timeout.")
        # Never sleep past the deadline: a short timeout must not be stretched
        # to a full polling interval.
        time.sleep(min(check_interval, remaining))
|
197
|
+
|
198
|
+
@property
def is_running(self) -> bool:
    """
    Check if the workflow is currently running.

    Returns
    -------
    bool
        True if the workflow status is either 'Pending' or 'Running',
        False otherwise (including when no status is available).
    """
    # full list of all states present here:
    # https://github.com/argoproj/argo-workflows/blob/main/pkg/apis/workflow/v1alpha1/workflow_types.go#L54
    # we only consider non-terminal states to determine if the workflow has not finished.
    # A status of None is simply not a member of the tuple, so no separate
    # None check is needed.
    return self.status in ("Pending", "Running")
|
214
|
+
|
173
215
|
@property
|
174
216
|
def status(self) -> Optional[str]:
|
175
217
|
"""
|
@@ -1,5 +1,4 @@
|
|
1
1
|
import re
|
2
|
-
import requests
|
3
2
|
|
4
3
|
from metaflow.exception import MetaflowException
|
5
4
|
|
@@ -30,6 +29,10 @@ def get_ec2_instance_metadata():
|
|
30
29
|
- ec2-region
|
31
30
|
- ec2-availability-zone
|
32
31
|
"""
|
32
|
+
|
33
|
+
# TODO: Remove dependency on requests
|
34
|
+
import requests
|
35
|
+
|
33
36
|
meta = {}
|
34
37
|
# Capture AWS instance identity metadata. This is best-effort only since
|
35
38
|
# access to this end-point might be blocked on AWS and not available
|
@@ -159,6 +162,8 @@ def compute_resource_attributes(decos, compute_deco, resource_defaults):
|
|
159
162
|
# Here we don't have ints, so we compare the value and raise
|
160
163
|
# an exception if not equal
|
161
164
|
if my_val != v:
|
165
|
+
# TODO: Throw a better exception since the user has no
|
166
|
+
# knowledge of 'compute' decorator
|
162
167
|
raise MetaflowException(
|
163
168
|
"'resources' and compute decorator have conflicting "
|
164
169
|
"values for '%s'. Please use consistent values or "
|
@@ -1,34 +1,30 @@
|
|
1
1
|
import os
|
2
|
-
import sys
|
3
2
|
import platform
|
4
|
-
import
|
3
|
+
import sys
|
5
4
|
import time
|
6
5
|
|
7
|
-
from metaflow import util
|
8
6
|
from metaflow import R, current
|
9
|
-
|
10
7
|
from metaflow.decorators import StepDecorator
|
11
|
-
from metaflow.plugins.resources_decorator import ResourcesDecorator
|
12
|
-
from metaflow.plugins.timeout_decorator import get_run_time_limit_for_task
|
13
8
|
from metaflow.metadata_provider import MetaDatum
|
14
9
|
from metaflow.metadata_provider.util import sync_local_metadata_to_datastore
|
15
10
|
from metaflow.metaflow_config import (
|
16
|
-
ECS_S3_ACCESS_IAM_ROLE,
|
17
|
-
BATCH_JOB_QUEUE,
|
18
11
|
BATCH_CONTAINER_IMAGE,
|
19
12
|
BATCH_CONTAINER_REGISTRY,
|
20
|
-
|
13
|
+
BATCH_JOB_QUEUE,
|
21
14
|
DATASTORE_LOCAL_DIR,
|
15
|
+
ECS_FARGATE_EXECUTION_ROLE,
|
16
|
+
ECS_S3_ACCESS_IAM_ROLE,
|
22
17
|
)
|
18
|
+
from metaflow.plugins.timeout_decorator import get_run_time_limit_for_task
|
23
19
|
from metaflow.sidecar import Sidecar
|
24
20
|
from metaflow.unbounded_foreach import UBF_CONTROL
|
25
21
|
|
26
|
-
from .batch import BatchException
|
27
22
|
from ..aws_utils import (
|
28
23
|
compute_resource_attributes,
|
29
24
|
get_docker_registry,
|
30
25
|
get_ec2_instance_metadata,
|
31
26
|
)
|
27
|
+
from .batch import BatchException
|
32
28
|
|
33
29
|
|
34
30
|
class BatchDecorator(StepDecorator):
|
@@ -280,6 +276,10 @@ class BatchDecorator(StepDecorator):
|
|
280
276
|
# Metaflow would be running the container agent compatible with
|
281
277
|
# version V4.
|
282
278
|
# https://docs.aws.amazon.com/AmazonECS/latest/developerguide/task-metadata-endpoint.html
|
279
|
+
|
280
|
+
# TODO: Remove dependency on requests
|
281
|
+
import requests
|
282
|
+
|
283
283
|
try:
|
284
284
|
logs_meta = (
|
285
285
|
requests.get(url=os.environ["ECS_CONTAINER_METADATA_URI_V4"])
|
@@ -386,7 +386,7 @@ class BatchDecorator(StepDecorator):
|
|
386
386
|
len(flow._control_mapper_tasks),
|
387
387
|
)
|
388
388
|
)
|
389
|
-
except Exception
|
389
|
+
except Exception:
|
390
390
|
pass
|
391
391
|
raise Exception(
|
392
392
|
"Batch secondary workers did not finish in %s seconds" % TIMEOUT
|
@@ -50,24 +50,27 @@ class AwsSecretsManagerSecretsProvider(SecretsProvider):
|
|
50
50
|
The secret payload from AWS is EITHER a string OR a binary blob.
|
51
51
|
|
52
52
|
If the secret contains a string payload ("SecretString"):
|
53
|
-
- if the `
|
53
|
+
- if the `json` option is True (default):
|
54
54
|
{SecretString} will be parsed as a JSON. If successfully parsed, AND the JSON contains a
|
55
55
|
top-level object, each entry K/V in the object will also be converted to an entry in the result. V will
|
56
56
|
always be cast to a string (if not already a string).
|
57
|
-
- If `
|
58
|
-
{SecretString} will be returned as a single entry in the result,
|
57
|
+
- If `json` option is False:
|
58
|
+
{SecretString} will be returned as a single entry in the result, where the key is either:
|
59
|
+
- the `secret_id`, OR
|
60
|
+
- the value set by `options={"env_var_name": custom_env_var_name}`.
|
59
61
|
|
60
|
-
Otherwise, the secret contains a binary blob payload ("SecretBinary")
|
61
|
-
- The result
|
62
|
+
Otherwise, if the secret contains a binary blob payload ("SecretBinary"):
|
63
|
+
- The result dict contains '{SecretName}': '{SecretBinary}', where {SecretBinary} is a base64-encoded string.
|
62
64
|
|
63
|
-
All keys in the result are sanitized to be more valid environment variable names. This is done on a best
|
65
|
+
All keys in the result are sanitized to be more valid environment variable names. This is done on a best-effort
|
64
66
|
basis. Further validation is expected to be done by the invoking @secrets decorator itself.
|
65
67
|
|
66
|
-
:param secret_id: ARN or friendly name of the secret
|
67
|
-
:param options:
|
68
|
-
:param role: AWS IAM Role ARN to assume before reading the secret
|
69
|
-
:return:
|
68
|
+
:param secret_id: ARN or friendly name of the secret.
|
69
|
+
:param options: Dictionary of additional options. E.g., `options={"env_var_name": custom_env_var_name}`.
|
70
|
+
:param role: AWS IAM Role ARN to assume before reading the secret.
|
71
|
+
:return: Dictionary of environment variables. All keys and values are strings.
|
70
72
|
"""
|
73
|
+
|
71
74
|
import botocore
|
72
75
|
from metaflow.plugins.aws.aws_client import get_aws_client
|
73
76
|
|
@@ -236,7 +236,7 @@ class StepFunctions(object):
|
|
236
236
|
return parameters.get("metaflow.owner"), parameters.get(
|
237
237
|
"metaflow.production_token"
|
238
238
|
)
|
239
|
-
except KeyError
|
239
|
+
except KeyError:
|
240
240
|
raise StepFunctionsException(
|
241
241
|
"An existing non-metaflow "
|
242
242
|
"workflow with the same name as "
|
@@ -4,7 +4,6 @@ import re
|
|
4
4
|
from hashlib import sha1
|
5
5
|
|
6
6
|
from metaflow import JSONType, current, decorators, parameters
|
7
|
-
from metaflow.client.core import get_metadata
|
8
7
|
from metaflow._vendor import click
|
9
8
|
from metaflow.exception import MetaflowException, MetaflowInternalError
|
10
9
|
from metaflow.metaflow_config import (
|
@@ -1,13 +1,16 @@
|
|
1
|
+
import json
|
2
|
+
import os
|
3
|
+
import re
|
4
|
+
import tempfile
|
5
|
+
|
1
6
|
from metaflow.decorators import StepDecorator
|
2
7
|
from metaflow.metaflow_current import current
|
8
|
+
from metaflow.user_configs.config_options import ConfigInput
|
9
|
+
from metaflow.user_configs.config_parameters import dump_config_values
|
3
10
|
from metaflow.util import to_unicode
|
11
|
+
|
4
12
|
from .component_serializer import CardComponentCollector, get_card_class
|
5
13
|
from .card_creator import CardCreator
|
6
|
-
|
7
|
-
|
8
|
-
# from metaflow import get_metadata
|
9
|
-
import re
|
10
|
-
|
11
14
|
from .exception import CARD_ID_PATTERN, TYPE_CHECK_REGEX
|
12
15
|
|
13
16
|
ASYNC_TIMEOUT = 30
|
@@ -111,6 +114,14 @@ class CardDecorator(StepDecorator):
|
|
111
114
|
self._logger = logger
|
112
115
|
self.card_options = None
|
113
116
|
|
117
|
+
# We check for configuration options. We do this here before they are
|
118
|
+
# converted to properties.
|
119
|
+
self._config_values = [
|
120
|
+
(config.name, ConfigInput.make_key_name(config.name))
|
121
|
+
for _, config in flow._get_parameters()
|
122
|
+
if config.IS_CONFIG_PARAMETER
|
123
|
+
]
|
124
|
+
|
114
125
|
self.card_options = self.attributes["options"]
|
115
126
|
|
116
127
|
evt_name = "step-init"
|
@@ -146,6 +157,18 @@ class CardDecorator(StepDecorator):
|
|
146
157
|
self._task_datastore = task_datastore
|
147
158
|
self._metadata = metadata
|
148
159
|
|
160
|
+
# If we have configs, we need to dump them to a file so we can re-use them
|
161
|
+
# when calling the card creation subprocess.
|
162
|
+
if self._config_values:
|
163
|
+
with tempfile.NamedTemporaryFile(
|
164
|
+
mode="w", encoding="utf-8", delete=False
|
165
|
+
) as config_file:
|
166
|
+
config_value = dump_config_values(flow)
|
167
|
+
json.dump(config_value, config_file)
|
168
|
+
self._config_file_name = config_file.name
|
169
|
+
else:
|
170
|
+
self._config_file_name = None
|
171
|
+
|
149
172
|
card_type = self.attributes["type"]
|
150
173
|
card_class = get_card_class(card_type)
|
151
174
|
|
@@ -179,7 +202,7 @@ class CardDecorator(StepDecorator):
|
|
179
202
|
# we need to ensure that `current.card` has `CardComponentCollector` instantiated only once.
|
180
203
|
if not self._is_event_registered("pre-step"):
|
181
204
|
self._register_event("pre-step")
|
182
|
-
self._set_card_creator(CardCreator(self._create_top_level_args()))
|
205
|
+
self._set_card_creator(CardCreator(self._create_top_level_args(flow)))
|
183
206
|
|
184
207
|
current._update_env(
|
185
208
|
{"card": CardComponentCollector(self._logger, self.card_creator)}
|
@@ -223,6 +246,13 @@ class CardDecorator(StepDecorator):
|
|
223
246
|
self.card_creator.create(mode="render", final=True, **create_options)
|
224
247
|
self.card_creator.create(mode="refresh", final=True, **create_options)
|
225
248
|
|
249
|
+
# Unlink the config file if it exists
|
250
|
+
if self._config_file_name:
|
251
|
+
try:
|
252
|
+
os.unlink(self._config_file_name)
|
253
|
+
except Exception as e:
|
254
|
+
pass
|
255
|
+
|
226
256
|
@staticmethod
|
227
257
|
def _options(mapping):
|
228
258
|
for k, v in mapping.items():
|
@@ -232,9 +262,13 @@ class CardDecorator(StepDecorator):
|
|
232
262
|
for value in v:
|
233
263
|
yield "--%s" % k
|
234
264
|
if not isinstance(value, bool):
|
235
|
-
|
265
|
+
if isinstance(value, tuple):
|
266
|
+
for val in value:
|
267
|
+
yield to_unicode(val)
|
268
|
+
else:
|
269
|
+
yield to_unicode(value)
|
236
270
|
|
237
|
-
def _create_top_level_args(self):
|
271
|
+
def _create_top_level_args(self, flow):
|
238
272
|
top_level_options = {
|
239
273
|
"quiet": True,
|
240
274
|
"metadata": self._metadata.TYPE,
|
@@ -247,4 +281,8 @@ class CardDecorator(StepDecorator):
|
|
247
281
|
# We don't provide --with as all execution is taking place in
|
248
282
|
# the context of the main process
|
249
283
|
}
|
284
|
+
if self._config_values:
|
285
|
+
top_level_options["config-value"] = self._config_values
|
286
|
+
top_level_options["local-config-file"] = self._config_file_name
|
287
|
+
|
250
288
|
return list(self._options(top_level_options))
|
@@ -1,7 +1,14 @@
|
|
1
|
-
|
1
|
+
import re
|
2
|
+
from typing import Dict, List, Optional
|
3
|
+
from metaflow.exception import CommandException, MetaflowException
|
2
4
|
from metaflow.util import get_username, get_latest_run_id
|
3
5
|
|
4
6
|
|
7
|
+
# avoid circular import by having the exception class contained here
class KubernetesException(MetaflowException):
    """Exception raised by the Kubernetes helpers in this module."""

    # Short label for this error category.
    headline = "Kubernetes error"
|
10
|
+
|
11
|
+
|
5
12
|
def parse_cli_options(flow_name, run_id, user, my_runs, echo):
|
6
13
|
if user and my_runs:
|
7
14
|
raise CommandException("--user and --my-runs are mutually exclusive.")
|
@@ -52,3 +59,50 @@ def qos_requests_and_limits(qos: str, cpu: int, memory: int, storage: int):
|
|
52
59
|
# TODO: Add support for BestEffort once there is a use case for it.
|
53
60
|
# BestEffort - no limit or requests for cpu/memory
|
54
61
|
return qos_requests, qos_limits
|
62
|
+
|
63
|
+
|
64
|
+
def validate_kube_labels(
    labels: Optional[Dict[str, Optional[str]]],
) -> bool:
    """Validate label values.

    This validates the kubernetes label values. It does not validate the keys.
    Ideally, keys should be static and also the validation rules for keys are
    more complex than those for values. For full validation rules, see:

    https://kubernetes.io/docs/concepts/overview/working-with-objects/labels/#syntax-and-character-set

    Parameters
    ----------
    labels : Dict[str, Optional[str]], optional
        Mapping of label name to label value. None or an empty mapping is
        accepted, as are values that are None or the empty string.

    Returns
    -------
    bool
        Always True when every value is valid.

    Raises
    ------
    KubernetesException
        On the first value that is not a valid label value.
    """
    if not labels:
        return True

    # No anchors are needed because `fullmatch` is used below. Unlike
    # `search` with a trailing `$`, `fullmatch` does not accept a value that
    # ends in a newline (e.g. "abc\n").
    value_pattern = re.compile(r"(([A-Za-z0-9][-A-Za-z0-9_.]{0,61})?[A-Za-z0-9])?")

    for value in labels.values():
        if not value:
            # allow empty label
            continue
        if value_pattern.fullmatch(value) is None:
            raise KubernetesException(
                'Invalid value: "%s"\n'
                "A valid label must be an empty string or one that\n"
                " - Consist of alphanumeric, '-', '_' or '.' characters\n"
                " - Begins and ends with an alphanumeric character\n"
                " - Is at most 63 characters" % value
            )
    return True
|
92
|
+
|
93
|
+
|
94
|
+
def parse_kube_keyvalue_list(items: List[str], requires_both: bool = True):
    """Parse a sequence of "key=value" strings into a dictionary.

    Each item is split on its first "=", so values may themselves contain "=".

    Parameters
    ----------
    items : List[str]
        Items of the form "key=value" (or just "key" when `requires_both`
        is False).
    requires_both : bool, default True
        If True, every item must contain "=". If False, an item without "="
        maps its key to None.

    Returns
    -------
    Dict[str, Optional[str]]
        Mapping of key to value, in input order.

    Raises
    ------
    KubernetesException
        If a key appears more than once, if an item lacks "=" while
        `requires_both` is True, or if an item is not string-like.
    """

    def _unparseable():
        # `items` is wrapped in a 1-tuple so that a tuple argument is rendered
        # as a whole instead of being unpacked as %-format arguments.
        return KubernetesException("Unable to parse kubernetes list: %s" % (items,))

    parsed = {}
    for entry in items:
        try:
            key, separator, value = entry.partition("=")
        except AttributeError as err:
            # Non-string items have no `partition` method.
            raise _unparseable() from err

        if not separator:
            if requires_both:
                raise _unparseable()
            value = None

        if key in parsed:
            raise KubernetesException("Duplicate key found: %s" % key)
        parsed[key] = value
    return parsed
|
@@ -1,11 +1,8 @@
|
|
1
|
-
import copy
|
2
1
|
import json
|
3
2
|
import math
|
4
3
|
import os
|
5
|
-
import re
|
6
4
|
import shlex
|
7
5
|
import time
|
8
|
-
from typing import Dict, List, Optional
|
9
6
|
from uuid import uuid4
|
10
7
|
|
11
8
|
from metaflow import current, util
|
@@ -35,7 +32,6 @@ from metaflow.metaflow_config import (
|
|
35
32
|
DEFAULT_SECRETS_BACKEND_TYPE,
|
36
33
|
GCP_SECRET_MANAGER_PREFIX,
|
37
34
|
KUBERNETES_FETCH_EC2_METADATA,
|
38
|
-
KUBERNETES_LABELS,
|
39
35
|
KUBERNETES_SANDBOX_INIT_SCRIPT,
|
40
36
|
OTEL_ENDPOINT,
|
41
37
|
S3_ENDPOINT_URL,
|
@@ -193,6 +189,7 @@ class Kubernetes(object):
|
|
193
189
|
persistent_volume_claims=None,
|
194
190
|
tolerations=None,
|
195
191
|
labels=None,
|
192
|
+
annotations=None,
|
196
193
|
shared_memory=None,
|
197
194
|
port=None,
|
198
195
|
num_parallel=None,
|
@@ -304,10 +301,6 @@ class Kubernetes(object):
|
|
304
301
|
# see get_datastore_root_from_config in datastore/local.py).
|
305
302
|
)
|
306
303
|
|
307
|
-
_labels = self._get_labels(labels)
|
308
|
-
for k, v in _labels.items():
|
309
|
-
jobset.label(k, v)
|
310
|
-
|
311
304
|
for k in list(
|
312
305
|
[] if not secrets else [secrets] if isinstance(secrets, str) else secrets
|
313
306
|
) + KUBERNETES_SECRETS.split(","):
|
@@ -379,13 +372,16 @@ class Kubernetes(object):
|
|
379
372
|
for name, value in env.items():
|
380
373
|
jobset.environment_variable(name, value)
|
381
374
|
|
382
|
-
|
375
|
+
system_annotations = {
|
383
376
|
"metaflow/user": user,
|
384
377
|
"metaflow/flow_name": flow_name,
|
385
378
|
"metaflow/control-task-id": task_id,
|
379
|
+
"metaflow/run_id": run_id,
|
380
|
+
"metaflow/step_name": step_name,
|
381
|
+
"metaflow/attempt": attempt,
|
386
382
|
}
|
387
383
|
if current.get("project_name"):
|
388
|
-
|
384
|
+
system_annotations.update(
|
389
385
|
{
|
390
386
|
"metaflow/project_name": current.project_name,
|
391
387
|
"metaflow/branch_name": current.branch_name,
|
@@ -393,15 +389,15 @@ class Kubernetes(object):
|
|
393
389
|
}
|
394
390
|
)
|
395
391
|
|
396
|
-
|
397
|
-
|
392
|
+
system_labels = {
|
393
|
+
"app.kubernetes.io/name": "metaflow-task",
|
394
|
+
"app.kubernetes.io/part-of": "metaflow",
|
395
|
+
}
|
398
396
|
|
399
|
-
(
|
400
|
-
|
401
|
-
|
402
|
-
|
403
|
-
.label("app.kubernetes.io/name", "metaflow-task")
|
404
|
-
.label("app.kubernetes.io/part-of", "metaflow")
|
397
|
+
jobset.labels({**({} if not labels else labels), **system_labels})
|
398
|
+
|
399
|
+
jobset.annotations(
|
400
|
+
{**({} if not annotations else annotations), **system_annotations}
|
405
401
|
)
|
406
402
|
# We need this task-id set so that all the nodes are aware of the control
|
407
403
|
# task's task-id. These "MF_" variables populate the `current.parallel` namedtuple
|
@@ -491,6 +487,7 @@ class Kubernetes(object):
|
|
491
487
|
port=None,
|
492
488
|
name_pattern=None,
|
493
489
|
qos=None,
|
490
|
+
annotations=None,
|
494
491
|
):
|
495
492
|
if env is None:
|
496
493
|
env = {}
|
@@ -523,7 +520,8 @@ class Kubernetes(object):
|
|
523
520
|
retries=0,
|
524
521
|
step_name=step_name,
|
525
522
|
tolerations=tolerations,
|
526
|
-
labels=
|
523
|
+
labels=labels,
|
524
|
+
annotations=annotations,
|
527
525
|
use_tmpfs=use_tmpfs,
|
528
526
|
tmpfs_tempdir=tmpfs_tempdir,
|
529
527
|
tmpfs_size=tmpfs_size,
|
@@ -642,13 +640,25 @@ class Kubernetes(object):
|
|
642
640
|
|
643
641
|
for name, value in env.items():
|
644
642
|
job.environment_variable(name, value)
|
643
|
+
# Add job specific labels
|
644
|
+
system_labels = {
|
645
|
+
"app.kubernetes.io/name": "metaflow-task",
|
646
|
+
"app.kubernetes.io/part-of": "metaflow",
|
647
|
+
}
|
648
|
+
for name, value in system_labels.items():
|
649
|
+
job.label(name, value)
|
645
650
|
|
646
|
-
annotations
|
647
|
-
|
651
|
+
# Add job specific annotations not set in the decorator.
|
652
|
+
system_annotations = {
|
648
653
|
"metaflow/flow_name": flow_name,
|
654
|
+
"metaflow/run_id": run_id,
|
655
|
+
"metaflow/step_name": step_name,
|
656
|
+
"metaflow/task_id": task_id,
|
657
|
+
"metaflow/attempt": attempt,
|
658
|
+
"metaflow/user": user,
|
649
659
|
}
|
650
660
|
if current.get("project_name"):
|
651
|
-
|
661
|
+
system_annotations.update(
|
652
662
|
{
|
653
663
|
"metaflow/project_name": current.project_name,
|
654
664
|
"metaflow/branch_name": current.branch_name,
|
@@ -656,7 +666,7 @@ class Kubernetes(object):
|
|
656
666
|
}
|
657
667
|
)
|
658
668
|
|
659
|
-
for name, value in
|
669
|
+
for name, value in system_annotations.items():
|
660
670
|
job.annotation(name, value)
|
661
671
|
|
662
672
|
(
|
@@ -775,60 +785,3 @@ class Kubernetes(object):
|
|
775
785
|
"stderr",
|
776
786
|
job_id=self._job.id,
|
777
787
|
)
|
778
|
-
|
779
|
-
@staticmethod
|
780
|
-
def _get_labels(extra_labels=None):
|
781
|
-
if extra_labels is None:
|
782
|
-
extra_labels = {}
|
783
|
-
env_labels = KUBERNETES_LABELS.split(",") if KUBERNETES_LABELS else []
|
784
|
-
env_labels = parse_kube_keyvalue_list(env_labels, False)
|
785
|
-
labels = {**env_labels, **extra_labels}
|
786
|
-
validate_kube_labels(labels)
|
787
|
-
return labels
|
788
|
-
|
789
|
-
|
790
|
-
def validate_kube_labels(
|
791
|
-
labels: Optional[Dict[str, Optional[str]]],
|
792
|
-
) -> bool:
|
793
|
-
"""Validate label values.
|
794
|
-
|
795
|
-
This validates the kubernetes label values. It does not validate the keys.
|
796
|
-
Ideally, keys should be static and also the validation rules for keys are
|
797
|
-
more complex than those for values. For full validation rules, see:
|
798
|
-
|
799
|
-
https://kubernetes.io/docs/concepts/overview/working-with-objects/labels/#syntax-and-character-set
|
800
|
-
"""
|
801
|
-
|
802
|
-
def validate_label(s: Optional[str]):
|
803
|
-
regex_match = r"^(([A-Za-z0-9][-A-Za-z0-9_.]{0,61})?[A-Za-z0-9])?$"
|
804
|
-
if not s:
|
805
|
-
# allow empty label
|
806
|
-
return True
|
807
|
-
if not re.search(regex_match, s):
|
808
|
-
raise KubernetesException(
|
809
|
-
'Invalid value: "%s"\n'
|
810
|
-
"A valid label must be an empty string or one that\n"
|
811
|
-
" - Consist of alphanumeric, '-', '_' or '.' characters\n"
|
812
|
-
" - Begins and ends with an alphanumeric character\n"
|
813
|
-
" - Is at most 63 characters" % s
|
814
|
-
)
|
815
|
-
return True
|
816
|
-
|
817
|
-
return all([validate_label(v) for v in labels.values()]) if labels else True
|
818
|
-
|
819
|
-
|
820
|
-
def parse_kube_keyvalue_list(items: List[str], requires_both: bool = True):
|
821
|
-
try:
|
822
|
-
ret = {}
|
823
|
-
for item_str in items:
|
824
|
-
item = item_str.split("=", 1)
|
825
|
-
if requires_both:
|
826
|
-
item[1] # raise IndexError
|
827
|
-
if str(item[0]) in ret:
|
828
|
-
raise KubernetesException("Duplicate key found: %s" % str(item[0]))
|
829
|
-
ret[str(item[0])] = str(item[1]) if len(item) > 1 else None
|
830
|
-
return ret
|
831
|
-
except KubernetesException as e:
|
832
|
-
raise e
|
833
|
-
except (AttributeError, IndexError):
|
834
|
-
raise KubernetesException("Unable to parse kubernetes list: %s" % items)
|