metaflow 2.12.39__py2.py3-none-any.whl → 2.13.1__py2.py3-none-any.whl

This diff compares the contents of two publicly released versions of the package, as published to a supported public registry. It is provided for informational purposes only.
Files changed (56)
  1. metaflow/__init__.py +1 -1
  2. metaflow/cli.py +111 -36
  3. metaflow/cli_args.py +2 -2
  4. metaflow/cli_components/run_cmds.py +3 -1
  5. metaflow/datastore/flow_datastore.py +2 -2
  6. metaflow/exception.py +8 -2
  7. metaflow/flowspec.py +48 -36
  8. metaflow/graph.py +28 -27
  9. metaflow/includefile.py +2 -2
  10. metaflow/lint.py +35 -20
  11. metaflow/metadata_provider/heartbeat.py +23 -8
  12. metaflow/metaflow_config.py +7 -0
  13. metaflow/parameters.py +11 -4
  14. metaflow/plugins/argo/argo_client.py +0 -2
  15. metaflow/plugins/argo/argo_workflows.py +86 -104
  16. metaflow/plugins/argo/argo_workflows_cli.py +0 -1
  17. metaflow/plugins/argo/argo_workflows_decorator.py +2 -4
  18. metaflow/plugins/argo/argo_workflows_deployer_objects.py +42 -0
  19. metaflow/plugins/argo/jobset_input_paths.py +0 -1
  20. metaflow/plugins/aws/aws_utils.py +6 -1
  21. metaflow/plugins/aws/batch/batch_client.py +1 -3
  22. metaflow/plugins/aws/batch/batch_decorator.py +11 -11
  23. metaflow/plugins/aws/secrets_manager/aws_secrets_manager_secrets_provider.py +13 -10
  24. metaflow/plugins/aws/step_functions/dynamo_db_client.py +0 -3
  25. metaflow/plugins/aws/step_functions/production_token.py +1 -1
  26. metaflow/plugins/aws/step_functions/step_functions.py +1 -1
  27. metaflow/plugins/aws/step_functions/step_functions_cli.py +0 -1
  28. metaflow/plugins/aws/step_functions/step_functions_decorator.py +0 -1
  29. metaflow/plugins/aws/step_functions/step_functions_deployer_objects.py +0 -1
  30. metaflow/plugins/cards/card_creator.py +1 -0
  31. metaflow/plugins/cards/card_decorator.py +46 -8
  32. metaflow/plugins/kubernetes/kube_utils.py +55 -1
  33. metaflow/plugins/kubernetes/kubernetes.py +33 -80
  34. metaflow/plugins/kubernetes/kubernetes_cli.py +22 -5
  35. metaflow/plugins/kubernetes/kubernetes_decorator.py +49 -2
  36. metaflow/plugins/kubernetes/kubernetes_job.py +3 -6
  37. metaflow/plugins/kubernetes/kubernetes_jobsets.py +22 -5
  38. metaflow/plugins/pypi/bootstrap.py +249 -81
  39. metaflow/plugins/pypi/conda_environment.py +83 -27
  40. metaflow/plugins/pypi/micromamba.py +77 -36
  41. metaflow/plugins/pypi/pip.py +9 -6
  42. metaflow/plugins/pypi/utils.py +4 -2
  43. metaflow/runner/click_api.py +175 -39
  44. metaflow/runner/deployer_impl.py +6 -1
  45. metaflow/runner/metaflow_runner.py +6 -1
  46. metaflow/runner/utils.py +5 -0
  47. metaflow/user_configs/config_options.py +87 -34
  48. metaflow/user_configs/config_parameters.py +44 -25
  49. metaflow/util.py +2 -2
  50. metaflow/version.py +1 -1
  51. {metaflow-2.12.39.dist-info → metaflow-2.13.1.dist-info}/METADATA +2 -2
  52. {metaflow-2.12.39.dist-info → metaflow-2.13.1.dist-info}/RECORD +56 -56
  53. {metaflow-2.12.39.dist-info → metaflow-2.13.1.dist-info}/WHEEL +1 -1
  54. {metaflow-2.12.39.dist-info → metaflow-2.13.1.dist-info}/LICENSE +0 -0
  55. {metaflow-2.12.39.dist-info → metaflow-2.13.1.dist-info}/entry_points.txt +0 -0
  56. {metaflow-2.12.39.dist-info → metaflow-2.13.1.dist-info}/top_level.txt +0 -0
metaflow/plugins/argo/argo_workflows_deployer_objects.py
@@ -1,5 +1,6 @@
 import sys
 import json
+import time
 import tempfile
 from typing import ClassVar, Optional
 
@@ -170,6 +171,47 @@ class ArgoWorkflowsTriggeredRun(TriggeredRun):
         command_obj.sync_wait()
         return command_obj.process.returncode == 0
 
+    def wait_for_completion(self, timeout: Optional[int] = None):
+        """
+        Wait for the workflow to complete or timeout.
+
+        Parameters
+        ----------
+        timeout : int, optional, default None
+            Maximum time in seconds to wait for workflow completion.
+            If None, waits indefinitely.
+
+        Raises
+        ------
+        TimeoutError
+            If the workflow does not complete within the specified timeout period.
+        """
+        start_time = time.time()
+        check_interval = 5
+        while self.is_running:
+            if timeout is not None and (time.time() - start_time) > timeout:
+                raise TimeoutError(
+                    "Workflow did not complete within specified timeout."
+                )
+            time.sleep(check_interval)
+
+    @property
+    def is_running(self):
+        """
+        Check if the workflow is currently running.
+
+        Returns
+        -------
+        bool
+            True if the workflow status is either 'Pending' or 'Running',
+            False otherwise.
+        """
+        workflow_status = self.status
+        # full list of all states present here:
+        # https://github.com/argoproj/argo-workflows/blob/main/pkg/apis/workflow/v1alpha1/workflow_types.go#L54
+        # we only consider non-terminal states to determine if the workflow has not finished
+        return workflow_status is not None and workflow_status in ["Pending", "Running"]
+
     @property
     def status(self) -> Optional[str]:
         """
metaflow/plugins/argo/jobset_input_paths.py
@@ -1,5 +1,4 @@
 import sys
-from hashlib import md5
 
 
 def generate_input_paths(run_id, step_name, task_id_entropy, num_parallel):
metaflow/plugins/aws/aws_utils.py
@@ -1,5 +1,4 @@
 import re
-import requests
 
 from metaflow.exception import MetaflowException
 
@@ -30,6 +29,10 @@ def get_ec2_instance_metadata():
     - ec2-region
     - ec2-availability-zone
     """
+
+    # TODO: Remove dependency on requests
+    import requests
+
     meta = {}
     # Capture AWS instance identity metadata. This is best-effort only since
     # access to this end-point might be blocked on AWS and not available
@@ -159,6 +162,8 @@ def compute_resource_attributes(decos, compute_deco, resource_defaults):
                 # Here we don't have ints, so we compare the value and raise
                 # an exception if not equal
                 if my_val != v:
+                    # TODO: Throw a better exception since the user has no
+                    # knowledge of 'compute' decorator
                     raise MetaflowException(
                         "'resources' and compute decorator have conflicting "
                         "values for '%s'. Please use consistent values or "
metaflow/plugins/aws/batch/batch_client.py
@@ -1,9 +1,7 @@
 # -*- coding: utf-8 -*-
-from collections import defaultdict, deque
+from collections import defaultdict
 import copy
 import random
-import select
-import sys
 import time
 import hashlib
 
metaflow/plugins/aws/batch/batch_decorator.py
@@ -1,34 +1,30 @@
 import os
-import sys
 import platform
-import requests
+import sys
 import time
 
-from metaflow import util
 from metaflow import R, current
-
 from metaflow.decorators import StepDecorator
-from metaflow.plugins.resources_decorator import ResourcesDecorator
-from metaflow.plugins.timeout_decorator import get_run_time_limit_for_task
 from metaflow.metadata_provider import MetaDatum
 from metaflow.metadata_provider.util import sync_local_metadata_to_datastore
 from metaflow.metaflow_config import (
-    ECS_S3_ACCESS_IAM_ROLE,
-    BATCH_JOB_QUEUE,
     BATCH_CONTAINER_IMAGE,
     BATCH_CONTAINER_REGISTRY,
-    ECS_FARGATE_EXECUTION_ROLE,
+    BATCH_JOB_QUEUE,
     DATASTORE_LOCAL_DIR,
+    ECS_FARGATE_EXECUTION_ROLE,
+    ECS_S3_ACCESS_IAM_ROLE,
 )
+from metaflow.plugins.timeout_decorator import get_run_time_limit_for_task
 from metaflow.sidecar import Sidecar
 from metaflow.unbounded_foreach import UBF_CONTROL
 
-from .batch import BatchException
 from ..aws_utils import (
     compute_resource_attributes,
     get_docker_registry,
     get_ec2_instance_metadata,
 )
+from .batch import BatchException
 
 
 class BatchDecorator(StepDecorator):
@@ -280,6 +276,10 @@ class BatchDecorator(StepDecorator):
         # Metaflow would be running the container agent compatible with
         # version V4.
         # https://docs.aws.amazon.com/AmazonECS/latest/developerguide/task-metadata-endpoint.html
+
+        # TODO: Remove dependency on requests
+        import requests
+
         try:
             logs_meta = (
                 requests.get(url=os.environ["ECS_CONTAINER_METADATA_URI_V4"])
@@ -386,7 +386,7 @@ class BatchDecorator(StepDecorator):
                         len(flow._control_mapper_tasks),
                     )
                 )
-            except Exception as e:
+            except Exception:
                 pass
             raise Exception(
                 "Batch secondary workers did not finish in %s seconds" % TIMEOUT
metaflow/plugins/aws/secrets_manager/aws_secrets_manager_secrets_provider.py
@@ -50,24 +50,27 @@ class AwsSecretsManagerSecretsProvider(SecretsProvider):
         The secret payload from AWS is EITHER a string OR a binary blob.
 
         If the secret contains a string payload ("SecretString"):
-        - if the `parse_secret_string_as_json` option is True (default):
+        - if the `json` option is True (default):
           {SecretString} will be parsed as a JSON. If successfully parsed, AND the JSON contains a
           top-level object, each entry K/V in the object will also be converted to an entry in the result. V will
           always be casted to a string (if not already a string).
-        - If `parse_secret_string_as_json` option is False:
-          {SecretString} will be returned as a single entry in the result, with the key being the secret_id.
+        - If `json` option is False:
+          {SecretString} will be returned as a single entry in the result, where the key is either:
+            - the `secret_id`, OR
+            - the value set by `options={"env_var_name": custom_env_var_name}`.
 
-        Otherwise, the secret contains a binary blob payload ("SecretBinary"). In this case
-        - The result dic contains '{SecretName}': '{SecretBinary}', where {SecretBinary} is a base64-encoded string
+        Otherwise, if the secret contains a binary blob payload ("SecretBinary"):
+        - The result dict contains '{SecretName}': '{SecretBinary}', where {SecretBinary} is a base64-encoded string.
 
-        All keys in the result are sanitized to be more valid environment variable names. This is done on a best effort
+        All keys in the result are sanitized to be more valid environment variable names. This is done on a best-effort
         basis. Further validation is expected to be done by the invoking @secrets decorator itself.
 
-        :param secret_id: ARN or friendly name of the secret
-        :param options: unused
-        :param role: AWS IAM Role ARN to assume before reading the secret
-        :return: dict of environment variables. All keys and values are strings.
+        :param secret_id: ARN or friendly name of the secret.
+        :param options: Dictionary of additional options. E.g., `options={"env_var_name": custom_env_var_name}`.
+        :param role: AWS IAM Role ARN to assume before reading the secret.
+        :return: Dictionary of environment variables. All keys and values are strings.
         """
+
         import botocore
         from metaflow.plugins.aws.aws_client import get_aws_client
 
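
The `options` documented above arrive through the `@secrets` decorator. A hedged sketch of how the `json` and `env_var_name` options might be combined (the secret id and env var name are made up; the dict-style source follows the decorator's type/id/options form):

from metaflow import FlowSpec, secrets, step


class SecretFlow(FlowSpec):
    # Hypothetical secret id; with json=False the whole SecretString is
    # exposed under the single env var named by env_var_name.
    @secrets(
        sources=[
            {
                "type": "aws-secrets-manager",
                "id": "my-api-token",
                "options": {"json": False, "env_var_name": "API_TOKEN"},
            }
        ]
    )
    @step
    def start(self):
        import os

        print("token present:", "API_TOKEN" in os.environ)
        self.next(self.end)

    @step
    def end(self):
        pass


if __name__ == "__main__":
    SecretFlow()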
metaflow/plugins/aws/step_functions/dynamo_db_client.py
@@ -1,8 +1,5 @@
-import os
 import time
 
-import requests
-
 from metaflow.metaflow_config import SFN_DYNAMO_DB_TABLE
 
 
metaflow/plugins/aws/step_functions/production_token.py
@@ -3,7 +3,7 @@ import os
 import random
 import string
 import zlib
-from itertools import dropwhile, islice
+from itertools import dropwhile
 
 from metaflow.util import to_bytes
 
metaflow/plugins/aws/step_functions/step_functions.py
@@ -236,7 +236,7 @@ class StepFunctions(object):
             return parameters.get("metaflow.owner"), parameters.get(
                 "metaflow.production_token"
             )
-        except KeyError as e:
+        except KeyError:
             raise StepFunctionsException(
                 "An existing non-metaflow "
                 "workflow with the same name as "
metaflow/plugins/aws/step_functions/step_functions_cli.py
@@ -4,7 +4,6 @@ import re
 from hashlib import sha1
 
 from metaflow import JSONType, current, decorators, parameters
-from metaflow.client.core import get_metadata
 from metaflow._vendor import click
 from metaflow.exception import MetaflowException, MetaflowInternalError
 from metaflow.metaflow_config import (
metaflow/plugins/aws/step_functions/step_functions_decorator.py
@@ -1,4 +1,3 @@
-import json
 import os
 import time
 
metaflow/plugins/aws/step_functions/step_functions_deployer_objects.py
@@ -1,6 +1,5 @@
 import sys
 import json
-import tempfile
 from typing import ClassVar, Optional, List
 
 from metaflow.plugins.aws.step_functions.step_functions import StepFunctions
metaflow/plugins/cards/card_creator.py
@@ -122,6 +122,7 @@ class CardCreator:
             executable,
             sys.argv[0],
         ]
+
         cmd += self._top_level_options + [
             "card",
             "create",
metaflow/plugins/cards/card_decorator.py
@@ -1,13 +1,16 @@
+import json
+import os
+import re
+import tempfile
+
 from metaflow.decorators import StepDecorator
 from metaflow.metaflow_current import current
+from metaflow.user_configs.config_options import ConfigInput
+from metaflow.user_configs.config_parameters import dump_config_values
 from metaflow.util import to_unicode
+
 from .component_serializer import CardComponentCollector, get_card_class
 from .card_creator import CardCreator
-
-
-# from metaflow import get_metadata
-import re
-
 from .exception import CARD_ID_PATTERN, TYPE_CHECK_REGEX
 
 ASYNC_TIMEOUT = 30
@@ -111,6 +114,14 @@ class CardDecorator(StepDecorator):
         self._logger = logger
         self.card_options = None
 
+        # We check for configuration options. We do this here before they are
+        # converted to properties.
+        self._config_values = [
+            (config.name, ConfigInput.make_key_name(config.name))
+            for _, config in flow._get_parameters()
+            if config.IS_CONFIG_PARAMETER
+        ]
+
         self.card_options = self.attributes["options"]
 
         evt_name = "step-init"
@@ -146,6 +157,18 @@ class CardDecorator(StepDecorator):
         self._task_datastore = task_datastore
         self._metadata = metadata
 
+        # If we have configs, we need to dump them to a file so we can re-use them
+        # when calling the card creation subprocess.
+        if self._config_values:
+            with tempfile.NamedTemporaryFile(
+                mode="w", encoding="utf-8", delete=False
+            ) as config_file:
+                config_value = dump_config_values(flow)
+                json.dump(config_value, config_file)
+                self._config_file_name = config_file.name
+        else:
+            self._config_file_name = None
+
         card_type = self.attributes["type"]
         card_class = get_card_class(card_type)
@@ -179,7 +202,7 @@ class CardDecorator(StepDecorator):
         # we need to ensure that `current.card` has `CardComponentCollector` instantiated only once.
         if not self._is_event_registered("pre-step"):
             self._register_event("pre-step")
-            self._set_card_creator(CardCreator(self._create_top_level_args()))
+            self._set_card_creator(CardCreator(self._create_top_level_args(flow)))
 
         current._update_env(
             {"card": CardComponentCollector(self._logger, self.card_creator)}
@@ -223,6 +246,13 @@ class CardDecorator(StepDecorator):
         self.card_creator.create(mode="render", final=True, **create_options)
         self.card_creator.create(mode="refresh", final=True, **create_options)
 
+        # Unlink the config file if it exists
+        if self._config_file_name:
+            try:
+                os.unlink(self._config_file_name)
+            except Exception as e:
+                pass
+
     @staticmethod
     def _options(mapping):
         for k, v in mapping.items():
@@ -232,9 +262,13 @@ class CardDecorator(StepDecorator):
             for value in v:
                 yield "--%s" % k
                 if not isinstance(value, bool):
-                    yield to_unicode(value)
+                    if isinstance(value, tuple):
+                        for val in value:
+                            yield to_unicode(val)
+                    else:
+                        yield to_unicode(value)
 
-    def _create_top_level_args(self):
+    def _create_top_level_args(self, flow):
         top_level_options = {
             "quiet": True,
             "metadata": self._metadata.TYPE,
@@ -247,4 +281,8 @@ class CardDecorator(StepDecorator):
             # We don't provide --with as all execution is taking place in
             # the context of the main process
         }
+        if self._config_values:
+            top_level_options["config-value"] = self._config_values
+            top_level_options["local-config-file"] = self._config_file_name
+
         return list(self._options(top_level_options))
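
The tuple branch added to `_options` is what lets each `(config name, key)` pair from `_config_values` expand into separate tokens after a single `--config-value` flag. A simplified standalone sketch of that expansion (not the decorator's exact helper; the key string is a placeholder for `ConfigInput.make_key_name(...)`):

def expand_options(mapping):
    # Simplified stand-in for CardDecorator._options: turns a dict of
    # top-level options into CLI tokens, flattening tuple values.
    for k, v in mapping.items():
        if v:
            values = v if isinstance(v, list) else [v]
            for value in values:
                yield "--%s" % k
                if not isinstance(value, bool):
                    if isinstance(value, tuple):
                        for val in value:
                            yield str(val)
                    else:
                        yield str(value)

print(list(expand_options({"config-value": [("cfg", "cfg-key-placeholder")]})))
# ['--config-value', 'cfg', 'cfg-key-placeholder']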
metaflow/plugins/kubernetes/kube_utils.py
@@ -1,7 +1,14 @@
-from metaflow.exception import CommandException
+import re
+from typing import Dict, List, Optional
+from metaflow.exception import CommandException, MetaflowException
 from metaflow.util import get_username, get_latest_run_id
 
 
+# avoid circular import by having the exception class contained here
+class KubernetesException(MetaflowException):
+    headline = "Kubernetes error"
+
+
 def parse_cli_options(flow_name, run_id, user, my_runs, echo):
     if user and my_runs:
         raise CommandException("--user and --my-runs are mutually exclusive.")
@@ -52,3 +59,50 @@ def qos_requests_and_limits(qos: str, cpu: int, memory: int, storage: int):
     # TODO: Add support for BestEffort once there is a use case for it.
     # BestEffort - no limit or requests for cpu/memory
     return qos_requests, qos_limits
+
+
+def validate_kube_labels(
+    labels: Optional[Dict[str, Optional[str]]],
+) -> bool:
+    """Validate label values.
+
+    This validates the kubernetes label values. It does not validate the keys.
+    Ideally, keys should be static and also the validation rules for keys are
+    more complex than those for values. For full validation rules, see:
+
+    https://kubernetes.io/docs/concepts/overview/working-with-objects/labels/#syntax-and-character-set
+    """
+
+    def validate_label(s: Optional[str]):
+        regex_match = r"^(([A-Za-z0-9][-A-Za-z0-9_.]{0,61})?[A-Za-z0-9])?$"
+        if not s:
+            # allow empty label
+            return True
+        if not re.search(regex_match, s):
+            raise KubernetesException(
+                'Invalid value: "%s"\n'
+                "A valid label must be an empty string or one that\n"
+                "  - Consist of alphanumeric, '-', '_' or '.' characters\n"
+                "  - Begins and ends with an alphanumeric character\n"
+                "  - Is at most 63 characters" % s
+            )
+        return True
+
+    return all([validate_label(v) for v in labels.values()]) if labels else True
+
+
+def parse_kube_keyvalue_list(items: List[str], requires_both: bool = True):
+    try:
+        ret = {}
+        for item_str in items:
+            item = item_str.split("=", 1)
+            if requires_both:
+                item[1]  # raise IndexError
+            if str(item[0]) in ret:
+                raise KubernetesException("Duplicate key found: %s" % str(item[0]))
+            ret[str(item[0])] = str(item[1]) if len(item) > 1 else None
+        return ret
+    except KubernetesException as e:
+        raise e
+    except (AttributeError, IndexError):
+        raise KubernetesException("Unable to parse kubernetes list: %s" % items)
metaflow/plugins/kubernetes/kubernetes.py
@@ -1,11 +1,8 @@
-import copy
 import json
 import math
 import os
-import re
 import shlex
 import time
-from typing import Dict, List, Optional
 from uuid import uuid4
 
 from metaflow import current, util
@@ -35,7 +32,6 @@ from metaflow.metaflow_config import (
     DEFAULT_SECRETS_BACKEND_TYPE,
     GCP_SECRET_MANAGER_PREFIX,
     KUBERNETES_FETCH_EC2_METADATA,
-    KUBERNETES_LABELS,
     KUBERNETES_SANDBOX_INIT_SCRIPT,
     OTEL_ENDPOINT,
     S3_ENDPOINT_URL,
@@ -193,6 +189,7 @@ class Kubernetes(object):
         persistent_volume_claims=None,
         tolerations=None,
         labels=None,
+        annotations=None,
         shared_memory=None,
         port=None,
         num_parallel=None,
@@ -304,10 +301,6 @@ class Kubernetes(object):
             # see get_datastore_root_from_config in datastore/local.py).
         )
 
-        _labels = self._get_labels(labels)
-        for k, v in _labels.items():
-            jobset.label(k, v)
-
         for k in list(
             [] if not secrets else [secrets] if isinstance(secrets, str) else secrets
         ) + KUBERNETES_SECRETS.split(","):
@@ -379,13 +372,16 @@ class Kubernetes(object):
         for name, value in env.items():
             jobset.environment_variable(name, value)
 
-        annotations = {
+        system_annotations = {
             "metaflow/user": user,
             "metaflow/flow_name": flow_name,
             "metaflow/control-task-id": task_id,
+            "metaflow/run_id": run_id,
+            "metaflow/step_name": step_name,
+            "metaflow/attempt": attempt,
         }
         if current.get("project_name"):
-            annotations.update(
+            system_annotations.update(
                 {
                     "metaflow/project_name": current.project_name,
                     "metaflow/branch_name": current.branch_name,
@@ -393,15 +389,15 @@ class Kubernetes(object):
                 }
             )
 
-        for name, value in annotations.items():
-            jobset.annotation(name, value)
+        system_labels = {
+            "app.kubernetes.io/name": "metaflow-task",
+            "app.kubernetes.io/part-of": "metaflow",
+        }
 
-        (
-            jobset.annotation("metaflow/run_id", run_id)
-            .annotation("metaflow/step_name", step_name)
-            .annotation("metaflow/attempt", attempt)
-            .label("app.kubernetes.io/name", "metaflow-task")
-            .label("app.kubernetes.io/part-of", "metaflow")
+        jobset.labels({**({} if not labels else labels), **system_labels})
+
+        jobset.annotations(
+            {**({} if not annotations else annotations), **system_annotations}
         )
         # We need this task-id set so that all the nodes are aware of the control
         # task's task-id. These "MF_" variables populate the `current.parallel` namedtuple
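
Note the merge order in the new `jobset.labels(...)` and `jobset.annotations(...)` calls: the system-managed dict is unpacked last, so its entries win whenever a user-supplied label or annotation collides with a reserved key. A small illustration of that precedence (the user values are made up):

user_labels = {"team": "ml", "app.kubernetes.io/name": "my-override"}
system_labels = {
    "app.kubernetes.io/name": "metaflow-task",
    "app.kubernetes.io/part-of": "metaflow",
}

# In a {**a, **b} merge, the right-hand dict wins on key collisions.
merged = {**({} if not user_labels else user_labels), **system_labels}
print(merged["app.kubernetes.io/name"])  # metaflow-task
print(merged["team"])                    # ml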
@@ -491,6 +487,7 @@ class Kubernetes(object):
         port=None,
         name_pattern=None,
         qos=None,
+        annotations=None,
     ):
         if env is None:
             env = {}
@@ -523,7 +520,8 @@ class Kubernetes(object):
             retries=0,
             step_name=step_name,
             tolerations=tolerations,
-            labels=self._get_labels(labels),
+            labels=labels,
+            annotations=annotations,
             use_tmpfs=use_tmpfs,
             tmpfs_tempdir=tmpfs_tempdir,
             tmpfs_size=tmpfs_size,
@@ -642,13 +640,25 @@ class Kubernetes(object):
 
         for name, value in env.items():
             job.environment_variable(name, value)
+        # Add job specific labels
+        system_labels = {
+            "app.kubernetes.io/name": "metaflow-task",
+            "app.kubernetes.io/part-of": "metaflow",
+        }
+        for name, value in system_labels.items():
+            job.label(name, value)
 
-        annotations = {
-            "metaflow/user": user,
+        # Add job specific annotations not set in the decorator.
+        system_annotations = {
             "metaflow/flow_name": flow_name,
+            "metaflow/run_id": run_id,
+            "metaflow/step_name": step_name,
+            "metaflow/task_id": task_id,
+            "metaflow/attempt": attempt,
+            "metaflow/user": user,
         }
         if current.get("project_name"):
-            annotations.update(
+            system_annotations.update(
                 {
                     "metaflow/project_name": current.project_name,
                     "metaflow/branch_name": current.branch_name,
@@ -656,7 +666,7 @@ class Kubernetes(object):
                 }
             )
 
-        for name, value in annotations.items():
+        for name, value in system_annotations.items():
             job.annotation(name, value)
 
         (
@@ -775,60 +785,3 @@ class Kubernetes(object):
                 "stderr",
                 job_id=self._job.id,
             )
-
-    @staticmethod
-    def _get_labels(extra_labels=None):
-        if extra_labels is None:
-            extra_labels = {}
-        env_labels = KUBERNETES_LABELS.split(",") if KUBERNETES_LABELS else []
-        env_labels = parse_kube_keyvalue_list(env_labels, False)
-        labels = {**env_labels, **extra_labels}
-        validate_kube_labels(labels)
-        return labels
-
-
-def validate_kube_labels(
-    labels: Optional[Dict[str, Optional[str]]],
-) -> bool:
-    """Validate label values.
-
-    This validates the kubernetes label values. It does not validate the keys.
-    Ideally, keys should be static and also the validation rules for keys are
-    more complex than those for values. For full validation rules, see:
-
-    https://kubernetes.io/docs/concepts/overview/working-with-objects/labels/#syntax-and-character-set
-    """
-
-    def validate_label(s: Optional[str]):
-        regex_match = r"^(([A-Za-z0-9][-A-Za-z0-9_.]{0,61})?[A-Za-z0-9])?$"
-        if not s:
-            # allow empty label
-            return True
-        if not re.search(regex_match, s):
-            raise KubernetesException(
-                'Invalid value: "%s"\n'
-                "A valid label must be an empty string or one that\n"
-                "  - Consist of alphanumeric, '-', '_' or '.' characters\n"
-                "  - Begins and ends with an alphanumeric character\n"
-                "  - Is at most 63 characters" % s
-            )
-        return True
-
-    return all([validate_label(v) for v in labels.values()]) if labels else True
-
-
-def parse_kube_keyvalue_list(items: List[str], requires_both: bool = True):
-    try:
-        ret = {}
-        for item_str in items:
-            item = item_str.split("=", 1)
-            if requires_both:
-                item[1]  # raise IndexError
-            if str(item[0]) in ret:
-                raise KubernetesException("Duplicate key found: %s" % str(item[0]))
-            ret[str(item[0])] = str(item[1]) if len(item) > 1 else None
-        return ret
-    except KubernetesException as e:
-        raise e
-    except (AttributeError, IndexError):
-        raise KubernetesException("Unable to parse kubernetes list: %s" % items)