ob-metaflow 2.12.39.1__py2.py3-none-any.whl → 2.13.1.1__py2.py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (54)
  1. metaflow/__init__.py +1 -1
  2. metaflow/cli.py +111 -36
  3. metaflow/cli_args.py +2 -2
  4. metaflow/cli_components/run_cmds.py +3 -1
  5. metaflow/datastore/flow_datastore.py +2 -2
  6. metaflow/exception.py +8 -2
  7. metaflow/flowspec.py +48 -36
  8. metaflow/graph.py +28 -27
  9. metaflow/includefile.py +2 -2
  10. metaflow/lint.py +35 -20
  11. metaflow/metadata_provider/heartbeat.py +23 -8
  12. metaflow/metaflow_config.py +7 -0
  13. metaflow/parameters.py +11 -4
  14. metaflow/plugins/argo/argo_client.py +0 -2
  15. metaflow/plugins/argo/argo_workflows.py +86 -104
  16. metaflow/plugins/argo/argo_workflows_cli.py +0 -1
  17. metaflow/plugins/argo/argo_workflows_decorator.py +2 -4
  18. metaflow/plugins/argo/jobset_input_paths.py +0 -1
  19. metaflow/plugins/aws/aws_utils.py +6 -1
  20. metaflow/plugins/aws/batch/batch_client.py +1 -3
  21. metaflow/plugins/aws/batch/batch_decorator.py +11 -11
  22. metaflow/plugins/aws/secrets_manager/aws_secrets_manager_secrets_provider.py +13 -10
  23. metaflow/plugins/aws/step_functions/dynamo_db_client.py +0 -3
  24. metaflow/plugins/aws/step_functions/production_token.py +1 -1
  25. metaflow/plugins/aws/step_functions/step_functions.py +1 -1
  26. metaflow/plugins/aws/step_functions/step_functions_cli.py +0 -1
  27. metaflow/plugins/aws/step_functions/step_functions_decorator.py +0 -1
  28. metaflow/plugins/aws/step_functions/step_functions_deployer_objects.py +0 -1
  29. metaflow/plugins/cards/card_creator.py +1 -0
  30. metaflow/plugins/cards/card_decorator.py +46 -8
  31. metaflow/plugins/kubernetes/kube_utils.py +55 -1
  32. metaflow/plugins/kubernetes/kubernetes.py +33 -80
  33. metaflow/plugins/kubernetes/kubernetes_cli.py +22 -5
  34. metaflow/plugins/kubernetes/kubernetes_decorator.py +49 -2
  35. metaflow/plugins/kubernetes/kubernetes_job.py +3 -6
  36. metaflow/plugins/kubernetes/kubernetes_jobsets.py +22 -5
  37. metaflow/plugins/pypi/bootstrap.py +87 -54
  38. metaflow/plugins/pypi/conda_environment.py +7 -6
  39. metaflow/plugins/pypi/micromamba.py +35 -21
  40. metaflow/plugins/pypi/pip.py +2 -4
  41. metaflow/plugins/pypi/utils.py +4 -2
  42. metaflow/runner/click_api.py +175 -39
  43. metaflow/runner/deployer_impl.py +6 -1
  44. metaflow/runner/metaflow_runner.py +6 -1
  45. metaflow/user_configs/config_options.py +87 -34
  46. metaflow/user_configs/config_parameters.py +44 -25
  47. metaflow/util.py +2 -2
  48. metaflow/version.py +1 -1
  49. {ob_metaflow-2.12.39.1.dist-info → ob_metaflow-2.13.1.1.dist-info}/METADATA +2 -2
  50. {ob_metaflow-2.12.39.1.dist-info → ob_metaflow-2.13.1.1.dist-info}/RECORD +54 -54
  51. {ob_metaflow-2.12.39.1.dist-info → ob_metaflow-2.13.1.1.dist-info}/WHEEL +1 -1
  52. {ob_metaflow-2.12.39.1.dist-info → ob_metaflow-2.13.1.1.dist-info}/LICENSE +0 -0
  53. {ob_metaflow-2.12.39.1.dist-info → ob_metaflow-2.13.1.1.dist-info}/entry_points.txt +0 -0
  54. {ob_metaflow-2.12.39.1.dist-info → ob_metaflow-2.13.1.1.dist-info}/top_level.txt +0 -0
metaflow/lint.py CHANGED
@@ -52,7 +52,7 @@ def check_reserved_words(graph):
     msg = "Step name *%s* is a reserved word. Choose another name for the " "step."
     for node in graph:
         if node.name in RESERVED:
-            raise LintWarn(msg % node.name)
+            raise LintWarn(msg % node.name, node.func_lineno, node.source_file)


 @linter.ensure_fundamentals
@@ -76,9 +76,9 @@ def check_that_end_is_end(graph):
     node = graph["end"]

     if node.has_tail_next or node.invalid_tail_next:
-        raise LintWarn(msg0, node.tail_next_lineno)
+        raise LintWarn(msg0, node.tail_next_lineno, node.source_file)
     if node.num_args > 1:
-        raise LintWarn(msg1, node.tail_next_lineno)
+        raise LintWarn(msg1, node.tail_next_lineno, node.source_file)


 @linter.ensure_fundamentals
@@ -90,7 +90,7 @@ def check_step_names(graph):
     )
     for node in graph:
         if re.search("[^a-z0-9_]", node.name) or node.name[0] == "_":
-            raise LintWarn(msg.format(node), node.func_lineno)
+            raise LintWarn(msg.format(node), node.func_lineno, node.source_file)


 @linter.ensure_fundamentals
@@ -108,11 +108,11 @@ def check_num_args(graph):
     msg2 = "Step *{0.name}* is missing the 'self' argument."
     for node in graph:
         if node.num_args > 2:
-            raise LintWarn(msg0.format(node), node.func_lineno)
+            raise LintWarn(msg0.format(node), node.func_lineno, node.source_file)
         elif node.num_args == 2 and node.type != "join":
-            raise LintWarn(msg1.format(node), node.func_lineno)
+            raise LintWarn(msg1.format(node), node.func_lineno, node.source_file)
         elif node.num_args == 0:
-            raise LintWarn(msg2.format(node), node.func_lineno)
+            raise LintWarn(msg2.format(node), node.func_lineno, node.source_file)


 @linter.ensure_static_graph
@@ -125,7 +125,7 @@ def check_static_transitions(graph):
     )
     for node in graph:
         if node.type != "end" and not node.has_tail_next:
-            raise LintWarn(msg.format(node), node.func_lineno)
+            raise LintWarn(msg.format(node), node.func_lineno, node.source_file)


 @linter.ensure_static_graph
@@ -138,7 +138,7 @@ def check_valid_transitions(graph):
     )
     for node in graph:
         if node.type != "end" and node.has_tail_next and node.invalid_tail_next:
-            raise LintWarn(msg.format(node), node.tail_next_lineno)
+            raise LintWarn(msg.format(node), node.tail_next_lineno, node.source_file)


 @linter.ensure_static_graph
@@ -151,7 +151,11 @@ def check_unknown_transitions(graph):
     for node in graph:
         unknown = [n for n in node.out_funcs if n not in graph]
         if unknown:
-            raise LintWarn(msg.format(node, step=unknown[0]), node.tail_next_lineno)
+            raise LintWarn(
+                msg.format(node, step=unknown[0]),
+                node.tail_next_lineno,
+                node.source_file,
+            )


 @linter.ensure_acyclicity
@@ -167,7 +171,9 @@ def check_for_acyclicity(graph):
         for n in node.out_funcs:
             if n in seen:
                 path = "->".join(seen + [n])
-                raise LintWarn(msg.format(path), node.tail_next_lineno)
+                raise LintWarn(
+                    msg.format(path), node.tail_next_lineno, node.source_file
+                )
             else:
                 check_path(graph[n], seen + [n])

@@ -195,7 +201,7 @@ def check_for_orphans(graph):
     orphans = nodeset - seen
     if orphans:
         orphan = graph[list(orphans)[0]]
-        raise LintWarn(msg.format(orphan), orphan.func_lineno)
+        raise LintWarn(msg.format(orphan), orphan.func_lineno, orphan.source_file)


 @linter.ensure_static_graph
@@ -230,7 +236,9 @@ def check_split_join_balance(graph):
            if split_stack:
                 _, split_roots = split_stack.pop()
                 roots = ", ".join(split_roots)
-                raise LintWarn(msg0.format(roots=roots))
+                raise LintWarn(
+                    msg0.format(roots=roots), node.func_lineno, node.source_file
+                )
         elif node.type == "join":
             if split_stack:
                 _, split_roots = split_stack[-1]
@@ -243,9 +251,10 @@
                             node, paths=paths, num_roots=len(split_roots), roots=roots
                         ),
                         node.func_lineno,
+                        node.source_file,
                     )
             else:
                 raise LintWarn(msg2.format(node), node.func_lineno, node.source_file)

         # check that incoming steps come from the same lineage
         # (no cross joins)
@@ -256,7 +265,7 @@ def check_split_join_balance(graph):
             return tuple(graph[n].split_parents)

         if not all_equal(map(parents, node.in_funcs)):
-            raise LintWarn(msg3.format(node), node.func_lineno)
+            raise LintWarn(msg3.format(node), node.func_lineno, node.source_file)

         for n in node.out_funcs:
             traverse(graph[n], new_stack)
@@ -276,7 +285,9 @@ def check_empty_foreaches(graph):
         if node.type == "foreach":
             joins = [n for n in node.out_funcs if graph[n].type == "join"]
             if joins:
-                raise LintWarn(msg.format(node, join=joins[0]))
+                raise LintWarn(
+                    msg.format(node, join=joins[0]), node.func_lineno, node.source_file
+                )


 @linter.ensure_static_graph
@@ -290,7 +301,7 @@ def check_parallel_step_after_next(graph):
         if node.parallel_foreach and not all(
             graph[out_node].parallel_step for out_node in node.out_funcs
         ):
-            raise LintWarn(msg.format(node))
+            raise LintWarn(msg.format(node), node.func_lineno, node.source_file)


 @linter.ensure_static_graph
@@ -303,7 +314,9 @@ def check_join_followed_by_parallel_step(graph):
     )
     for node in graph:
         if node.parallel_step and not graph[node.out_funcs[0]].type == "join":
-            raise LintWarn(msg.format(node.out_funcs[0]))
+            raise LintWarn(
+                msg.format(node.out_funcs[0]), node.func_lineno, node.source_file
+            )


 @linter.ensure_static_graph
@@ -318,7 +331,9 @@ def check_parallel_foreach_calls_parallel_step(graph):
             for node2 in graph:
                 if node2.out_funcs and node.name in node2.out_funcs:
                     if not node2.parallel_foreach:
-                        raise LintWarn(msg.format(node, node2))
+                        raise LintWarn(
+                            msg.format(node, node2), node.func_lineno, node.source_file
+                        )


 @linter.ensure_non_nested_foreach
@@ -331,4 +346,4 @@ def check_nested_foreach(graph):
     for node in graph:
         if node.type == "foreach":
             if any(graph[p].type == "foreach" for p in node.split_parents):
-                raise LintWarn(msg.format(node))
+                raise LintWarn(msg.format(node), node.func_lineno, node.source_file)
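
Every rule above now passes the offending step's line number (func_lineno or tail_next_lineno) plus the new node.source_file into LintWarn, so lint failures can be attributed to a file and line (graph.py and exception.py change accordingly in this release). As a rough, illustrative sketch only -- not the actual implementation in metaflow/exception.py -- an exception carrying that extra context could look like:

    class LintWarn(Exception):
        # hypothetical stand-in: stores the source position the linter passes along
        def __init__(self, msg, line_no=None, source_file=None):
            super().__init__(msg)
            self.line_no = line_no
            self.source_file = source_file

    try:
        raise LintWarn("Step name *foo* is a reserved word.", 42, "flow.py")
    except LintWarn as warn:
        print("%s:%s: %s" % (warn.source_file, warn.line_no, warn))
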
metaflow/metadata_provider/heartbeat.py CHANGED
@@ -1,11 +1,12 @@
+import json
 import time
+from threading import Thread
+
 import requests
-import json

-from threading import Thread
-from metaflow.sidecar import MessageTypes, Message
-from metaflow.metaflow_config import SERVICE_HEADERS
 from metaflow.exception import MetaflowException
+from metaflow.metaflow_config import SERVICE_HEADERS
+from metaflow.sidecar import Message, MessageTypes

 HB_URL_KEY = "hb_url"

@@ -52,13 +53,27 @@ class MetadataHeartBeat(object):
                 retry_counter = 0
             except HeartBeatException as e:
                 retry_counter = retry_counter + 1
-                time.sleep(4**retry_counter)
+                time.sleep(1.5**retry_counter)

     def _heartbeat(self):
         if self.hb_url is not None:
-            response = requests.post(
-                url=self.hb_url, data="{}", headers=self.headers.copy()
-            )
+            try:
+                response = requests.post(
+                    url=self.hb_url, data="{}", headers=self.headers.copy()
+                )
+            except requests.exceptions.ConnectionError as e:
+                raise HeartBeatException(
+                    "HeartBeat request (%s) failed" " (ConnectionError)" % (self.hb_url)
+                )
+            except requests.exceptions.Timeout as e:
+                raise HeartBeatException(
+                    "HeartBeat request (%s) failed" " (Timeout)" % (self.hb_url)
+                )
+            except requests.exceptions.RequestException as e:
+                raise HeartBeatException(
+                    "HeartBeat request (%s) failed"
+                    " (RequestException) %s" % (self.hb_url, str(e))
+                )
             # Unfortunately, response.json() returns a string that we need
             # to cast to json; however when the request encounters an error
             # the return type is a json blob :/
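
Two behavior changes are visible here: transient request failures are now converted into HeartBeatException, so the retry loop above absorbs them instead of the sidecar thread dying, and the retry backoff grows far more gently with a 1.5 exponent base instead of 4. A quick illustration of the backoff difference:

    # illustrative only: sleep seconds after the n-th consecutive heartbeat failure
    for retry in range(1, 6):
        print(retry, 4 ** retry, round(1.5 ** retry, 2))
    # retry 1: 4 -> 1.5, 2: 16 -> 2.25, 3: 64 -> 3.38, 4: 256 -> 5.06, 5: 1024 -> 7.59
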
metaflow/metaflow_config.py CHANGED
@@ -356,6 +356,8 @@ KUBERNETES_PERSISTENT_VOLUME_CLAIMS = from_conf(
 KUBERNETES_SECRETS = from_conf("KUBERNETES_SECRETS", "")
 # Default labels for kubernetes pods
 KUBERNETES_LABELS = from_conf("KUBERNETES_LABELS", "")
+# Default annotations for kubernetes pods
+KUBERNETES_ANNOTATIONS = from_conf("KUBERNETES_ANNOTATIONS", "")
 # Default GPU vendor to use by K8S jobs created by Metaflow (supports nvidia, amd)
 KUBERNETES_GPU_VENDOR = from_conf("KUBERNETES_GPU_VENDOR", "nvidia")
 # Default container image for K8S
@@ -511,6 +513,11 @@ MAX_CPU_PER_TASK = from_conf("MAX_CPU_PER_TASK")
 # lexicographic ordering of attempts. This won't work if MAX_ATTEMPTS > 99.
 MAX_ATTEMPTS = 6

+# Feature flag (experimental features that are *explicitly* unsupported)
+
+# Process configs even when using the click_api for Runner/Deployer
+CLICK_API_PROCESS_CONFIG = from_conf("CLICK_API_PROCESS_CONFIG", False)
+

 # PINNED_CONDA_LIBS are the libraries that metaflow depends on for execution
 # and are needed within a conda environment
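
Both additions are read through from_conf, so they can be supplied via the usual METAFLOW_-prefixed environment variables or the Metaflow config file. A hedged sketch (the comma-separated key=value format for annotations mirrors KUBERNETES_LABELS and is an assumption here, not something this hunk spells out):

    import os

    # default annotations attached to Metaflow-launched pods (assumed key=value list)
    os.environ["METAFLOW_KUBERNETES_ANNOTATIONS"] = "team=ml-platform,cost-center=1234"
    # experimental, explicitly unsupported: process configs in the click_api Runner/Deployer path
    os.environ["METAFLOW_CLICK_API_PROCESS_CONFIG"] = "true"
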
metaflow/parameters.py CHANGED
@@ -359,7 +359,7 @@ class Parameter(object):
             "show_default": show_default,
         }

-    def init(self):
+    def init(self, ignore_errors=False):
         # Prevent circular import
         from .user_configs.config_parameters import (
             resolve_delayed_evaluator,
@@ -367,14 +367,21 @@
         )

         # Resolve any value from configurations
-        self.kwargs = unpack_delayed_evaluator(self.kwargs)
-        self.kwargs = resolve_delayed_evaluator(self.kwargs)
+        self.kwargs = unpack_delayed_evaluator(self.kwargs, ignore_errors=ignore_errors)
+        # Do it one item at a time so errors are ignored at that level (as opposed to
+        # at the entire kwargs leve)
+        self.kwargs = {
+            k: resolve_delayed_evaluator(v, ignore_errors=ignore_errors)
+            for k, v in self.kwargs.items()
+        }

         # This was the behavior before configs: values specified in args would override
         # stuff in kwargs which is what we implement here as well
         for key, value in self._override_kwargs.items():
             if value is not None:
-                self.kwargs[key] = value
+                self.kwargs[key] = resolve_delayed_evaluator(
+                    value, ignore_errors=ignore_errors
+                )
         # Set two default values if no-one specified them
         self.kwargs.setdefault("required", False)
         self.kwargs.setdefault("show_default", True)
metaflow/plugins/argo/argo_client.py CHANGED
@@ -1,6 +1,4 @@
 import json
-import os
-import sys

 from metaflow.exception import MetaflowException
 from metaflow.plugins.kubernetes.kubernetes_client import KubernetesClient
metaflow/plugins/argo/argo_workflows.py CHANGED
@@ -7,12 +7,11 @@ import sys
 from collections import defaultdict
 from hashlib import sha1
 from math import inf
-from typing import List, Tuple

 from metaflow import JSONType, current
 from metaflow.decorators import flow_decorators
 from metaflow.exception import MetaflowException
-from metaflow.graph import DAGNode, FlowGraph
+from metaflow.graph import FlowGraph
 from metaflow.includefile import FilePathClass
 from metaflow.metaflow_config import (
     ARGO_EVENTS_EVENT,
@@ -39,9 +38,7 @@ from metaflow.metaflow_config import (
     DEFAULT_SECRETS_BACKEND_TYPE,
     GCP_SECRET_MANAGER_PREFIX,
     KUBERNETES_FETCH_EC2_METADATA,
-    KUBERNETES_LABELS,
     KUBERNETES_NAMESPACE,
-    KUBERNETES_NODE_SELECTOR,
     KUBERNETES_SANDBOX_INIT_SCRIPT,
     KUBERNETES_SECRETS,
     S3_ENDPOINT_URL,
@@ -55,10 +52,7 @@ from metaflow.metaflow_config_funcs import config_values, init_config
 from metaflow.mflog import BASH_SAVE_LOGS, bash_capture_logs, export_mflog_env_vars
 from metaflow.parameters import deploy_time_eval
 from metaflow.plugins.kubernetes.kube_utils import qos_requests_and_limits
-from metaflow.plugins.kubernetes.kubernetes import (
-    parse_kube_keyvalue_list,
-    validate_kube_labels,
-)
+
 from metaflow.plugins.kubernetes.kubernetes_jobsets import KubernetesArgoJobSet
 from metaflow.unbounded_foreach import UBF_CONTROL, UBF_TASK
 from metaflow.user_configs.config_options import ConfigInput
@@ -174,7 +168,8 @@ class ArgoWorkflows(object):
         self.triggers, self.trigger_options = self._process_triggers()
         self._schedule, self._timezone = self._get_schedule()

-        self.kubernetes_labels = self._get_kubernetes_labels()
+        self._base_labels = self._base_kubernetes_labels()
+        self._base_annotations = self._base_kubernetes_annotations()
         self._workflow_template = self._compile_workflow_template()
         self._sensor = self._compile_sensor()

@@ -311,7 +306,7 @@ class ArgoWorkflows(object):
         try:
             # Check that the workflow was deployed through Metaflow
             workflow_template["metadata"]["annotations"]["metaflow/owner"]
-        except KeyError as e:
+        except KeyError:
             raise ArgoWorkflowsException(
                 "An existing non-metaflow workflow with the same name as "
                 "*%s* already exists in Argo Workflows. \nPlease modify the "
@@ -325,18 +320,42 @@ class ArgoWorkflows(object):
         except Exception as e:
             raise ArgoWorkflowsException(str(e))

-    @staticmethod
-    def _get_kubernetes_labels():
+    def _base_kubernetes_labels(self):
         """
-        Get Kubernetes labels from environment variable.
-        Parses the string into a dict and validates that values adhere to Kubernetes restrictions.
+        Get shared Kubernetes labels for Argo resources.
         """
-        if not KUBERNETES_LABELS:
-            return {}
-        env_labels = KUBERNETES_LABELS.split(",")
-        env_labels = parse_kube_keyvalue_list(env_labels, False)
-        validate_kube_labels(env_labels)
-        return env_labels
+        # TODO: Add configuration through an environment variable or Metaflow config in the future if required.
+        labels = {"app.kubernetes.io/part-of": "metaflow"}
+
+        return labels
+
+    def _base_kubernetes_annotations(self):
+        """
+        Get shared Kubernetes annotations for Argo resources.
+        """
+        from datetime import datetime, timezone
+
+        # TODO: Add configuration through an environment variable or Metaflow config in the future if required.
+        # base annotations
+        annotations = {
+            "metaflow/production_token": self.production_token,
+            "metaflow/owner": self.username,
+            "metaflow/user": "argo-workflows",
+            "metaflow/flow_name": self.flow.name,
+            "metaflow/deployment_timestamp": str(
+                datetime.now(timezone.utc).isoformat()
+            ),
+        }
+
+        if current.get("project_name"):
+            annotations.update(
+                {
+                    "metaflow/project_name": current.project_name,
+                    "metaflow/branch_name": current.branch_name,
+                    "metaflow/project_flow_name": current.project_flow_name,
+                }
+            )
+        return annotations

     def _get_schedule(self):
         schedule = self.flow._flow_decorators.get("schedule")
@@ -412,7 +431,7 @@ class ArgoWorkflows(object):
                     "metaflow/production_token"
                 ],
             )
-        except KeyError as e:
+        except KeyError:
             raise ArgoWorkflowsException(
                 "An existing non-metaflow workflow with the same name as "
                 "*%s* already exists in Argo Workflows. \nPlease modify the "
@@ -677,18 +696,7 @@ class ArgoWorkflows(object):
         # generate container templates at the top level (in WorkflowSpec) and maintain
         # references to them within the DAGTask.

-        from datetime import datetime, timezone
-
-        annotations = {
-            "metaflow/production_token": self.production_token,
-            "metaflow/owner": self.username,
-            "metaflow/user": "argo-workflows",
-            "metaflow/flow_name": self.flow.name,
-            "metaflow/deployment_timestamp": str(
-                datetime.now(timezone.utc).isoformat()
-            ),
-        }
-
+        annotations = {}
         if self._schedule is not None:
             # timezone is an optional field and json dumps on None will result in null
             # hence configuring it to an empty string
@@ -700,15 +708,6 @@ class ArgoWorkflows(object):
         if self.parameters:
             annotations.update({"metaflow/parameters": json.dumps(self.parameters)})

-        if current.get("project_name"):
-            annotations.update(
-                {
-                    "metaflow/project_name": current.project_name,
-                    "metaflow/branch_name": current.branch_name,
-                    "metaflow/project_flow_name": current.project_flow_name,
-                }
-            )
-
         # Some more annotations to populate the Argo UI nicely
         if self.tags:
             annotations.update({"metaflow/tags": json.dumps(self.tags)})
@@ -756,9 +755,10 @@ class ArgoWorkflows(object):
                 # is released, we should be able to support multi-namespace /
                 # multi-cluster scheduling.
                 .namespace(KUBERNETES_NAMESPACE)
-                .label("app.kubernetes.io/name", "metaflow-flow")
-                .label("app.kubernetes.io/part-of", "metaflow")
                 .annotations(annotations)
+                .annotations(self._base_annotations)
+                .labels(self._base_labels)
+                .label("app.kubernetes.io/name", "metaflow-flow")
             )
             .spec(
                 WorkflowSpec()
@@ -788,10 +788,14 @@ class ArgoWorkflows(object):
                 # Set workflow metadata
                 .workflow_metadata(
                     Metadata()
+                    .labels(self._base_labels)
                     .label("app.kubernetes.io/name", "metaflow-run")
-                    .label("app.kubernetes.io/part-of", "metaflow")
                     .annotations(
-                        {**annotations, **{"metaflow/run_id": "argo-{{workflow.name}}"}}
+                        {
+                            **annotations,
+                            **self._base_annotations,
+                            **{"metaflow/run_id": "argo-{{workflow.name}}"},
+                        }
                     )
                     # TODO: Set dynamic labels using labels_from. Ideally, we would
                     # want to expose run_id as a label. It's easy to add labels,
@@ -824,10 +828,10 @@ class ArgoWorkflows(object):
                 # Set common pod metadata.
                 .pod_metadata(
                     Metadata()
+                    .labels(self._base_labels)
                     .label("app.kubernetes.io/name", "metaflow-task")
-                    .label("app.kubernetes.io/part-of", "metaflow")
                     .annotations(annotations)
-                    .labels(self.kubernetes_labels)
+                    .annotations(self._base_annotations)
                 )
                 # Set the entrypoint to flow name
                 .entrypoint(self.flow.name)
@@ -1911,15 +1915,7 @@ class ArgoWorkflows(object):
        # twice, but due to issues with variable substitution, we will have to
        # live with this routine.
        if node.parallel_step:
-            # Explicitly add the task-id-hint label. This is important because this label
-            # is returned as an Output parameter of this step and is used subsequently as an
-            # an input in the join step.
-            kubernetes_labels = self.kubernetes_labels.copy()
             jobset_name = "{{inputs.parameters.jobset-name}}"
-            kubernetes_labels["task_id_entropy"] = (
-                "{{inputs.parameters.task-id-entropy}}"
-            )
-            kubernetes_labels["num_parallel"] = "{{inputs.parameters.num-parallel}}"
             jobset = KubernetesArgoJobSet(
                 kubernetes_sdk=kubernetes_sdk,
                 name=jobset_name,
@@ -1975,8 +1971,22 @@ class ArgoWorkflows(object):
             for k, v in env.items():
                 jobset.environment_variable(k, v)

-            for k, v in kubernetes_labels.items():
-                jobset.label(k, v)
+            # Set labels. Do not allow user-specified task labels to override internal ones.
+            #
+            # Explicitly add the task-id-hint label. This is important because this label
+            # is returned as an Output parameter of this step and is used subsequently as an
+            # an input in the join step.
+            kubernetes_labels = {
+                "task_id_entropy": "{{inputs.parameters.task-id-entropy}}",
+                "num_parallel": "{{inputs.parameters.num-parallel}}",
+            }
+            jobset.labels(
+                {
+                    **resources["labels"],
+                    **self._base_labels,
+                    **kubernetes_labels,
+                }
+            )

             jobset.environment_variable(
                 "MF_MASTER_ADDR", jobset.jobset_control_addr
@@ -2005,27 +2015,23 @@ class ArgoWorkflows(object):
                     "TASK_ID_SUFFIX": "metadata.annotations['jobset.sigs.k8s.io/job-index']",
                 }
             )
+
+            # Set annotations. Do not allow user-specified task-specific annotations to override internal ones.
             annotations = {
                 # setting annotations explicitly as they wont be
                 # passed down from WorkflowTemplate level
                 "metaflow/step_name": node.name,
                 "metaflow/attempt": str(retry_count),
                 "metaflow/run_id": run_id,
-                "metaflow/production_token": self.production_token,
-                "metaflow/owner": self.username,
-                "metaflow/user": "argo-workflows",
-                "metaflow/flow_name": self.flow.name,
             }
-            if current.get("project_name"):
-                annotations.update(
-                    {
-                        "metaflow/project_name": current.project_name,
-                        "metaflow/branch_name": current.branch_name,
-                        "metaflow/project_flow_name": current.project_flow_name,
-                    }
-                )
-            for k, v in annotations.items():
-                jobset.annotation(k, v)
+
+            jobset.annotations(
+                {
+                    **resources["annotations"],
+                    **self._base_annotations,
+                    **annotations,
+                }
+            )

             jobset.control.replicas(1)
             jobset.worker.replicas("{{=asInt(inputs.parameters.workerCount)}}")
@@ -2082,13 +2088,16 @@ class ArgoWorkflows(object):
                     minutes_between_retries=minutes_between_retries,
                 )
                 .metadata(
-                    ObjectMeta().annotation("metaflow/step_name", node.name)
+                    ObjectMeta()
+                    .annotation("metaflow/step_name", node.name)
                     # Unfortunately, we can't set the task_id since it is generated
                     # inside the pod. However, it can be inferred from the annotation
                     # set by argo-workflows - `workflows.argoproj.io/outputs` - refer
                     # the field 'task-id' in 'parameters'
                     # .annotation("metaflow/task_id", ...)
                     .annotation("metaflow/attempt", retry_count)
+                    .annotations(resources["annotations"])
+                    .labels(resources["labels"])
                 )
                 # Set emptyDir volume for state management
                 .empty_dir_volume("out")
@@ -2871,33 +2880,6 @@ class ArgoWorkflows(object):
                 "sdk (https://pypi.org/project/kubernetes/) first."
             )

-        labels = {"app.kubernetes.io/part-of": "metaflow"}
-
-        annotations = {
-            "metaflow/production_token": self.production_token,
-            "metaflow/owner": self.username,
-            "metaflow/user": "argo-workflows",
-            "metaflow/flow_name": self.flow.name,
-        }
-        if current.get("project_name"):
-            annotations.update(
-                {
-                    "metaflow/project_name": current.project_name,
-                    "metaflow/branch_name": current.branch_name,
-                    "metaflow/project_flow_name": current.project_flow_name,
-                }
-            )
-
-        # Useful to paint the UI
-        trigger_annotations = {
-            "metaflow/triggered_by": json.dumps(
-                [
-                    {key: trigger.get(key) for key in ["name", "type"]}
-                    for trigger in self.triggers
-                ]
-            )
-        }
-
         return (
             Sensor()
             .metadata(
@@ -2905,10 +2887,9 @@ class ArgoWorkflows(object):
                 ObjectMeta()
                 .name(ArgoWorkflows._sensor_name(self.name))
                 .namespace(KUBERNETES_NAMESPACE)
+                .labels(self._base_labels)
                 .label("app.kubernetes.io/name", "metaflow-sensor")
-                .label("app.kubernetes.io/part-of", "metaflow")
-                .labels(self.kubernetes_labels)
-                .annotations(annotations)
+                .annotations(self._base_annotations)
             )
             .spec(
                 SensorSpec().template(
@@ -2918,7 +2899,7 @@ class ArgoWorkflows(object):
                     ObjectMeta()
                     .label("app.kubernetes.io/name", "metaflow-sensor")
                     .label("app.kubernetes.io/part-of", "metaflow")
-                    .annotations(annotations)
+                    .annotations(self._base_annotations)
                 )
                 .container(
                     # Run sensor in guaranteed QoS. The sensor isn't doing a lot
@@ -2965,6 +2946,7 @@ class ArgoWorkflows(object):
                             "metadata": {
                                 "generateName": "%s-" % self.name,
                                 "namespace": KUBERNETES_NAMESPACE,
+                                # Useful to paint the UI
                                 "annotations": {
                                     "metaflow/triggered_by": json.dumps(
                                         [
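
The recurring pattern in this file is that labels and annotations for each Argo resource are now built by merging plain dicts, with the user-supplied values (resources["labels"] / resources["annotations"], taken from the step's Kubernetes settings) listed first so that Metaflow's base and task-specific keys win on conflict. A small sketch of that precedence rule:

    # illustrative only: later entries in a dict merge override earlier ones
    user_labels = {"team": "ml", "app.kubernetes.io/part-of": "user-override"}
    base_labels = {"app.kubernetes.io/part-of": "metaflow"}
    task_labels = {"task_id_entropy": "{{inputs.parameters.task-id-entropy}}"}

    merged = {**user_labels, **base_labels, **task_labels}
    print(merged["app.kubernetes.io/part-of"])  # 'metaflow' -- the internal label wins
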
metaflow/plugins/argo/argo_workflows_cli.py CHANGED
@@ -8,7 +8,6 @@ from time import sleep

 from metaflow import JSONType, Run, current, decorators, parameters
 from metaflow._vendor import click
-from metaflow.client.core import get_metadata
 from metaflow.exception import (
     MetaflowException,
     MetaflowInternalError,
metaflow/plugins/argo/argo_workflows_decorator.py CHANGED
@@ -1,14 +1,12 @@
 import json
 import os
-import time


 from metaflow import current
 from metaflow.decorators import StepDecorator
 from metaflow.events import Trigger
 from metaflow.metadata_provider import MetaDatum
-from metaflow.metaflow_config import ARGO_EVENTS_WEBHOOK_URL
-from metaflow.graph import DAGNode, FlowGraph
+from metaflow.graph import FlowGraph
 from metaflow.flowspec import FlowSpec
 from .argo_events import ArgoEvent

@@ -42,7 +40,7 @@ class ArgoWorkflowsInternalDecorator(StepDecorator):
            if payload != "null":  # Argo-Workflow's None
                try:
                    payload = json.loads(payload)
-                except (TypeError, ValueError) as e:
+                except (TypeError, ValueError):
                    # There could be arbitrary events that Metaflow doesn't know of
                    payload = {}
                triggers.append(
metaflow/plugins/argo/jobset_input_paths.py CHANGED
@@ -1,5 +1,4 @@
 import sys
-from hashlib import md5


 def generate_input_paths(run_id, step_name, task_id_entropy, num_parallel):