ob-metaflow 2.12.10.1rc2__py2.py3-none-any.whl → 2.12.11.0__py2.py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of ob-metaflow might be problematic.

Files changed (45)
  1. metaflow/client/core.py +6 -6
  2. metaflow/client/filecache.py +16 -3
  3. metaflow/cmd/develop/stub_generator.py +62 -47
  4. metaflow/datastore/content_addressed_store.py +1 -1
  5. metaflow/datastore/task_datastore.py +1 -1
  6. metaflow/decorators.py +2 -4
  7. metaflow/extension_support/__init__.py +3 -3
  8. metaflow/extension_support/plugins.py +3 -3
  9. metaflow/metaflow_config.py +35 -18
  10. metaflow/parameters.py +3 -3
  11. metaflow/plugins/airflow/airflow.py +6 -6
  12. metaflow/plugins/airflow/airflow_utils.py +5 -3
  13. metaflow/plugins/argo/argo_workflows.py +555 -192
  14. metaflow/plugins/argo/argo_workflows_cli.py +27 -4
  15. metaflow/plugins/argo/argo_workflows_decorator.py +6 -13
  16. metaflow/plugins/argo/capture_error.py +70 -0
  17. metaflow/plugins/argo/daemon.py +59 -0
  18. metaflow/plugins/aws/step_functions/step_functions.py +3 -3
  19. metaflow/plugins/cards/card_modules/basic.py +5 -3
  20. metaflow/plugins/cards/card_modules/convert_to_native_type.py +2 -2
  21. metaflow/plugins/cards/card_modules/renderer_tools.py +1 -0
  22. metaflow/plugins/cards/card_modules/test_cards.py +0 -2
  23. metaflow/plugins/datastores/gs_storage.py +3 -10
  24. metaflow/plugins/datatools/s3/s3op.py +5 -3
  25. metaflow/plugins/kubernetes/kubernetes.py +1 -0
  26. metaflow/plugins/kubernetes/kubernetes_job.py +32 -42
  27. metaflow/plugins/kubernetes/kubernetes_jobsets.py +16 -14
  28. metaflow/plugins/logs_cli.py +1 -0
  29. metaflow/plugins/pypi/conda_environment.py +1 -3
  30. metaflow/plugins/pypi/pip.py +3 -3
  31. metaflow/plugins/storage_executor.py +1 -5
  32. metaflow/plugins/tag_cli.py +3 -3
  33. metaflow/procpoll.py +1 -1
  34. metaflow/runtime.py +1 -0
  35. metaflow/tracing/__init__.py +0 -5
  36. metaflow/tracing/tracing_modules.py +1 -4
  37. metaflow/util.py +6 -6
  38. metaflow/version.py +1 -1
  39. {ob_metaflow-2.12.10.1rc2.dist-info → ob_metaflow-2.12.11.0.dist-info}/METADATA +2 -2
  40. {ob_metaflow-2.12.10.1rc2.dist-info → ob_metaflow-2.12.11.0.dist-info}/RECORD +44 -43
  41. metaflow/tracing/threadpool.py +0 -30
  42. {ob_metaflow-2.12.10.1rc2.dist-info → ob_metaflow-2.12.11.0.dist-info}/LICENSE +0 -0
  43. {ob_metaflow-2.12.10.1rc2.dist-info → ob_metaflow-2.12.11.0.dist-info}/WHEEL +0 -0
  44. {ob_metaflow-2.12.10.1rc2.dist-info → ob_metaflow-2.12.11.0.dist-info}/entry_points.txt +0 -0
  45. {ob_metaflow-2.12.10.1rc2.dist-info → ob_metaflow-2.12.11.0.dist-info}/top_level.txt +0 -0
@@ -4,15 +4,15 @@ import os
 import re
 import shlex
 import sys
-from typing import Tuple, List
 from collections import defaultdict
 from hashlib import sha1
 from math import inf
+from typing import List, Tuple
 
 from metaflow import JSONType, current
-from metaflow.graph import DAGNode
 from metaflow.decorators import flow_decorators
 from metaflow.exception import MetaflowException
+from metaflow.graph import DAGNode, FlowGraph
 from metaflow.includefile import FilePathClass
 from metaflow.metaflow_config import (
     ARGO_EVENTS_EVENT,
@@ -21,10 +21,12 @@ from metaflow.metaflow_config import (
     ARGO_EVENTS_INTERNAL_WEBHOOK_URL,
     ARGO_EVENTS_SERVICE_ACCOUNT,
     ARGO_EVENTS_WEBHOOK_AUTH,
+    ARGO_WORKFLOWS_CAPTURE_ERROR_SCRIPT,
     ARGO_WORKFLOWS_ENV_VARS_TO_SKIP,
     ARGO_WORKFLOWS_KUBERNETES_SECRETS,
     ARGO_WORKFLOWS_UI_URL,
     AWS_SECRETS_MANAGER_DEFAULT_REGION,
+    AZURE_KEY_VAULT_PREFIX,
     AZURE_STORAGE_BLOB_SERVICE_ENDPOINT,
     CARD_AZUREROOT,
     CARD_GSROOT,
@@ -36,7 +38,6 @@ from metaflow.metaflow_config import (
     DEFAULT_METADATA,
     DEFAULT_SECRETS_BACKEND_TYPE,
     GCP_SECRET_MANAGER_PREFIX,
-    AZURE_KEY_VAULT_PREFIX,
     KUBERNETES_FETCH_EC2_METADATA,
     KUBERNETES_LABELS,
     KUBERNETES_NAMESPACE,
@@ -50,7 +51,6 @@ from metaflow.metaflow_config import (
     UI_URL,
     PAGERDUTY_TEMPLATE_URL,
 )
-from metaflow.unbounded_foreach import UBF_CONTROL, UBF_TASK
 from metaflow.metaflow_config_funcs import config_values
 from metaflow.mflog import BASH_SAVE_LOGS, bash_capture_logs, export_mflog_env_vars
 from metaflow.parameters import deploy_time_eval
@@ -58,7 +58,8 @@ from metaflow.plugins.kubernetes.kubernetes import (
     parse_kube_keyvalue_list,
     validate_kube_labels,
 )
-from metaflow.graph import FlowGraph
+from metaflow.plugins.kubernetes.kubernetes_jobsets import KubernetesArgoJobSet
+from metaflow.unbounded_foreach import UBF_CONTROL, UBF_TASK
 from metaflow.util import (
     compress_list,
     dict_to_cli_options,
@@ -66,9 +67,6 @@ from metaflow.util import (
     to_camelcase,
     to_unicode,
 )
-from metaflow.plugins.kubernetes.kubernetes_jobsets import (
-    KubernetesArgoJobSet,
-)
 
 from .argo_client import ArgoClient
 
@@ -118,6 +116,8 @@ class ArgoWorkflows(object):
         notify_on_success=False,
         notify_slack_webhook_url=None,
         notify_pager_duty_integration_key=None,
+        enable_heartbeat_daemon=True,
+        enable_error_msg_capture=False,
     ):
         # Some high-level notes -
         #
@@ -165,7 +165,8 @@ class ArgoWorkflows(object):
         self.notify_on_success = notify_on_success
         self.notify_slack_webhook_url = notify_slack_webhook_url
         self.notify_pager_duty_integration_key = notify_pager_duty_integration_key
-
+        self.enable_heartbeat_daemon = enable_heartbeat_daemon
+        self.enable_error_msg_capture = enable_error_msg_capture
         self.parameters = self._process_parameters()
         self.triggers, self.trigger_options = self._process_triggers()
         self._schedule, self._timezone = self._get_schedule()
@@ -785,6 +786,12 @@ class ArgoWorkflows(object):
                 )
                 # Set the entrypoint to flow name
                 .entrypoint(self.flow.name)
+                # OnExit hooks
+                .onExit(
+                    "capture-error-hook-fn-preflight"
+                    if self.enable_error_msg_capture
+                    else None
+                )
                 # Set exit hook handlers if notifications are enabled
                 .hooks(
                     {
@@ -854,6 +861,8 @@ class ArgoWorkflows(object):
                 .templates(self._container_templates())
                 # Exit hook template(s)
                 .templates(self._exit_hook_templates())
+                # Sidecar templates (Daemon Containers)
+                .templates(self._daemon_templates())
             )
         )
 
@@ -1060,7 +1069,7 @@ class ArgoWorkflows(object):
                     "%s-foreach-%s"
                     % (
                         node.name,
-                        "parallel" if node.parallel_foreach else node.foreach_param
+                        "parallel" if node.parallel_foreach else node.foreach_param,
                         # Since foreach's are derived based on `self.next(self.a, foreach="<varname>")`
                         # vs @parallel foreach are done based on `self.next(self.a, num_parallel="<some-number>")`,
                         # we need to ensure that `foreach_template_name` suffix is appropriately set based on the kind
@@ -1266,7 +1275,13 @@ class ArgoWorkflows(object):
                     "Argo Workflows." % (node.type, node.name)
                 )
 
-        templates, _ = _visit(node=self.graph["start"])
+        # Generate daemon tasks
+        daemon_tasks = [
+            DAGTask("%s-task" % daemon_template.name).template(daemon_template.name)
+            for daemon_template in self._daemon_templates()
+        ]
+
+        templates, _ = _visit(node=self.graph["start"], dag_tasks=daemon_tasks)
         return templates
 
     # Visit every node and yield ContainerTemplates.
@@ -1351,7 +1366,7 @@ class ArgoWorkflows(object):
                 task_str = "-".join(
                     [
                         "$TASK_ID_PREFIX",
-                        "{{inputs.parameters.task-id-entropy}}", # id_base is addition entropy to based on node-name of the workflow
+                        "{{inputs.parameters.task-id-entropy}}",
                         "$TASK_ID_SUFFIX",
                     ]
                 )
@@ -1382,8 +1397,6 @@ class ArgoWorkflows(object):
             user_code_retries = max_user_code_retries
             total_retries = max_user_code_retries + max_error_retries
             # {{retries}} is only available if retryStrategy is specified
-            # and they are only available in the container templates NOT for custom
-            # Kubernetes manifests like Jobsets.
             # For custom kubernetes manifests, we will pass the retryCount as a parameter
             # and use that in the manifest.
             retry_count = (
@@ -1510,8 +1523,7 @@ class ArgoWorkflows(object):
                     )
                 )
             else:
-                # When we run Jobsets with Argo Workflows we need to ensure that `input_paths` are generated using the a formulaic approach
-                # because our current strategy of using volume mounts for outputs won't work with Jobsets
+                # Handle @parallel where output from volume mount isn't accessible
                 input_paths = (
                     "$(python -m metaflow.plugins.argo.jobset_input_paths %s %s {{inputs.parameters.task-id-entropy}} {{inputs.parameters.num-parallel}})"
                     % (
@@ -1650,16 +1662,16 @@ class ArgoWorkflows(object):
 
             # support for @secret
             env["METAFLOW_DEFAULT_SECRETS_BACKEND_TYPE"] = DEFAULT_SECRETS_BACKEND_TYPE
-            env[
-                "METAFLOW_AWS_SECRETS_MANAGER_DEFAULT_REGION"
-            ] = AWS_SECRETS_MANAGER_DEFAULT_REGION
+            env["METAFLOW_AWS_SECRETS_MANAGER_DEFAULT_REGION"] = (
+                AWS_SECRETS_MANAGER_DEFAULT_REGION
+            )
             env["METAFLOW_GCP_SECRET_MANAGER_PREFIX"] = GCP_SECRET_MANAGER_PREFIX
             env["METAFLOW_AZURE_KEY_VAULT_PREFIX"] = AZURE_KEY_VAULT_PREFIX
 
             # support for Azure
-            env[
-                "METAFLOW_AZURE_STORAGE_BLOB_SERVICE_ENDPOINT"
-            ] = AZURE_STORAGE_BLOB_SERVICE_ENDPOINT
+            env["METAFLOW_AZURE_STORAGE_BLOB_SERVICE_ENDPOINT"] = (
+                AZURE_STORAGE_BLOB_SERVICE_ENDPOINT
+            )
             env["METAFLOW_DATASTORE_SYSROOT_AZURE"] = DATASTORE_SYSROOT_AZURE
             env["METAFLOW_CARD_AZUREROOT"] = CARD_AZUREROOT
 
@@ -1724,9 +1736,7 @@ class ArgoWorkflows(object):
                 else:
                     # append this only for joins of foreaches, not static splits
                     inputs.append(Parameter("split-cardinality"))
-            # We can use an `elif` condition because the first `if` condition validates if its
-            # a foreach join node, hence we can safely assume that if that condition fails then
-            # we can check if the node is a @parallel node.
+            # check if the node is a @parallel node.
             elif node.parallel_step:
                 inputs.extend(
                     [
@@ -1781,7 +1791,7 @@ class ArgoWorkflows(object):
                         ),
                     ]
                 )
-            # Outputs should be defined over here, Not in the _dag_template for the `num_parallel` stuff.
+            # Outputs should be defined over here and not in the _dag_template for @parallel.
 
             # It makes no sense to set env vars to None (shows up as "None" string)
             # Also we skip some env vars (e.g. in case we want to pull them from KUBERNETES_SECRETS)
@@ -1808,20 +1818,20 @@ class ArgoWorkflows(object):
 
             if tmpfs_enabled and tmpfs_tempdir:
                 env["METAFLOW_TEMPDIR"] = tmpfs_path
+
             # Create a ContainerTemplate for this node. Ideally, we would have
             # liked to inline this ContainerTemplate and avoid scanning the workflow
             # twice, but due to issues with variable substitution, we will have to
             # live with this routine.
             if node.parallel_step:
-
                 # Explicitly add the task-id-hint label. This is important because this label
-                # is returned as an Output parameter of this step and is used subsequently an
-                # an input in the join step. Even the num_parallel is used as an output parameter
+                # is returned as an Output parameter of this step and is used subsequently as an
+                # an input in the join step.
                 kubernetes_labels = self.kubernetes_labels.copy()
                 jobset_name = "{{inputs.parameters.jobset-name}}"
-                kubernetes_labels[
-                    "task_id_entropy"
-                ] = "{{inputs.parameters.task-id-entropy}}"
+                kubernetes_labels["task_id_entropy"] = (
+                    "{{inputs.parameters.task-id-entropy}}"
+                )
                 kubernetes_labels["num_parallel"] = "{{inputs.parameters.num-parallel}}"
                 jobset = KubernetesArgoJobSet(
                     kubernetes_sdk=kubernetes_sdk,
@@ -1845,9 +1855,11 @@ class ArgoWorkflows(object):
                     list(
                         []
                         if not resources.get("secrets")
-                        else [resources.get("secrets")]
-                        if isinstance(resources.get("secrets"), str)
-                        else resources.get("secrets")
+                        else (
+                            [resources.get("secrets")]
+                            if isinstance(resources.get("secrets"), str)
+                            else resources.get("secrets")
+                        )
                     )
                     + KUBERNETES_SECRETS.split(",")
                     + ARGO_WORKFLOWS_KUBERNETES_SECRETS.split(",")
@@ -1878,7 +1890,6 @@ class ArgoWorkflows(object):
                 for k, v in kubernetes_labels.items():
                     jobset.label(k, v)
 
-                ## -----Jobset specific env vars START here-----
                 jobset.environment_variable(
                     "MF_MASTER_ADDR", jobset.jobset_control_addr
                 )
@@ -1897,7 +1908,6 @@ class ArgoWorkflows(object):
                         "METAFLOW_KUBERNETES_POD_ID": "metadata.uid",
                         "METAFLOW_KUBERNETES_SERVICE_ACCOUNT_NAME": "spec.serviceAccountName",
                         "METAFLOW_KUBERNETES_NODE_IP": "status.hostIP",
-                        # `TASK_ID_SUFFIX` is needed for the construction of the task-ids
                         "TASK_ID_SUFFIX": "metadata.annotations['jobset.sigs.k8s.io/job-index']",
                     }
                 )
@@ -1922,8 +1932,7 @@ class ArgoWorkflows(object):
                 )
                 for k, v in annotations.items():
                     jobset.annotation(k, v)
-                ## -----Jobset specific env vars END here-----
-                ## ---- Jobset control/workers specific vars START here ----
+
                 jobset.control.replicas(1)
                 jobset.worker.replicas("{{=asInt(inputs.parameters.workerCount)}}")
                 jobset.control.environment_variable("UBF_CONTEXT", UBF_CONTROL)
@@ -1934,7 +1943,6 @@ class ArgoWorkflows(object):
                 jobset.control.environment_variable("TASK_ID_PREFIX", "control")
                 jobset.worker.environment_variable("TASK_ID_PREFIX", "worker")
 
-                ## ---- Jobset control/workers specific vars END here ----
                 yield (
                     Template(ArgoWorkflows._sanitize(node.name))
                     .resource(
@@ -1961,167 +1969,185 @@ class ArgoWorkflows(object):
                         minutes_between_retries=minutes_between_retries,
                     )
                 )
-                continue
-            yield (
-                Template(self._sanitize(node.name))
-                # Set @timeout values
-                .active_deadline_seconds(run_time_limit)
-                # Set service account
-                .service_account_name(resources["service_account"])
-                # Configure template input
-                .inputs(Inputs().parameters(inputs))
-                # Configure template output
-                .outputs(Outputs().parameters(outputs))
-                # Fail fast!
-                .fail_fast()
-                # Set @retry/@catch values
-                .retry_strategy(
-                    times=total_retries,
-                    minutes_between_retries=minutes_between_retries,
-                )
-                .metadata(
-                    ObjectMeta().annotation("metaflow/step_name", node.name)
-                    # Unfortunately, we can't set the task_id since it is generated
-                    # inside the pod. However, it can be inferred from the annotation
-                    # set by argo-workflows - `workflows.argoproj.io/outputs` - refer
-                    # the field 'task-id' in 'parameters'
-                    # .annotation("metaflow/task_id", ...)
-                    .annotation("metaflow/attempt", retry_count)
-                )
-                # Set emptyDir volume for state management
-                .empty_dir_volume("out")
-                # Set tmpfs emptyDir volume if enabled
-                .empty_dir_volume(
-                    "tmpfs-ephemeral-volume",
-                    medium="Memory",
-                    size_limit=tmpfs_size if tmpfs_enabled else 0,
-                )
-                .empty_dir_volume("dhsm", medium="Memory", size_limit=shared_memory)
-                .pvc_volumes(resources.get("persistent_volume_claims"))
-                # Set node selectors
-                .node_selectors(resources.get("node_selector"))
-                # Set tolerations
-                .tolerations(resources.get("tolerations"))
-                # Set container
-                .container(
-                    # TODO: Unify the logic with kubernetes.py
-                    # Important note - Unfortunately, V1Container uses snakecase while
-                    # Argo Workflows uses camel. For most of the attributes, both cases
-                    # are indistinguishable, but unfortunately, not for all - (
-                    # env_from, value_from, etc.) - so we need to handle the conversion
-                    # ourselves using to_camelcase. We need to be vigilant about
-                    # resources attributes in particular where the keys maybe user
-                    # defined.
-                    to_camelcase(
-                        kubernetes_sdk.V1Container(
-                            name=self._sanitize(node.name),
-                            command=cmds,
-                            termination_message_policy= "FallbackToLogsOnError",
-                            ports=[kubernetes_sdk.V1ContainerPort(container_port=port)]
-                            if port
-                            else None,
-                            env=[
-                                kubernetes_sdk.V1EnvVar(name=k, value=str(v))
-                                for k, v in env.items()
-                            ]
-                            # Add environment variables for book-keeping.
-                            # https://argoproj.github.io/argo-workflows/fields/#fields_155
-                            + [
-                                kubernetes_sdk.V1EnvVar(
-                                    name=k,
-                                    value_from=kubernetes_sdk.V1EnvVarSource(
-                                        field_ref=kubernetes_sdk.V1ObjectFieldSelector(
-                                            field_path=str(v)
+            else:
+                yield (
+                    Template(self._sanitize(node.name))
+                    # Set @timeout values
+                    .active_deadline_seconds(run_time_limit)
+                    # Set service account
+                    .service_account_name(resources["service_account"])
+                    # Configure template input
+                    .inputs(Inputs().parameters(inputs))
+                    # Configure template output
+                    .outputs(Outputs().parameters(outputs))
+                    # Fail fast!
+                    .fail_fast()
+                    # Set @retry/@catch values
+                    .retry_strategy(
+                        times=total_retries,
+                        minutes_between_retries=minutes_between_retries,
+                    )
+                    .metadata(
+                        ObjectMeta().annotation("metaflow/step_name", node.name)
+                        # Unfortunately, we can't set the task_id since it is generated
+                        # inside the pod. However, it can be inferred from the annotation
+                        # set by argo-workflows - `workflows.argoproj.io/outputs` - refer
+                        # the field 'task-id' in 'parameters'
+                        # .annotation("metaflow/task_id", ...)
+                        .annotation("metaflow/attempt", retry_count)
+                    )
+                    # Set emptyDir volume for state management
+                    .empty_dir_volume("out")
+                    # Set tmpfs emptyDir volume if enabled
+                    .empty_dir_volume(
+                        "tmpfs-ephemeral-volume",
+                        medium="Memory",
+                        size_limit=tmpfs_size if tmpfs_enabled else 0,
+                    )
+                    .empty_dir_volume("dhsm", medium="Memory", size_limit=shared_memory)
+                    .pvc_volumes(resources.get("persistent_volume_claims"))
+                    # Set node selectors
+                    .node_selectors(resources.get("node_selector"))
+                    # Set tolerations
+                    .tolerations(resources.get("tolerations"))
+                    # Set container
+                    .container(
+                        # TODO: Unify the logic with kubernetes.py
+                        # Important note - Unfortunately, V1Container uses snakecase while
+                        # Argo Workflows uses camel. For most of the attributes, both cases
+                        # are indistinguishable, but unfortunately, not for all - (
+                        # env_from, value_from, etc.) - so we need to handle the conversion
+                        # ourselves using to_camelcase. We need to be vigilant about
+                        # resources attributes in particular where the keys maybe user
+                        # defined.
+                        to_camelcase(
+                            kubernetes_sdk.V1Container(
+                                name=self._sanitize(node.name),
+                                command=cmds,
+                                termination_message_policy="FallbackToLogsOnError",
+                                ports=(
+                                    [
+                                        kubernetes_sdk.V1ContainerPort(
+                                            container_port=port
                                         )
-                                    ),
-                                )
-                                for k, v in {
-                                    "METAFLOW_KUBERNETES_POD_NAMESPACE": "metadata.namespace",
-                                    "METAFLOW_KUBERNETES_POD_NAME": "metadata.name",
-                                    "METAFLOW_KUBERNETES_POD_ID": "metadata.uid",
-                                    "METAFLOW_KUBERNETES_SERVICE_ACCOUNT_NAME": "spec.serviceAccountName",
-                                    "METAFLOW_KUBERNETES_NODE_IP": "status.hostIP",
-                                }.items()
-                            ],
-                            image=resources["image"],
-                            image_pull_policy=resources["image_pull_policy"],
-                            resources=kubernetes_sdk.V1ResourceRequirements(
-                                requests={
-                                    "cpu": str(resources["cpu"]),
-                                    "memory": "%sM" % str(resources["memory"]),
-                                    "ephemeral-storage": "%sM" % str(resources["disk"]),
-                                },
-                                limits={
-                                    "%s.com/gpu".lower()
-                                    % resources["gpu_vendor"]: str(resources["gpu"])
-                                    for k in [0]
-                                    if resources["gpu"] is not None
-                                },
-                            ),
-                            # Configure secrets
-                            env_from=[
-                                kubernetes_sdk.V1EnvFromSource(
-                                    secret_ref=kubernetes_sdk.V1SecretEnvSource(
-                                        name=str(k),
-                                        # optional=True
+                                    ]
+                                    if port
+                                    else None
+                                ),
+                                env=[
+                                    kubernetes_sdk.V1EnvVar(name=k, value=str(v))
+                                    for k, v in env.items()
+                                ]
+                                # Add environment variables for book-keeping.
+                                # https://argoproj.github.io/argo-workflows/fields/#fields_155
+                                + [
+                                    kubernetes_sdk.V1EnvVar(
+                                        name=k,
+                                        value_from=kubernetes_sdk.V1EnvVarSource(
+                                            field_ref=kubernetes_sdk.V1ObjectFieldSelector(
+                                                field_path=str(v)
+                                            )
+                                        ),
                                     )
-                                )
-                                for k in list(
-                                    []
-                                    if not resources.get("secrets")
-                                    else [resources.get("secrets")]
-                                    if isinstance(resources.get("secrets"), str)
-                                    else resources.get("secrets")
-                                )
-                                + KUBERNETES_SECRETS.split(",")
-                                + ARGO_WORKFLOWS_KUBERNETES_SECRETS.split(",")
-                                if k
-                            ],
-                            volume_mounts=[
-                                # Assign a volume mount to pass state to the next task.
-                                kubernetes_sdk.V1VolumeMount(
-                                    name="out", mount_path="/mnt/out"
-                                )
-                            ]
-                            # Support tmpfs.
-                            + (
-                                [
-                                    kubernetes_sdk.V1VolumeMount(
-                                        name="tmpfs-ephemeral-volume",
-                                        mount_path=tmpfs_path,
+                                    for k, v in {
+                                        "METAFLOW_KUBERNETES_NAMESPACE": "metadata.namespace",
+                                        "METAFLOW_KUBERNETES_POD_NAMESPACE": "metadata.namespace",
+                                        "METAFLOW_KUBERNETES_POD_NAME": "metadata.name",
+                                        "METAFLOW_KUBERNETES_POD_ID": "metadata.uid",
+                                        "METAFLOW_KUBERNETES_SERVICE_ACCOUNT_NAME": "spec.serviceAccountName",
+                                        "METAFLOW_KUBERNETES_NODE_IP": "status.hostIP",
+                                    }.items()
+                                ],
+                                image=resources["image"],
+                                image_pull_policy=resources["image_pull_policy"],
+                                resources=kubernetes_sdk.V1ResourceRequirements(
+                                    requests={
+                                        "cpu": str(resources["cpu"]),
+                                        "memory": "%sM" % str(resources["memory"]),
+                                        "ephemeral-storage": "%sM"
+                                        % str(resources["disk"]),
+                                    },
+                                    limits={
+                                        "%s.com/gpu".lower()
+                                        % resources["gpu_vendor"]: str(resources["gpu"])
+                                        for k in [0]
+                                        if resources["gpu"] is not None
+                                    },
+                                ),
+                                # Configure secrets
+                                env_from=[
+                                    kubernetes_sdk.V1EnvFromSource(
+                                        secret_ref=kubernetes_sdk.V1SecretEnvSource(
+                                            name=str(k),
+                                            # optional=True
+                                        )
                                     )
-                                ]
-                                if tmpfs_enabled
-                                else []
-                            )
-                            # Support shared_memory
-                            + (
-                                [
-                                    kubernetes_sdk.V1VolumeMount(
-                                        name="dhsm",
-                                        mount_path="/dev/shm",
+-                                )
+-                                for k in list(
+-                                    []
+-                                    if not resources.get("secrets")
+-                                    else [resources.get("secrets")]
+-                                    if isinstance(resources.get("secrets"), str)
+-                                    else resources.get("secrets")
+-                                )
+-                                + KUBERNETES_SECRETS.split(",")
+-                                + ARGO_WORKFLOWS_KUBERNETES_SECRETS.split(",")
+-                                if k
+-                            ],
+-                            volume_mounts=[
+-                                # Assign a volume mount to pass state to the next task.
+-                                kubernetes_sdk.V1VolumeMount(
+-                                    name="out", mount_path="/mnt/out"
+-                                )
+-                            ]
+-                            # Support tmpfs.
+-                            + (
+-                                [
+-                                    kubernetes_sdk.V1VolumeMount(
+-                                        name="tmpfs-ephemeral-volume",
+-                                        mount_path=tmpfs_path,
+                                            )
+-                                    ]
+-                                    if tmpfs_enabled
+-                                    else []
+-                                )
+-                                # Support shared_memory
+-                                + (
+-                                    [
+-                                        kubernetes_sdk.V1VolumeMount(
+-                                            name="dhsm",
+-                                            mount_path="/dev/shm",
+                                    for k in list(
+                                        []
+                                        if not resources.get("secrets")
+                                        else (
+                                            [resources.get("secrets")]
+                                            if isinstance(resources.get("secrets"), str)
+                                            else resources.get("secrets")
+                                        )
                                     )
-                                ]
-                                if shared_memory
-                                else []
-                            )
-                            # Support persistent volume claims.
-                            + (
-                                [
+-                                    ]
+-                                    if shared_memory
+-                                    else []
+-                                )
+-                                # Support persistent volume claims.
+-                                + (
+-                                    [
+                                    + KUBERNETES_SECRETS.split(",")
+                                    + ARGO_WORKFLOWS_KUBERNETES_SECRETS.split(",")
+                                    if k
+                                ],
+                                volume_mounts=[
+                                    # Assign a volume mount to pass state to the next task.
                                     kubernetes_sdk.V1VolumeMount(
-                                        name=claim, mount_path=path
+                                        name="out", mount_path="/mnt/out"
                                     )
-                                    for claim, path in resources.get(
-                                        "persistent_volume_claims"
-                                    ).items()
                                 ]
-                                if resources.get("persistent_volume_claims") is not None
-                                else []
-                            ),
-                        ).to_dict()
+                                # Support tmpfs.
+                                + (
+                                    [
+                                        kubernetes_sdk.V1VolumeMount(
+                                            name="tmpfs-ephemeral-volume",
+                                            mount_path=tmpfs_path,
+                                        )
+                                    ]
+                                    if tmpfs_enabled
+                                    else []
+                                )
+                                # Support shared_memory
+                                + (
+                                    [
+                                        kubernetes_sdk.V1VolumeMount(
+                                            name="dhsm",
+                                            mount_path="/dev/shm",
+                                        )
+                                    ]
+                                    if shared_memory
+                                    else []
+                                )
+                                # Support persistent volume claims.
+                                + (
+                                    [
+                                        kubernetes_sdk.V1VolumeMount(
+                                            name=claim, mount_path=path
+                                        )
+                                        for claim, path in resources.get(
+                                            "persistent_volume_claims"
+                                        ).items()
+                                    ]
+                                    if resources.get("persistent_volume_claims")
+                                    is not None
+                                    else []
+                                ),
+                            ).to_dict()
+                        )
                     )
                 )
-            )
+
+    # Return daemon container templates for workflow execution notifications.
+    def _daemon_templates(self):
+        templates = []
+        if self.enable_heartbeat_daemon:
+            templates.append(self._heartbeat_daemon_template())
+        return templates
 
     # Return exit hook templates for workflow execution notifications.
     def _exit_hook_templates(self):
@@ -2149,8 +2175,150 @@ class ArgoWorkflows(object):
                 .success_condition("true == true")
             )
         )
+        if self.enable_error_msg_capture:
+            templates.extend(self._error_msg_capture_hook_templates())
         return templates
 
+    def _error_msg_capture_hook_templates(self):
+        from kubernetes import client as kubernetes_sdk
+
+        start_step = [step for step in self.graph if step.name == "start"][0]
+        # We want to grab the base image used by the start step, as this is known to be pullable from within the cluster,
+        # and it might contain the required libraries, allowing us to start up faster.
+        resources = dict(
+            [deco for deco in start_step.decorators if deco.name == "kubernetes"][
+                0
+            ].attributes
+        )
+
+        run_id_template = "argo-{{workflow.name}}"
+        metaflow_version = self.environment.get_environment_info()
+        metaflow_version["flow_name"] = self.graph.name
+        metaflow_version["production_token"] = self.production_token
+
+        mflog_expr = export_mflog_env_vars(
+            datastore_type=self.flow_datastore.TYPE,
+            stdout_path="$PWD/.logs/mflog_stdout",
+            stderr_path="$PWD/.logs/mflog_stderr",
+            flow_name=self.flow.name,
+            run_id=run_id_template,
+            step_name="_run_capture_error",
+            task_id="1",
+            retry_count="0",
+        )
+
+        cmds = " && ".join(
+            [
+                # For supporting sandboxes, ensure that a custom script is executed
+                # before anything else is executed. The script is passed in as an
+                # env var.
+                '${METAFLOW_INIT_SCRIPT:+eval \\"${METAFLOW_INIT_SCRIPT}\\"}',
+                "mkdir -p $PWD/.logs",
+                mflog_expr,
+            ]
+            + self.environment.get_package_commands(
+                self.code_package_url, self.flow_datastore.TYPE
+            )[:-1]
+            # Replace the line 'Task in starting'
+            # FIXME: this can be brittle.
+            + ["mflog 'Error capture hook is starting.'"]
+            + ["argo_error=$(python -m 'metaflow.plugins.argo.capture_error')"]
+            + ["export METAFLOW_ARGO_ERROR=$argo_error"]
+            + [
+                """python -c 'import json, os; error_obj=os.getenv(\\"METAFLOW_ARGO_ERROR\\");data=json.loads(error_obj); print(data[\\"message\\"])'"""
+            ]
+            + [
+                'if [ -n \\"${ARGO_WORKFLOWS_CAPTURE_ERROR_SCRIPT}\\" ]; then eval \\"${ARGO_WORKFLOWS_CAPTURE_ERROR_SCRIPT}\\"; fi'
+            ]
+        )
+
+        # TODO: Also capture the first failed task id
+        cmds = shlex.split('bash -c "%s"' % cmds)
+        env = {
+            # These values are needed by Metaflow to set it's internal
+            # state appropriately.
+            "METAFLOW_CODE_URL": self.code_package_url,
+            "METAFLOW_CODE_SHA": self.code_package_sha,
+            "METAFLOW_CODE_DS": self.flow_datastore.TYPE,
+            "METAFLOW_SERVICE_URL": SERVICE_INTERNAL_URL,
+            "METAFLOW_SERVICE_HEADERS": json.dumps(SERVICE_HEADERS),
+            "METAFLOW_USER": "argo-workflows",
+            "METAFLOW_DEFAULT_DATASTORE": self.flow_datastore.TYPE,
+            "METAFLOW_DEFAULT_METADATA": DEFAULT_METADATA,
+            "METAFLOW_OWNER": self.username,
+        }
+        # support Metaflow sandboxes
+        env["METAFLOW_INIT_SCRIPT"] = KUBERNETES_SANDBOX_INIT_SCRIPT
+        env["METAFLOW_ARGO_WORKFLOWS_CAPTURE_ERROR_SCRIPT"] = (
+            ARGO_WORKFLOWS_CAPTURE_ERROR_SCRIPT
+        )
+
+        env["METAFLOW_WORKFLOW_NAME"] = "{{workflow.name}}"
+        env["METAFLOW_WORKFLOW_NAMESPACE"] = "{{workflow.namespace}}"
+        env["METAFLOW_ARGO_WORKFLOW_FAILURES"] = "{{workflow.failures}}"
+        env = {
+            k: v
+            for k, v in env.items()
+            if v is not None
+            and k not in set(ARGO_WORKFLOWS_ENV_VARS_TO_SKIP.split(","))
+        }
+        return [
+            Template("error-msg-capture-hook").container(
+                to_camelcase(
+                    kubernetes_sdk.V1Container(
+                        name="main",
+                        command=cmds,
+                        image=resources["image"],
+                        env=[
+                            kubernetes_sdk.V1EnvVar(name=k, value=str(v))
+                            for k, v in env.items()
+                        ],
+                        env_from=[
+                            kubernetes_sdk.V1EnvFromSource(
+                                secret_ref=kubernetes_sdk.V1SecretEnvSource(
+                                    name=str(k),
+                                    # optional=True
+                                )
+                            )
+                            for k in list(
+                                []
+                                if not resources.get("secrets")
+                                else (
+                                    [resources.get("secrets")]
+                                    if isinstance(resources.get("secrets"), str)
+                                    else resources.get("secrets")
+                                )
+                            )
+                            + KUBERNETES_SECRETS.split(",")
+                            + ARGO_WORKFLOWS_KUBERNETES_SECRETS.split(",")
+                            if k
+                        ],
+                        resources=kubernetes_sdk.V1ResourceRequirements(
+                            # NOTE: base resources for this are kept to a minimum to save on running costs.
+                            # This has an adverse effect on startup time for the daemon, which can be completely
+                            # alleviated by using a base image that has the required dependencies pre-installed
+                            requests={
+                                "cpu": "200m",
+                                "memory": "100Mi",
+                            },
+                            limits={
+                                "cpu": "200m",
+                                "memory": "500Mi",
+                            },
+                        ),
+                    )
+                )
+            ),
+            Template("capture-error-hook-fn-preflight").steps(
+                [
+                    WorkflowStep()
+                    .name("capture-error-hook-fn-preflight")
+                    .template("error-msg-capture-hook")
+                    .when("{{workflow.status}} != Succeeded")
+                ]
+            ),
+        ]
+
     def _pager_duty_alert_template(self):
         # https://developer.pagerduty.com/docs/ZG9jOjExMDI5NTgx-send-an-alert-event
         if self.notify_pager_duty_integration_key is None:
@@ -2337,6 +2505,137 @@ class ArgoWorkflows(object):
             Http("POST").url(self.notify_slack_webhook_url).body(json.dumps(payload))
         )
 
+    def _heartbeat_daemon_template(self):
+        # Use all the affordances available to _parameters task
+        executable = self.environment.executable("_parameters")
+        run_id = "argo-{{workflow.name}}"
+        entrypoint = [executable, "-m metaflow.plugins.argo.daemon"]
+        heartbeat_cmds = "{entrypoint} --flow_name {flow_name} --run_id {run_id} {tags} heartbeat".format(
+            entrypoint=" ".join(entrypoint),
+            flow_name=self.flow.name,
+            run_id=run_id,
+            tags=" ".join(["--tag %s" % t for t in self.tags]) if self.tags else "",
+        )
+
+        # TODO: we do not really need MFLOG logging for the daemon at the moment, but might be good for the future.
+        # Consider if we can do without this setup.
+        # Configure log capture.
+        mflog_expr = export_mflog_env_vars(
+            datastore_type=self.flow_datastore.TYPE,
+            stdout_path="$PWD/.logs/mflog_stdout",
+            stderr_path="$PWD/.logs/mflog_stderr",
+            flow_name=self.flow.name,
+            run_id=run_id,
+            step_name="_run_heartbeat_daemon",
+            task_id="1",
+            retry_count="0",
+        )
+        # TODO: Can the init be trimmed down?
+        # Can we do without get_package_commands fetching the whole code package?
+        init_cmds = " && ".join(
+            [
+                # For supporting sandboxes, ensure that a custom script is executed
+                # before anything else is executed. The script is passed in as an
+                # env var.
+                '${METAFLOW_INIT_SCRIPT:+eval \\"${METAFLOW_INIT_SCRIPT}\\"}',
+                "mkdir -p $PWD/.logs",
+                mflog_expr,
+            ]
+            + self.environment.get_package_commands(
+                self.code_package_url, self.flow_datastore.TYPE
+            )[:-1]
+            # Replace the line 'Task in starting'
+            # FIXME: this can be brittle.
+            + ["mflog 'Heartbeat daemon is starting.'"]
+        )
+
+        cmd_str = " && ".join([init_cmds, heartbeat_cmds])
+        cmds = shlex.split('bash -c "%s"' % cmd_str)
+
+        # TODO: Check that this is the minimal env.
+        # Env required for sending heartbeats to the metadata service, nothing extra.
+        env = {
+            # These values are needed by Metaflow to set it's internal
+            # state appropriately.
+            "METAFLOW_CODE_URL": self.code_package_url,
+            "METAFLOW_CODE_SHA": self.code_package_sha,
+            "METAFLOW_CODE_DS": self.flow_datastore.TYPE,
+            "METAFLOW_SERVICE_URL": SERVICE_INTERNAL_URL,
+            "METAFLOW_SERVICE_HEADERS": json.dumps(SERVICE_HEADERS),
+            "METAFLOW_USER": "argo-workflows",
+            "METAFLOW_DEFAULT_DATASTORE": self.flow_datastore.TYPE,
+            "METAFLOW_DEFAULT_METADATA": DEFAULT_METADATA,
+            "METAFLOW_OWNER": self.username,
+        }
+        # support Metaflow sandboxes
+        env["METAFLOW_INIT_SCRIPT"] = KUBERNETES_SANDBOX_INIT_SCRIPT
+
+        # cleanup env values
+        env = {
+            k: v
+            for k, v in env.items()
+            if v is not None
+            and k not in set(ARGO_WORKFLOWS_ENV_VARS_TO_SKIP.split(","))
+        }
+
+        # We want to grab the base image used by the start step, as this is known to be pullable from within the cluster,
+        # and it might contain the required libraries, allowing us to start up faster.
+        start_step = next(step for step in self.flow if step.name == "start")
+        resources = dict(
+            [deco for deco in start_step.decorators if deco.name == "kubernetes"][
+                0
+            ].attributes
+        )
+        from kubernetes import client as kubernetes_sdk
+
+        return DaemonTemplate("heartbeat-daemon").container(
+            to_camelcase(
+                kubernetes_sdk.V1Container(
+                    name="main",
+                    # TODO: Make the image configurable
+                    image=resources["image"],
+                    command=cmds,
+                    env=[
+                        kubernetes_sdk.V1EnvVar(name=k, value=str(v))
+                        for k, v in env.items()
+                    ],
+                    env_from=[
+                        kubernetes_sdk.V1EnvFromSource(
+                            secret_ref=kubernetes_sdk.V1SecretEnvSource(
+                                name=str(k),
+                                # optional=True
+                            )
+                        )
+                        for k in list(
+                            []
+                            if not resources.get("secrets")
+                            else (
+                                [resources.get("secrets")]
+                                if isinstance(resources.get("secrets"), str)
+                                else resources.get("secrets")
+                            )
+                        )
+                        + KUBERNETES_SECRETS.split(",")
+                        + ARGO_WORKFLOWS_KUBERNETES_SECRETS.split(",")
+                        if k
+                    ],
+                    resources=kubernetes_sdk.V1ResourceRequirements(
+                        # NOTE: base resources for this are kept to a minimum to save on running costs.
+                        # This has an adverse effect on startup time for the daemon, which can be completely
+                        # alleviated by using a base image that has the required dependencies pre-installed
+                        requests={
+                            "cpu": "200m",
+                            "memory": "100Mi",
+                        },
+                        limits={
+                            "cpu": "200m",
+                            "memory": "100Mi",
+                        },
+                    ),
+                )
+            )
+        )
+
     def _compile_sensor(self):
         # This method compiles a Metaflow @trigger decorator into Argo Events Sensor.
         #
@@ -2791,6 +3090,34 @@ class ObjectMeta(object):
         return json.dumps(self.to_json(), indent=4)
 
 
+class WorkflowStep(object):
+    def __init__(self):
+        tree = lambda: defaultdict(tree)
+        self.payload = tree()
+
+    def name(self, name):
+        self.payload["name"] = str(name)
+        return self
+
+    def template(self, template):
+        self.payload["template"] = str(template)
+        return self
+
+    def when(self, condition):
+        self.payload["when"] = str(condition)
+        return self
+
+    def step(self, expression):
+        self.payload["expression"] = str(expression)
+        return self
+
+    def to_json(self):
+        return self.payload
+
+    def __str__(self):
+        return json.dumps(self.to_json(), indent=4)
+
+
 class WorkflowSpec(object):
     # https://argoproj.github.io/argo-workflows/fields/#workflowspec
     # This object sets all Workflow level properties.
@@ -2821,6 +3148,11 @@ class WorkflowSpec(object):
         self.payload["entrypoint"] = entrypoint
         return self
 
+    def onExit(self, on_exit_template):
+        if on_exit_template:
+            self.payload["onExit"] = on_exit_template
+        return self
+
     def parallelism(self, parallelism):
         # Set parallelism at Workflow level
         self.payload["parallelism"] = int(parallelism)
@@ -2909,6 +3241,25 @@ class Metadata(object):
         return json.dumps(self.to_json(), indent=4)
 
 
+class DaemonTemplate(object):
+    def __init__(self, name):
+        tree = lambda: defaultdict(tree)
+        self.name = name
+        self.payload = tree()
+        self.payload["daemon"] = True
+        self.payload["name"] = name
+
+    def container(self, container):
+        self.payload["container"] = container
+        return self
+
+    def to_json(self):
+        return self.payload
+
+    def __str__(self):
+        return json.dumps(self.payload, indent=4)
+
+
 class Template(object):
     # https://argoproj.github.io/argo-workflows/fields/#template
 
@@ -2927,6 +3278,18 @@ class Template(object):
         self.payload["dag"] = dag_template.to_json()
         return self
 
+    def steps(self, steps):
+        if "steps" not in self.payload:
+            self.payload["steps"] = []
+        # steps is a list of lists.
+        # hence we go over every item in the incoming list
+        # serialize it and then append the list to the payload
+        step_list = []
+        for step in steps:
+            step_list.append(step.to_json())
+        self.payload["steps"].append(step_list)
+        return self
+
     def container(self, container):
         # Luckily this can simply be V1Container and we are spared from writing more
         # boilerplate - https://github.com/kubernetes-client/python/blob/master/kubernetes/docs/V1Container.md.
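
For readers skimming the diff, the following standalone sketch is not part of the package; it simply copies the new WorkflowStep builder introduced above and shows, under that assumption, the JSON fragment it serializes to when wired the same way as the capture-error-hook-fn-preflight exit hook.

import json
from collections import defaultdict

class WorkflowStep(object):
    # Copied from the diff above: a tiny builder that accumulates
    # Argo WorkflowStep fields in a nested defaultdict payload.
    def __init__(self):
        tree = lambda: defaultdict(tree)
        self.payload = tree()

    def name(self, name):
        self.payload["name"] = str(name)
        return self

    def template(self, template):
        self.payload["template"] = str(template)
        return self

    def when(self, condition):
        self.payload["when"] = str(condition)
        return self

    def to_json(self):
        return self.payload

    def __str__(self):
        return json.dumps(self.to_json(), indent=4)

# Mirrors how _error_msg_capture_hook_templates wires the exit hook:
# the step only runs when the workflow did not succeed.
step = (
    WorkflowStep()
    .name("capture-error-hook-fn-preflight")
    .template("error-msg-capture-hook")
    .when("{{workflow.status}} != Succeeded")
)
print(step)
# {
#     "name": "capture-error-hook-fn-preflight",
#     "template": "error-msg-capture-hook",
#     "when": "{{workflow.status}} != Succeeded"
# }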