ob-metaflow 2.15.13.1__py2.py3-none-any.whl → 2.19.7.1rc0__py2.py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (169)
  1. metaflow/__init__.py +10 -3
  2. metaflow/_vendor/imghdr/__init__.py +186 -0
  3. metaflow/_vendor/yaml/__init__.py +427 -0
  4. metaflow/_vendor/yaml/composer.py +139 -0
  5. metaflow/_vendor/yaml/constructor.py +748 -0
  6. metaflow/_vendor/yaml/cyaml.py +101 -0
  7. metaflow/_vendor/yaml/dumper.py +62 -0
  8. metaflow/_vendor/yaml/emitter.py +1137 -0
  9. metaflow/_vendor/yaml/error.py +75 -0
  10. metaflow/_vendor/yaml/events.py +86 -0
  11. metaflow/_vendor/yaml/loader.py +63 -0
  12. metaflow/_vendor/yaml/nodes.py +49 -0
  13. metaflow/_vendor/yaml/parser.py +589 -0
  14. metaflow/_vendor/yaml/reader.py +185 -0
  15. metaflow/_vendor/yaml/representer.py +389 -0
  16. metaflow/_vendor/yaml/resolver.py +227 -0
  17. metaflow/_vendor/yaml/scanner.py +1435 -0
  18. metaflow/_vendor/yaml/serializer.py +111 -0
  19. metaflow/_vendor/yaml/tokens.py +104 -0
  20. metaflow/cards.py +4 -0
  21. metaflow/cli.py +125 -21
  22. metaflow/cli_components/init_cmd.py +1 -0
  23. metaflow/cli_components/run_cmds.py +204 -40
  24. metaflow/cli_components/step_cmd.py +160 -4
  25. metaflow/client/__init__.py +1 -0
  26. metaflow/client/core.py +198 -130
  27. metaflow/client/filecache.py +59 -32
  28. metaflow/cmd/code/__init__.py +2 -1
  29. metaflow/cmd/develop/stub_generator.py +49 -18
  30. metaflow/cmd/develop/stubs.py +9 -27
  31. metaflow/cmd/make_wrapper.py +30 -0
  32. metaflow/datastore/__init__.py +1 -0
  33. metaflow/datastore/content_addressed_store.py +40 -9
  34. metaflow/datastore/datastore_set.py +10 -1
  35. metaflow/datastore/flow_datastore.py +124 -4
  36. metaflow/datastore/spin_datastore.py +91 -0
  37. metaflow/datastore/task_datastore.py +92 -6
  38. metaflow/debug.py +5 -0
  39. metaflow/decorators.py +331 -82
  40. metaflow/extension_support/__init__.py +414 -356
  41. metaflow/extension_support/_empty_file.py +2 -2
  42. metaflow/flowspec.py +322 -82
  43. metaflow/graph.py +178 -15
  44. metaflow/includefile.py +25 -3
  45. metaflow/lint.py +94 -3
  46. metaflow/meta_files.py +13 -0
  47. metaflow/metadata_provider/metadata.py +13 -2
  48. metaflow/metaflow_config.py +66 -4
  49. metaflow/metaflow_environment.py +91 -25
  50. metaflow/metaflow_profile.py +18 -0
  51. metaflow/metaflow_version.py +16 -1
  52. metaflow/package/__init__.py +673 -0
  53. metaflow/packaging_sys/__init__.py +880 -0
  54. metaflow/packaging_sys/backend.py +128 -0
  55. metaflow/packaging_sys/distribution_support.py +153 -0
  56. metaflow/packaging_sys/tar_backend.py +99 -0
  57. metaflow/packaging_sys/utils.py +54 -0
  58. metaflow/packaging_sys/v1.py +527 -0
  59. metaflow/parameters.py +6 -2
  60. metaflow/plugins/__init__.py +6 -0
  61. metaflow/plugins/airflow/airflow.py +11 -1
  62. metaflow/plugins/airflow/airflow_cli.py +16 -5
  63. metaflow/plugins/argo/argo_client.py +42 -20
  64. metaflow/plugins/argo/argo_events.py +6 -6
  65. metaflow/plugins/argo/argo_workflows.py +1023 -344
  66. metaflow/plugins/argo/argo_workflows_cli.py +396 -94
  67. metaflow/plugins/argo/argo_workflows_decorator.py +9 -0
  68. metaflow/plugins/argo/argo_workflows_deployer_objects.py +75 -49
  69. metaflow/plugins/argo/capture_error.py +5 -2
  70. metaflow/plugins/argo/conditional_input_paths.py +35 -0
  71. metaflow/plugins/argo/exit_hooks.py +209 -0
  72. metaflow/plugins/argo/param_val.py +19 -0
  73. metaflow/plugins/aws/aws_client.py +6 -0
  74. metaflow/plugins/aws/aws_utils.py +33 -1
  75. metaflow/plugins/aws/batch/batch.py +72 -5
  76. metaflow/plugins/aws/batch/batch_cli.py +24 -3
  77. metaflow/plugins/aws/batch/batch_decorator.py +57 -6
  78. metaflow/plugins/aws/step_functions/step_functions.py +28 -3
  79. metaflow/plugins/aws/step_functions/step_functions_cli.py +49 -4
  80. metaflow/plugins/aws/step_functions/step_functions_deployer.py +3 -0
  81. metaflow/plugins/aws/step_functions/step_functions_deployer_objects.py +30 -0
  82. metaflow/plugins/cards/card_cli.py +20 -1
  83. metaflow/plugins/cards/card_creator.py +24 -1
  84. metaflow/plugins/cards/card_datastore.py +21 -49
  85. metaflow/plugins/cards/card_decorator.py +58 -6
  86. metaflow/plugins/cards/card_modules/basic.py +38 -9
  87. metaflow/plugins/cards/card_modules/bundle.css +1 -1
  88. metaflow/plugins/cards/card_modules/chevron/renderer.py +1 -1
  89. metaflow/plugins/cards/card_modules/components.py +592 -3
  90. metaflow/plugins/cards/card_modules/convert_to_native_type.py +34 -5
  91. metaflow/plugins/cards/card_modules/json_viewer.py +232 -0
  92. metaflow/plugins/cards/card_modules/main.css +1 -0
  93. metaflow/plugins/cards/card_modules/main.js +56 -41
  94. metaflow/plugins/cards/card_modules/test_cards.py +22 -6
  95. metaflow/plugins/cards/component_serializer.py +1 -8
  96. metaflow/plugins/cards/metadata.py +22 -0
  97. metaflow/plugins/catch_decorator.py +9 -0
  98. metaflow/plugins/datastores/local_storage.py +12 -6
  99. metaflow/plugins/datastores/spin_storage.py +12 -0
  100. metaflow/plugins/datatools/s3/s3.py +49 -17
  101. metaflow/plugins/datatools/s3/s3op.py +113 -66
  102. metaflow/plugins/env_escape/client_modules.py +102 -72
  103. metaflow/plugins/events_decorator.py +127 -121
  104. metaflow/plugins/exit_hook/__init__.py +0 -0
  105. metaflow/plugins/exit_hook/exit_hook_decorator.py +46 -0
  106. metaflow/plugins/exit_hook/exit_hook_script.py +52 -0
  107. metaflow/plugins/kubernetes/kubernetes.py +12 -1
  108. metaflow/plugins/kubernetes/kubernetes_cli.py +11 -0
  109. metaflow/plugins/kubernetes/kubernetes_decorator.py +25 -6
  110. metaflow/plugins/kubernetes/kubernetes_job.py +12 -4
  111. metaflow/plugins/kubernetes/kubernetes_jobsets.py +31 -30
  112. metaflow/plugins/metadata_providers/local.py +76 -82
  113. metaflow/plugins/metadata_providers/service.py +13 -9
  114. metaflow/plugins/metadata_providers/spin.py +16 -0
  115. metaflow/plugins/package_cli.py +36 -24
  116. metaflow/plugins/parallel_decorator.py +11 -2
  117. metaflow/plugins/parsers.py +16 -0
  118. metaflow/plugins/pypi/bootstrap.py +7 -1
  119. metaflow/plugins/pypi/conda_decorator.py +41 -82
  120. metaflow/plugins/pypi/conda_environment.py +14 -6
  121. metaflow/plugins/pypi/micromamba.py +9 -1
  122. metaflow/plugins/pypi/pip.py +41 -5
  123. metaflow/plugins/pypi/pypi_decorator.py +4 -4
  124. metaflow/plugins/pypi/utils.py +22 -0
  125. metaflow/plugins/secrets/__init__.py +3 -0
  126. metaflow/plugins/secrets/secrets_decorator.py +14 -178
  127. metaflow/plugins/secrets/secrets_func.py +49 -0
  128. metaflow/plugins/secrets/secrets_spec.py +101 -0
  129. metaflow/plugins/secrets/utils.py +74 -0
  130. metaflow/plugins/test_unbounded_foreach_decorator.py +2 -2
  131. metaflow/plugins/timeout_decorator.py +0 -1
  132. metaflow/plugins/uv/bootstrap.py +29 -1
  133. metaflow/plugins/uv/uv_environment.py +5 -3
  134. metaflow/pylint_wrapper.py +5 -1
  135. metaflow/runner/click_api.py +79 -26
  136. metaflow/runner/deployer.py +208 -6
  137. metaflow/runner/deployer_impl.py +32 -12
  138. metaflow/runner/metaflow_runner.py +266 -33
  139. metaflow/runner/subprocess_manager.py +21 -1
  140. metaflow/runner/utils.py +27 -16
  141. metaflow/runtime.py +660 -66
  142. metaflow/task.py +255 -26
  143. metaflow/user_configs/config_options.py +33 -21
  144. metaflow/user_configs/config_parameters.py +220 -58
  145. metaflow/user_decorators/__init__.py +0 -0
  146. metaflow/user_decorators/common.py +144 -0
  147. metaflow/user_decorators/mutable_flow.py +512 -0
  148. metaflow/user_decorators/mutable_step.py +424 -0
  149. metaflow/user_decorators/user_flow_decorator.py +264 -0
  150. metaflow/user_decorators/user_step_decorator.py +749 -0
  151. metaflow/util.py +197 -7
  152. metaflow/vendor.py +23 -7
  153. metaflow/version.py +1 -1
  154. {ob_metaflow-2.15.13.1.data → ob_metaflow-2.19.7.1rc0.data}/data/share/metaflow/devtools/Makefile +13 -2
  155. {ob_metaflow-2.15.13.1.data → ob_metaflow-2.19.7.1rc0.data}/data/share/metaflow/devtools/Tiltfile +107 -7
  156. {ob_metaflow-2.15.13.1.data → ob_metaflow-2.19.7.1rc0.data}/data/share/metaflow/devtools/pick_services.sh +1 -0
  157. {ob_metaflow-2.15.13.1.dist-info → ob_metaflow-2.19.7.1rc0.dist-info}/METADATA +2 -3
  158. {ob_metaflow-2.15.13.1.dist-info → ob_metaflow-2.19.7.1rc0.dist-info}/RECORD +162 -121
  159. {ob_metaflow-2.15.13.1.dist-info → ob_metaflow-2.19.7.1rc0.dist-info}/WHEEL +1 -1
  160. metaflow/_vendor/v3_5/__init__.py +0 -1
  161. metaflow/_vendor/v3_5/importlib_metadata/__init__.py +0 -644
  162. metaflow/_vendor/v3_5/importlib_metadata/_compat.py +0 -152
  163. metaflow/_vendor/v3_5/zipp.py +0 -329
  164. metaflow/info_file.py +0 -25
  165. metaflow/package.py +0 -203
  166. metaflow/user_configs/config_decorators.py +0 -568
  167. {ob_metaflow-2.15.13.1.dist-info → ob_metaflow-2.19.7.1rc0.dist-info}/entry_points.txt +0 -0
  168. {ob_metaflow-2.15.13.1.dist-info → ob_metaflow-2.19.7.1rc0.dist-info}/licenses/LICENSE +0 -0
  169. {ob_metaflow-2.15.13.1.dist-info → ob_metaflow-2.19.7.1rc0.dist-info}/top_level.txt +0 -0
@@ -19,6 +19,7 @@ from metaflow.metaflow_config import (
     ARGO_EVENTS_EVENT_BUS,
     ARGO_EVENTS_EVENT_SOURCE,
     ARGO_EVENTS_INTERNAL_WEBHOOK_URL,
+    ARGO_EVENTS_SENSOR_NAMESPACE,
     ARGO_EVENTS_SERVICE_ACCOUNT,
     ARGO_EVENTS_WEBHOOK_AUTH,
     ARGO_WORKFLOWS_CAPTURE_ERROR_SCRIPT,
@@ -66,6 +67,7 @@ from metaflow.util import (
 )
 
 from .argo_client import ArgoClient
+from .exit_hooks import ExitHookHack, HttpExitHook, ContainerHook
 from metaflow.util import resolve_identity
 
 
@@ -73,6 +75,10 @@ class ArgoWorkflowsException(MetaflowException):
     headline = "Argo Workflows error"
 
 
+class ArgoWorkflowsSensorCleanupException(MetaflowException):
+    headline = "Argo Workflows sensor clean up error"
+
+
 class ArgoWorkflowsSchedulingException(MetaflowException):
     headline = "Argo Workflows scheduling error"
 
@@ -91,6 +97,7 @@ class ArgoWorkflows(object):
         name,
         graph: FlowGraph,
         flow,
+        code_package_metadata,
         code_package_sha,
         code_package_url,
         production_token,
@@ -115,6 +122,8 @@ class ArgoWorkflows(object):
         incident_io_metadata: List[str] = None,
         enable_heartbeat_daemon=True,
         enable_error_msg_capture=False,
+        workflow_title=None,
+        workflow_description=None,
     ):
         # Some high-level notes -
         #
@@ -140,9 +149,19 @@ class ArgoWorkflows(object):
         # ensure that your Argo Workflows controller doesn't restrict
         # templateReferencing.
 
+        # get initial configs
+        self.initial_configs = init_config()
+        for entry in ["OBP_PERIMETER", "OBP_INTEGRATIONS_URL"]:
+            if entry not in self.initial_configs:
+                raise ArgoWorkflowsException(
+                    f"{entry} was not found in metaflow config. Please make sure to run `outerbounds configure <...>` command which can be found on the Outerbounds UI or reach out to your Outerbounds support team."
+                )
+
         self.name = name
         self.graph = graph
+        self._parse_conditional_branches()
         self.flow = flow
+        self.code_package_metadata = code_package_metadata
         self.code_package_sha = code_package_sha
         self.code_package_url = code_package_url
         self.production_token = production_token
@@ -169,6 +188,8 @@ class ArgoWorkflows(object):
         )
         self.enable_heartbeat_daemon = enable_heartbeat_daemon
         self.enable_error_msg_capture = enable_error_msg_capture
+        self.workflow_title = workflow_title
+        self.workflow_description = workflow_description
         self.parameters = self._process_parameters()
         self.config_parameters = self._process_config_parameters()
         self.triggers, self.trigger_options = self._process_triggers()
@@ -183,6 +204,7 @@ class ArgoWorkflows(object):
         return str(self._workflow_template)
 
     def deploy(self):
+        self.cleanup_previous_sensors()
         try:
             # Register workflow template.
             ArgoClient(namespace=KUBERNETES_NAMESPACE).register_workflow_template(
@@ -191,6 +213,37 @@ class ArgoWorkflows(object):
         except Exception as e:
             raise ArgoWorkflowsException(str(e))
 
+    def cleanup_previous_sensors(self):
+        try:
+            client = ArgoClient(namespace=KUBERNETES_NAMESPACE)
+            # Check for existing deployment and do cleanup
+            old_template = client.get_workflow_template(self.name)
+            if not old_template:
+                return None
+            # Clean up old sensors
+            old_sensor_namespace = old_template["metadata"]["annotations"].get(
+                "metaflow/sensor_namespace"
+            )
+
+            if old_sensor_namespace is None:
+                # This workflow was created before sensor annotations
+                # and may have a sensor in the default namespace
+                # we will delete it and it'll get recreated if need be
+                old_sensor_name = ArgoWorkflows._sensor_name(self.name)
+                client.delete_sensor(old_sensor_name, client._namespace)
+            else:
+                # delete old sensor only if it was somewhere else, otherwise it'll get replaced
+                old_sensor_name = old_template["metadata"]["annotations"][
+                    "metaflow/sensor_name"
+                ]
+                if (
+                    not self._sensor
+                    or old_sensor_namespace != ARGO_EVENTS_SENSOR_NAMESPACE
+                ):
+                    client.delete_sensor(old_sensor_name, old_sensor_namespace)
+        except Exception as e:
+            raise ArgoWorkflowsSensorCleanupException(str(e))
+
     @staticmethod
     def _sanitize(name):
         # Metaflow allows underscores in node names, which are disallowed in Argo
@@ -205,28 +258,33 @@ class ArgoWorkflows(object):
         return name.replace(".", "-")
 
     @staticmethod
-    def list_templates(flow_name, all=False):
+    def list_templates(flow_name, all=False, page_size=100):
         client = ArgoClient(namespace=KUBERNETES_NAMESPACE)
 
-        templates = client.get_workflow_templates()
-        if templates is None:
-            return []
-
-        template_names = [
-            template["metadata"]["name"]
-            for template in templates
-            if all
-            or flow_name
-            == template["metadata"]
-            .get("annotations", {})
-            .get("metaflow/flow_name", None)
-        ]
-        return template_names
+        for template in client.get_workflow_templates(page_size=page_size):
+            if all or flow_name == template["metadata"].get("annotations", {}).get(
+                "metaflow/flow_name", None
+            ):
+                yield template["metadata"]["name"]
 
     @staticmethod
     def delete(name):
         client = ArgoClient(namespace=KUBERNETES_NAMESPACE)
 
+        # the workflow template might not exist, but we still want to try clean up associated sensors and schedules.
+        workflow_template = client.get_workflow_template(name) or {}
+        workflow_annotations = workflow_template.get("metadata", {}).get(
+            "annotations", {}
+        )
+
+        sensor_name = ArgoWorkflows._sensor_name(
+            workflow_annotations.get("metaflow/sensor_name", name)
+        )
+        # if below is missing then it was deployed before custom sensor namespaces
+        sensor_namespace = workflow_annotations.get(
+            "metaflow/sensor_namespace", KUBERNETES_NAMESPACE
+        )
+
         # Always try to delete the schedule. Failure in deleting the schedule should not
         # be treated as an error, due to any of the following reasons
         # - there might not have been a schedule, or it was deleted by some other means
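Note that `list_templates` is now a generator: names stream in one page of `page_size` at a time instead of being materialized up front. A minimal usage sketch (the flow name is illustrative):

```python
# list_templates now yields template names lazily, paging through the Argo API.
for name in ArgoWorkflows.list_templates("HelloFlow", page_size=50):
    print(name)

# Materialize eagerly only when a full list is actually needed:
names = list(ArgoWorkflows.list_templates("HelloFlow"))
```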
@@ -236,7 +294,7 @@ class ArgoWorkflows(object):
 
         # The workflow might have sensors attached to it, which consume actual resources.
         # Try to delete these as well.
-        sensor_deleted = client.delete_sensor(ArgoWorkflows._sensor_name(name))
+        sensor_deleted = client.delete_sensor(sensor_name, sensor_namespace)
 
         # After cleaning up related resources, delete the workflow in question.
         # Failure in deleting is treated as critical and will be made visible to the user
@@ -260,6 +318,7 @@ class ArgoWorkflows(object):
                     flow_name=flow_name, run_id=name
                 )
             )
+        return True
 
     @staticmethod
     def get_workflow_status(flow_name, name):
@@ -384,6 +443,25 @@ class ArgoWorkflows(object):
                 "metaflow/project_flow_name": current.project_flow_name,
             }
         )
+
+        # Add Argo Workflows title and description annotations
+        # https://argo-workflows.readthedocs.io/en/latest/title-and-description/
+        # Use CLI-provided values or auto-populate from metadata
+        title = (
+            (self.workflow_title.strip() if self.workflow_title else None)
+            or current.get("project_flow_name")
+            or self.flow.name
+        )
+
+        description = (
+            self.workflow_description.strip() if self.workflow_description else None
+        ) or (self.flow.__doc__.strip() if self.flow.__doc__ else None)
+
+        if title:
+            annotations["workflows.argoproj.io/title"] = title
+        if description:
+            annotations["workflows.argoproj.io/description"] = description
+
         return annotations
 
     def _get_schedule(self):
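The fallback chain above can be traced with a short sketch (values are illustrative):

```python
# No --workflow-title was given, so the @project-derived name wins; the
# flow docstring backs the description when no explicit one is passed.
workflow_title = None
project_flow_name = "myproject.user.alice.helloflow"
title = (workflow_title.strip() if workflow_title else None) or project_flow_name
assert title == "myproject.user.alice.helloflow"
annotations = {"workflows.argoproj.io/title": title}
```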
@@ -404,11 +482,10 @@ class ArgoWorkflows(object):
             # Metaflow will overwrite any existing sensor.
             sensor_name = ArgoWorkflows._sensor_name(self.name)
             if self._sensor:
-                argo_client.register_sensor(sensor_name, self._sensor.to_json())
-            else:
-                # Since sensors occupy real resources, delete existing sensor if needed
-                # Deregister sensors that might have existed before this deployment
-                argo_client.delete_sensor(sensor_name)
+                # The new sensor will go into the sensor namespace specified
+                ArgoClient(namespace=ARGO_EVENTS_SENSOR_NAMESPACE).register_sensor(
+                    sensor_name, self._sensor.to_json(), ARGO_EVENTS_SENSOR_NAMESPACE
+                )
         except Exception as e:
             raise ArgoWorkflowsSchedulingException(str(e))
 
@@ -541,7 +618,16 @@ class ArgoWorkflows(object):
             # the JSON equivalent of None to please argo-workflows. Unfortunately it
             # has the side effect of casting the parameter value to string null during
             # execution - which needs to be fixed imminently.
-            if not is_required or default_value is not None:
+            if default_value is None:
+                default_value = json.dumps(None)
+            elif param_type == "JSON":
+                if not isinstance(default_value, str):
+                    # once to serialize the default value if needed.
+                    default_value = json.dumps(default_value)
+                # adds outer quotes to param
+                default_value = json.dumps(default_value)
+            else:
+                # Make argo sensors happy
                 default_value = json.dumps(default_value)
 
             parameters[param.name] = dict(
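The JSON branch serializes twice on purpose; a worked example of the two passes:

```python
import json

default_value = {"alpha": 1}        # a JSON-typed parameter default
once = json.dumps(default_value)    # '{"alpha": 1}'  (serialize the value)
twice = json.dumps(once)            # '"{\\"alpha\\": 1}"'  (add outer quotes)
```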
@@ -551,7 +637,7 @@ class ArgoWorkflows(object):
                 type=param_type,
                 description=param.kwargs.get("help"),
                 is_required=is_required,
-                **extra_attrs
+                **extra_attrs,
             )
         return parameters
 
@@ -735,6 +821,7 @@ class ArgoWorkflows(object):
         # references to them within the DAGTask.
 
         annotations = {}
+
         if self._schedule is not None:
             # timezone is an optional field and json dumps on None will result in null
             # hence configuring it to an empty string
@@ -757,7 +844,9 @@ class ArgoWorkflows(object):
                         {key: trigger.get(key) for key in ["name", "type"]}
                         for trigger in self.triggers
                     ]
-                )
+                ),
+                "metaflow/sensor_name": ArgoWorkflows._sensor_name(self.name),
+                "metaflow/sensor_namespace": ARGO_EVENTS_SENSOR_NAMESPACE,
             }
         )
         if self.notify_on_error:
@@ -796,6 +885,7 @@ class ArgoWorkflows(object):
 
         dag_annotation = {"metaflow/dag": json.dumps(graph_info)}
 
+        lifecycle_hooks = self._lifecycle_hooks()
         return (
             WorkflowTemplate()
             .metadata(
@@ -845,7 +935,16 @@ class ArgoWorkflows(object):
                 .annotations(
                     {
                         **annotations,
-                        **self._base_annotations,
+                        **{
+                            k: v
+                            for k, v in self._base_annotations.items()
+                            if k
+                            # Skip custom title/description for workflows as this makes it harder to find specific runs.
+                            not in [
+                                "workflows.argoproj.io/title",
+                                "workflows.argoproj.io/description",
+                            ]
+                        },
                         **{"metaflow/run_id": "argo-{{workflow.name}}"},
                     }
                 )
@@ -860,11 +959,7 @@ class ArgoWorkflows(object):
                     Arguments().parameters(
                         [
                             Parameter(parameter["name"])
-                            .value(
-                                "'%s'" % parameter["value"]
-                                if parameter["type"] == "JSON"
-                                else parameter["value"]
-                            )
+                            .value(parameter["value"])
                             .description(parameter.get("description"))
                             # TODO: Better handle IncludeFile in Argo Workflows UI.
                             for parameter in self.parameters.values()
@@ -904,97 +999,20 @@ class ArgoWorkflows(object):
                     if self.enable_error_msg_capture
                     else None
                 )
-                # Set exit hook handlers if notifications are enabled
+                # Set lifecycle hooks if notifications are enabled
                .hooks(
                    {
-                        **(
-                            {
-                                # workflow status maps to Completed
-                                "notify-slack-on-success": LifecycleHook()
-                                .expression("workflow.status == 'Succeeded'")
-                                .template("notify-slack-on-success"),
-                            }
-                            if self.notify_on_success and self.notify_slack_webhook_url
-                            else {}
-                        ),
-                        **(
-                            {
-                                # workflow status maps to Completed
-                                "notify-pager-duty-on-success": LifecycleHook()
-                                .expression("workflow.status == 'Succeeded'")
-                                .template("notify-pager-duty-on-success"),
-                            }
-                            if self.notify_on_success
-                            and self.notify_pager_duty_integration_key
-                            else {}
-                        ),
-                        **(
-                            {
-                                # workflow status maps to Completed
-                                "notify-incident-io-on-success": LifecycleHook()
-                                .expression("workflow.status == 'Succeeded'")
-                                .template("notify-incident-io-on-success"),
-                            }
-                            if self.notify_on_success
-                            and self.notify_incident_io_api_key
-                            else {}
-                        ),
-                        **(
-                            {
-                                # workflow status maps to Failed or Error
-                                "notify-slack-on-failure": LifecycleHook()
-                                .expression("workflow.status == 'Failed'")
-                                .template("notify-slack-on-error"),
-                                "notify-slack-on-error": LifecycleHook()
-                                .expression("workflow.status == 'Error'")
-                                .template("notify-slack-on-error"),
-                            }
-                            if self.notify_on_error and self.notify_slack_webhook_url
-                            else {}
-                        ),
-                        **(
-                            {
-                                # workflow status maps to Failed or Error
-                                "notify-pager-duty-on-failure": LifecycleHook()
-                                .expression("workflow.status == 'Failed'")
-                                .template("notify-pager-duty-on-error"),
-                                "notify-pager-duty-on-error": LifecycleHook()
-                                .expression("workflow.status == 'Error'")
-                                .template("notify-pager-duty-on-error"),
-                            }
-                            if self.notify_on_error
-                            and self.notify_pager_duty_integration_key
-                            else {}
-                        ),
-                        **(
-                            {
-                                # workflow status maps to Failed or Error
-                                "notify-incident-io-on-failure": LifecycleHook()
-                                .expression("workflow.status == 'Failed'")
-                                .template("notify-incident-io-on-error"),
-                                "notify-incident-io-on-error": LifecycleHook()
-                                .expression("workflow.status == 'Error'")
-                                .template("notify-incident-io-on-error"),
-                            }
-                            if self.notify_on_error and self.notify_incident_io_api_key
-                            else {}
-                        ),
-                        # Warning: terrible hack to workaround a bug in Argo Workflow
-                        # where the hooks listed above do not execute unless
-                        # there is an explicit exit hook. as and when this
-                        # bug is patched, we should remove this effectively
-                        # no-op hook.
-                        **(
-                            {"exit": LifecycleHook().template("exit-hook-hack")}
-                            if self.notify_on_error or self.notify_on_success
-                            else {}
-                        ),
+                        lifecycle.name: lifecycle
+                        for hook in lifecycle_hooks
+                        for lifecycle in hook.lifecycle_hooks
                    }
                )
                # Top-level DAG template(s)
                .templates(self._dag_templates())
                # Container templates
                .templates(self._container_templates())
+                # Lifecycle hook template(s)
+                .templates([hook.template for hook in lifecycle_hooks])
                # Exit hook template(s)
                .templates(self._exit_hook_templates())
                # Sidecar templates (Daemon Containers)
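The dict comprehension above assumes each object returned by `_lifecycle_hooks()` exposes a `template` plus named `lifecycle_hooks`; the concrete classes live in the new `metaflow/plugins/argo/exit_hooks.py` module added in this release. A shape-only sketch, not the shipped class:

```python
# Illustrative shape assumed by the comprehension; see exit_hooks.py for
# the real ExitHookHack / HttpExitHook / ContainerHook implementations.
class HookLike:
    def __init__(self, template, lifecycle_hooks):
        self.template = template                 # a Template registered separately
        self.lifecycle_hooks = lifecycle_hooks   # LifecycleHook objects with .name
```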
@@ -1002,6 +1020,199 @@ class ArgoWorkflows(object):
             )
         )
 
+    # Visit every node and record information on conditional step structure
+    def _parse_conditional_branches(self):
+        self.conditional_nodes = set()
+        self.conditional_join_nodes = set()
+        self.matching_conditional_join_dict = {}
+        self.recursive_nodes = set()
+
+        node_conditional_parents = {}
+        node_conditional_branches = {}
+
+        def _visit(node, conditional_branch, conditional_parents=None):
+            if not node.type == "split-switch" and not (
+                conditional_branch and conditional_parents
+            ):
+                # skip regular non-conditional nodes entirely
+                return
+
+            if node.type == "split-switch":
+                conditional_branch = conditional_branch + [node.name]
+                c_br = node_conditional_branches.get(node.name, [])
+                node_conditional_branches[node.name] = c_br + [
+                    b for b in conditional_branch if b not in c_br
+                ]
+
+                conditional_parents = (
+                    [node.name]
+                    if not conditional_parents
+                    else conditional_parents + [node.name]
+                )
+                node_conditional_parents[node.name] = conditional_parents
+
+                # check for recursion. this split is recursive if any of its out functions are itself.
+                if any(
+                    out_func for out_func in node.out_funcs if out_func == node.name
+                ):
+                    self.recursive_nodes.add(node.name)
+
+            if conditional_parents and not node.type == "split-switch":
+                node_conditional_parents[node.name] = conditional_parents
+                conditional_branch = conditional_branch + [node.name]
+                c_br = node_conditional_branches.get(node.name, [])
+                node_conditional_branches[node.name] = c_br + [
+                    b for b in conditional_branch if b not in c_br
+                ]
+
+                self.conditional_nodes.add(node.name)
+
+            if conditional_branch and conditional_parents:
+                for n in node.out_funcs:
+                    child = self.graph[n]
+                    if child.name == node.name:
+                        continue
+                    _visit(child, conditional_branch, conditional_parents)
+
+        # First we visit all nodes to determine conditional parents and branches
+        for n in self.graph:
+            _visit(n, [])
+
+        # helper to clean up conditional info for all children of a node, until a new split-switch is encountered.
+        def _cleanup_conditional_status(node_name, seen):
+            if self.graph[node_name].type == "split-switch":
+                # stop recursive cleanup if we hit a new split-switch
+                return
+            if node_name in self.conditional_nodes:
+                self.conditional_nodes.remove(node_name)
+            node_conditional_parents[node_name] = []
+            node_conditional_branches[node_name] = []
+            for p in self.graph[node_name].out_funcs:
+                if p not in seen:
+                    _cleanup_conditional_status(p, seen + [p])
+
+        # Then we traverse again in order to determine conditional join nodes, and matching conditional join info
+        for node in self.graph:
+            if node_conditional_parents.get(node.name, False):
+                # do the required postprocessing for anything requiring node.in_funcs
+
+                # check that in previous parsing we have not closed all conditional in_funcs.
+                # If so, this step can not be conditional either
+                is_conditional = any(
+                    in_func in self.conditional_nodes
+                    or self.graph[in_func].type == "split-switch"
+                    for in_func in node.in_funcs
+                )
+                if is_conditional:
+                    self.conditional_nodes.add(node.name)
+                else:
+                    if node.name in self.conditional_nodes:
+                        self.conditional_nodes.remove(node.name)
+
+                # does this node close the latest conditional parent branches?
+                conditional_in_funcs = [
+                    in_func
+                    for in_func in node.in_funcs
+                    if node_conditional_branches.get(in_func, False)
+                ]
+                closed_conditional_parents = []
+                for last_split_switch in node_conditional_parents.get(node.name, [])[
+                    ::-1
+                ]:
+                    last_conditional_split_nodes = self.graph[
+                        last_split_switch
+                    ].out_funcs
+                    # NOTE: How do we define a conditional join step?
+                    # The idea here is that we check if the conditional branches(e.g. chains of conditional steps leading to) of all the in_funcs
+                    # manage to tick off every step name that follows a split-switch
+                    # For example, consider the following structure
+                    # switch_step -> A, B, C
+                    # A -> A2 -> A3 -> A4 -> B2
+                    # B -> B2 -> B3 -> C3
+                    # C -> C2 -> C3 -> end
+                    #
+                    # if we look at the in_funcs for C3, they are (C2, B3)
+                    # B3 closes off branches started by A and B
+                    # C3 closes off branches started by C
+                    # therefore C3 is a conditional join step for the 'switch_step'
+                    # NOTE: Then what about a skip step?
+                    # some switch cases might not introduce any distinct steps of their own, opting to instead skip ahead to a later common step.
+                    # Example:
+                    # switch_step -> A, B, C
+                    # A -> A1 -> B2 -> C
+                    # B -> B1 -> B2 -> C
+                    #
+                    # In this case, C is a skip step as it does not add any conditional branching of its own.
+                    # C is also a conditional join, as it closes all branches started by 'switch_step'
+
+                    closes_branches = all(
+                        (
+                            # branch_root_node_name needs to be in at least one conditional_branch for it to be closed.
+                            any(
+                                branch_root_node_name
+                                in node_conditional_branches.get(in_func, [])
+                                for in_func in conditional_in_funcs
+                            )
+                            # need to account for a switch case skipping completely, not having a conditional-branch of its own.
+                            if branch_root_node_name != node.name
+                            else True
+                        )
+                        for branch_root_node_name in last_conditional_split_nodes
+                    )
+                    if closes_branches:
+                        closed_conditional_parents.append(last_split_switch)
+
+                        self.conditional_join_nodes.add(node.name)
+                        self.matching_conditional_join_dict[last_split_switch] = (
+                            node.name
+                        )
+
+                # Did we close all conditionals? Then this branch and all its children are not conditional anymore (unless a new conditional branch is encountered).
+                if not [
+                    p
+                    for p in node_conditional_parents.get(node.name, [])
+                    if p not in closed_conditional_parents
+                ]:
+                    _cleanup_conditional_status(node.name, [])
+
+    def _is_conditional_node(self, node):
+        return node.name in self.conditional_nodes
+
+    def _is_conditional_skip_node(self, node):
+        return (
+            self._is_conditional_node(node)
+            and any(
+                self.graph[in_func].type == "split-switch" for in_func in node.in_funcs
+            )
+            and len(
+                [
+                    in_func
+                    for in_func in node.in_funcs
+                    if self._is_conditional_node(self.graph[in_func])
+                    or self.graph[in_func].type == "split-switch"
+                ]
+            )
+            > 1
+        )
+
+    def _is_conditional_join_node(self, node):
+        return node.name in self.conditional_join_nodes
+
+    def _many_in_funcs_all_conditional(self, node):
+        cond_in_funcs = [
+            in_func
+            for in_func in node.in_funcs
+            if self._is_conditional_node(self.graph[in_func])
+        ]
+        return len(cond_in_funcs) > 1 and len(cond_in_funcs) == len(node.in_funcs)
+
+    def _is_recursive_node(self, node):
+        return node.name in self.recursive_nodes
+
+    def _matching_conditional_join(self, node):
+        # If no earlier conditional join step is found during parsing, then 'end' is always one.
+        return self.matching_conditional_join_dict.get(node.name, "end")
+
     # Visit every node and yield the uber DAGTemplate(s).
     def _dag_templates(self):
         def _visit(
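The "conditional join" test described in the NOTE inside `_parse_conditional_branches` can be reproduced standalone; this sketch mirrors the `switch_step -> A, B, C` example from the comments:

```python
# A join closes a split-switch once the branches reaching it tick off
# every step that directly follows the switch (here: A, B and C).
branches = {
    "C2": ["C"],       # chain C -> C2
    "B3": ["A", "B"],  # B2 merges the A and B chains; B3 inherits both roots
}
switch_out_funcs = ["A", "B", "C"]  # out_funcs of switch_step
in_funcs = ["C2", "B3"]             # in_funcs of candidate join C3

closes = all(
    any(root in branches.get(f, []) for f in in_funcs)
    for root in switch_out_funcs
)
print(closes)  # True -> C3 is the conditional join for switch_step
```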
@@ -1010,6 +1221,7 @@
             templates=None,
             dag_tasks=None,
             parent_foreach=None,
+            seen=None,
         ):  # Returns Tuple[List[Template], List[DAGTask]]
             """ """
             # Every for-each node results in a separate subDAG and an equivalent
@@ -1019,18 +1231,28 @@
             # of the for-each node.
 
             # Emit if we have reached the end of the sub workflow
+            if seen is None:
+                seen = []
             if dag_tasks is None:
                 dag_tasks = []
             if templates is None:
                 templates = []
+
             if exit_node is not None and exit_node is node.name:
                 return templates, dag_tasks
+            if node.name in seen:
+                return templates, dag_tasks
+
+            seen.append(node.name)
+
+            # helper variable for recursive conditional inputs
+            has_foreach_inputs = False
             if node.name == "start":
                 # Start node has no dependencies.
                 dag_task = DAGTask(self._sanitize(node.name)).template(
                     self._sanitize(node.name)
                 )
-            elif (
+            if (
                 node.is_inside_foreach
                 and self.graph[node.in_funcs[0]].type == "foreach"
                 and not self.graph[node.in_funcs[0]].parallel_foreach
@@ -1038,9 +1260,10 @@
                 # vs what is a "num_parallel" based foreach (i.e. something that follows gang semantics.)
                 # A `regular` foreach is basically any arbitrary kind of foreach.
             ):
+                # helper variable for recursive conditional inputs
+                has_foreach_inputs = True
                 # Child of a foreach node needs input-paths as well as split-index
                 # This child is the first node of the sub workflow and has no dependency
-
                 parameters = [
                     Parameter("input-paths").value("{{inputs.parameters.input-paths}}"),
                     Parameter("split-index").value("{{inputs.parameters.split-index}}"),
@@ -1164,23 +1387,89 @@
                 ]
             )
 
+            conditional_deps = [
+                "%s.Succeeded" % self._sanitize(in_func)
+                for in_func in node.in_funcs
+                if self._is_conditional_node(self.graph[in_func])
+                or self.graph[in_func].type == "split-switch"
+            ]
+            required_deps = [
+                "%s.Succeeded" % self._sanitize(in_func)
+                for in_func in node.in_funcs
+                if not self._is_conditional_node(self.graph[in_func])
+                and self.graph[in_func].type != "split-switch"
+            ]
+            if self._is_conditional_skip_node(
+                node
+            ) or self._many_in_funcs_all_conditional(node):
+                # skip nodes need unique condition handling
+                conditional_deps = [
+                    "%s.Succeeded" % self._sanitize(in_func)
+                    for in_func in node.in_funcs
+                ]
+                required_deps = []
+
+            both_conditions = required_deps and conditional_deps
+
+            depends_str = "{required}{_and}{conditional}".format(
+                required=("(%s)" if both_conditions else "%s")
+                % " && ".join(required_deps),
+                _and=" && " if both_conditions else "",
+                conditional=("(%s)" if both_conditions else "%s")
+                % " || ".join(conditional_deps),
+            )
             dag_task = (
                 DAGTask(self._sanitize(node.name))
-                .dependencies(
-                    [self._sanitize(in_func) for in_func in node.in_funcs]
-                )
+                .depends(depends_str)
                 .template(self._sanitize(node.name))
                 .arguments(Arguments().parameters(parameters))
             )
 
+            # Add conditional if this is the first step in a conditional branch
+            switch_in_funcs = [
+                in_func
+                for in_func in node.in_funcs
+                if self.graph[in_func].type == "split-switch"
+            ]
+            if (
+                self._is_conditional_node(node)
+                or self._is_conditional_skip_node(node)
+                or self._is_conditional_join_node(node)
+            ) and switch_in_funcs:
+                conditional_when = "||".join(
+                    [
+                        "{{tasks.%s.outputs.parameters.switch-step}}==%s"
+                        % (self._sanitize(switch_in_func), node.name)
+                        for switch_in_func in switch_in_funcs
+                    ]
+                )
+
+                non_switch_in_funcs = [
+                    in_func
+                    for in_func in node.in_funcs
+                    if in_func not in switch_in_funcs
+                ]
+                status_when = ""
+                if non_switch_in_funcs:
+                    status_when = "||".join(
+                        [
+                            "{{tasks.%s.status}}==Succeeded"
+                            % self._sanitize(in_func)
+                            for in_func in non_switch_in_funcs
+                        ]
+                    )
+
+                total_when = (
+                    f"({status_when}) || ({conditional_when})"
+                    if status_when
+                    else conditional_when
+                )
+                dag_task.when(total_when)
+
             dag_tasks.append(dag_task)
             # End the workflow if we have reached the end of the flow
             if node.type == "end":
-                return [
-                    Template(self.flow.name).dag(
-                        DAGTemplate().fail_fast().tasks(dag_tasks)
-                    )
-                ] + templates, dag_tasks
+                return templates, dag_tasks
             # For split nodes traverse all the children
             if node.type == "split":
                 for n in node.out_funcs:
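The `depends` expression produced above ANDs the unconditional parents and ORs the conditional ones; running the same format string in isolation:

```python
# Two required in_funcs (a, b) and two conditional ones (c, d):
required_deps = ["a.Succeeded", "b.Succeeded"]
conditional_deps = ["c.Succeeded", "d.Succeeded"]
both = required_deps and conditional_deps
depends = "{r}{a}{c}".format(
    r=("(%s)" if both else "%s") % " && ".join(required_deps),
    a=" && " if both else "",
    c=("(%s)" if both else "%s") % " || ".join(conditional_deps),
)
print(depends)  # (a.Succeeded && b.Succeeded) && (c.Succeeded || d.Succeeded)
```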
@@ -1190,6 +1479,7 @@
                         templates,
                         dag_tasks,
                         parent_foreach,
+                        seen,
                     )
                 return _visit(
                     self.graph[node.matching_join],
@@ -1197,6 +1487,118 @@
                     templates,
                     dag_tasks,
                     parent_foreach,
+                    seen,
+                )
+            elif node.type == "split-switch":
+                if self._is_recursive_node(node):
+                    # we need an additional recursive template if the step is recursive
+                    # NOTE: in the recursive case, the original step is renamed in the container templates to 'recursive-<step_name>'
+                    # so that we do not have to touch the step references in the DAG.
+                    #
+                    # NOTE: The way that recursion in Argo Workflows is achieved is with the following structure:
+                    # - the usual 'example-step' template which would match example_step in flow code is renamed to 'recursive-example-step'
+                    # - templates has another template with the original task name: 'example-step'
+                    # - the template 'example-step' in turn has steps
+                    #   - 'example-step-internal' which uses the metaflow step executing template 'recursive-example-step'
+                    #   - 'example-step-recursion' which calls the parent template 'example-step' if switch-step output from 'example-step-internal' matches the condition.
+                    sanitized_name = self._sanitize(node.name)
+                    templates.append(
+                        Template(sanitized_name)
+                        .steps(
+                            [
+                                WorkflowStep()
+                                .name("%s-internal" % sanitized_name)
+                                .template("recursive-%s" % sanitized_name)
+                                .arguments(
+                                    Arguments().parameters(
+                                        [
+                                            Parameter("input-paths").value(
+                                                "{{inputs.parameters.input-paths}}"
+                                            )
+                                        ]
+                                        # Add the additional inputs required by specific node types.
+                                        # We do not need to cover joins or @parallel, as a split-switch step can not be either one of these.
+                                        + (
+                                            [
+                                                Parameter("split-index").value(
+                                                    "{{inputs.parameters.split-index}}"
+                                                )
+                                            ]
+                                            if has_foreach_inputs
+                                            else []
+                                        )
+                                    )
+                                )
+                            ]
+                        )
+                        .steps(
+                            [
+                                WorkflowStep()
+                                .name("%s-recursion" % sanitized_name)
+                                .template(sanitized_name)
+                                .when(
+                                    "{{steps.%s-internal.outputs.parameters.switch-step}}==%s"
+                                    % (sanitized_name, node.name)
+                                )
+                                .arguments(
+                                    Arguments().parameters(
+                                        [
+                                            Parameter("input-paths").value(
+                                                "argo-{{workflow.name}}/%s/{{steps.%s-internal.outputs.parameters.task-id}}"
+                                                % (node.name, sanitized_name)
+                                            )
+                                        ]
+                                        + (
+                                            [
+                                                Parameter("split-index").value(
+                                                    "{{inputs.parameters.split-index}}"
+                                                )
+                                            ]
+                                            if has_foreach_inputs
+                                            else []
+                                        )
+                                    )
+                                ),
+                            ]
+                        )
+                        .inputs(Inputs().parameters(parameters))
+                        .outputs(
+                            # NOTE: We try to read the output parameters from the recursive template call first (<step>-recursion), and the internal step second (<step>-internal).
+                            # This guarantees that we always get the output parameters of the last recursive step that executed.
+                            Outputs().parameters(
+                                [
+                                    Parameter("task-id").valueFrom(
+                                        {
+                                            "expression": "(steps['%s-recursion']?.outputs ?? steps['%s-internal']?.outputs).parameters['task-id']"
+                                            % (sanitized_name, sanitized_name)
+                                        }
+                                    ),
+                                    Parameter("switch-step").valueFrom(
+                                        {
+                                            "expression": "(steps['%s-recursion']?.outputs ?? steps['%s-internal']?.outputs).parameters['switch-step']"
+                                            % (sanitized_name, sanitized_name)
+                                        }
+                                    ),
+                                ]
+                            )
+                        )
+                    )
+                for n in node.out_funcs:
+                    _visit(
+                        self.graph[n],
+                        self._matching_conditional_join(node),
+                        templates,
+                        dag_tasks,
+                        parent_foreach,
+                        seen,
+                    )
+                return _visit(
+                    self.graph[self._matching_conditional_join(node)],
+                    exit_node,
+                    templates,
+                    dag_tasks,
+                    parent_foreach,
+                    seen,
                 )
             # For foreach nodes generate a new sub DAGTemplate
             # We do this for "regular" foreaches (ie. `self.next(self.a, foreach=)`)
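Rendered to a manifest, the recursion NOTE above corresponds roughly to this structure (a dict sketch of the YAML for a step named `example_step`; field names abbreviated, values illustrative):

```python
recursive_template = {
    "name": "example-step",  # keeps the original name the DAG refers to
    "steps": [
        [{"name": "example-step-internal",
          "template": "recursive-example-step"}],  # runs the Metaflow step
        [{"name": "example-step-recursion",
          "template": "example-step",              # re-enters this template
          "when": "{{steps.example-step-internal.outputs.parameters.switch-step}}==example_step"}],
    ],
}
```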
@@ -1225,7 +1627,7 @@
             #
             foreach_task = (
                 DAGTask(foreach_template_name)
-                .dependencies([self._sanitize(node.name)])
+                .depends(f"{self._sanitize(node.name)}.Succeeded")
                 .template(foreach_template_name)
                 .arguments(
                     Arguments().parameters(
@@ -1270,6 +1672,16 @@
                         % self._sanitize(node.name)
                     )
                 )
+            # Add conditional if this is the first step in a conditional branch
+            if self._is_conditional_node(node) and not any(
+                self._is_conditional_node(self.graph[in_func])
+                for in_func in node.in_funcs
+            ):
+                in_func = node.in_funcs[0]
+                foreach_task.when(
+                    "{{tasks.%s.outputs.parameters.switch-step}}==%s"
+                    % (self._sanitize(in_func), node.name)
+                )
             dag_tasks.append(foreach_task)
             templates, dag_tasks_1 = _visit(
                 self.graph[node.out_funcs[0]],
@@ -1277,6 +1689,7 @@
                 templates,
                 [],
                 node.name,
+                seen,
             )
 
             # How do foreach's work on Argo:
@@ -1313,7 +1726,22 @@
                                     self.graph[node.matching_join].in_funcs[0]
                                 )
                             }
-                        )
+                            if not self._is_conditional_join_node(
+                                self.graph[node.matching_join]
+                            )
+                            else
+                            # Note: If the nodes leading to the join are conditional, then we need to use an expression to pick the outputs from the task that executed.
+                            # ref for operators: https://github.com/expr-lang/expr/blob/master/docs/language-definition.md
+                            {
+                                "expression": "get((%s)?.parameters, 'task-id')"
+                                % " ?? ".join(
+                                    f"tasks['{self._sanitize(func)}']?.outputs"
+                                    for func in self.graph[
+                                        node.matching_join
+                                    ].in_funcs
+                                )
+                            }
+                        ),
                     ]
                     if not node.parallel_foreach
                     else [
@@ -1346,7 +1774,7 @@
                 join_foreach_task = (
                     DAGTask(self._sanitize(self.graph[node.matching_join].name))
                     .template(self._sanitize(self.graph[node.matching_join].name))
-                    .dependencies([foreach_template_name])
+                    .depends(f"{foreach_template_name}.Succeeded")
                     .arguments(
                         Arguments().parameters(
                             (
@@ -1395,6 +1823,7 @@
                     templates,
                     dag_tasks,
                     parent_foreach,
+                    seen,
                 )
             # For linear nodes continue traversing to the next node
             if node.type in ("linear", "join", "start"):
@@ -1404,6 +1833,7 @@
                     templates,
                     dag_tasks,
                     parent_foreach,
+                    seen,
                 )
             else:
                 raise ArgoWorkflowsException(
@@ -1417,7 +1847,11 @@
             for daemon_template in self._daemon_templates()
         ]
 
-        templates, _ = _visit(node=self.graph["start"], dag_tasks=daemon_tasks)
+        templates, dag_tasks = _visit(node=self.graph["start"], dag_tasks=daemon_tasks)
+        # Add the DAG template only after fully traversing the graph so we are guaranteed to have all the dag_tasks collected.
+        templates.append(
+            Template(self.flow.name).dag(DAGTemplate().fail_fast().tasks(dag_tasks))
+        )
         return templates
 
     # Visit every node and yield ContainerTemplates.
@@ -1473,6 +1907,18 @@
             input_paths_expr = (
                 "export INPUT_PATHS={{inputs.parameters.input-paths}}"
             )
+            if (
+                self._is_conditional_join_node(node)
+                or self._many_in_funcs_all_conditional(node)
+                or self._is_conditional_skip_node(node)
+            ):
+                # NOTE: Argo template expressions that fail to resolve, output the expression itself as a value.
+                # With conditional steps, some of the input-paths are therefore 'broken' due to containing a nil expression
+                # e.g. "{{ tasks['A'].outputs.parameters.task-id }}" when task A never executed.
+                # We base64 encode the input-paths in order to not pollute the execution environment with templating expressions.
+                # NOTE: Adding conditionals that check if a key exists or not does not work either, due to an issue with how Argo
+                # handles tasks in a nested foreach (withParam template) leading to all such expressions getting evaluated as false.
+                input_paths_expr = "export INPUT_PATHS={{=toBase64(inputs.parameters['input-paths'])}}"
             input_paths = "$(echo $INPUT_PATHS)"
             if any(self.graph[n].type == "foreach" for n in node.in_funcs):
                 task_idx = "{{inputs.parameters.split-index}}"
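A quick round-trip shows why base64 is safe here: the shell only ever sees an opaque token, even when an unresolved Argo expression is embedded in the paths:

```python
import base64

raw = "argo-x/start/{{tasks['skipped'].outputs.parameters.task-id}}"
token = base64.b64encode(raw.encode()).decode()  # no braces, quotes or $ left
assert base64.b64decode(token).decode() == raw
```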
@@ -1488,7 +1934,6 @@
             # foreaches
             task_idx = "{{inputs.parameters.split-index}}"
             root_input = "{{inputs.parameters.root-input-path}}"
-
         # Task string to be hashed into an ID
         task_str = "-".join(
             [
@@ -1571,7 +2016,9 @@
                 mflog_expr,
             ]
             + self.environment.get_package_commands(
-                self.code_package_url, self.flow_datastore.TYPE
+                self.code_package_url,
+                self.flow_datastore.TYPE,
+                self.code_package_metadata,
             )
         )
         step_cmds = self.environment.bootstrap_commands(
@@ -1583,6 +2030,7 @@
                 decorator.make_decorator_spec()
                 for decorator in node.decorators
                 if not decorator.statically_defined
+                and decorator.inserted_by is None
             ]
         }
         # FlowDecorators can define their own top-level options. They are
@@ -1620,7 +2068,7 @@
                 # {{foo.bar['param_name']}}.
                 # https://argoproj.github.io/argo-events/tutorials/02-parameterization/
                 # http://masterminds.github.io/sprig/strings.html
-                "--%s={{workflow.parameters.%s}}"
+                "--%s=\\\"$(python -m metaflow.plugins.argo.param_val {{=toBase64(workflow.parameters['%s'])}})\\\""
                 % (parameter["name"], parameter["name"])
                 for parameter in self.parameters.values()
             ]
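`metaflow.plugins.argo.param_val` is new in this release (+19 lines); judging purely from the call site above, its job is presumably to base64-decode the workflow parameter and print it so the shell never interprets quoting-sensitive characters. A hypothetical sketch, not the shipped module:

```python
# Hypothetical reconstruction of param_val's contract, inferred from the
# "--%s=$(python -m metaflow.plugins.argo.param_val <b64>)" call site above.
import base64
import sys

print(base64.b64decode(sys.argv[1]).decode(), end="")
```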
@@ -1642,10 +2090,40 @@
                 ]
             )
             input_paths = "%s/_parameters/%s" % (run_id, task_id_params)
+        # Only for static joins and conditional_joins
+        elif (
+            self._is_conditional_join_node(node)
+            or self._many_in_funcs_all_conditional(node)
+            or self._is_conditional_skip_node(node)
+        ) and not (
+            node.type == "join"
+            and self.graph[node.split_parents[-1]].type == "foreach"
+        ):
+            # we need to pass in the set of conditional in_funcs to the pathspec generating script as in the case of split-switch skipping cases,
+            # non-conditional input-paths need to be ignored in favour of conditional ones when they have executed.
+            skippable_input_steps = ",".join(
+                [
+                    in_func
+                    for in_func in node.in_funcs
+                    if self.graph[in_func].type == "split-switch"
+                ]
+            )
+            input_paths = (
+                "$(python -m metaflow.plugins.argo.conditional_input_paths %s %s)"
+                % (input_paths, skippable_input_steps)
+            )
         elif (
             node.type == "join"
             and self.graph[node.split_parents[-1]].type == "foreach"
         ):
+            # foreach-joins straight out of conditional branches are not yet supported
+            if self._is_conditional_join_node(node) and len(node.in_funcs) > 1:
+                raise ArgoWorkflowsException(
+                    "Conditional steps inside a foreach that transition directly into a join step are not currently supported.\n"
+                    "As a workaround, add a common step after the conditional steps %s "
+                    "that will transition to a join."
+                    % ", ".join("*%s*" % f for f in node.in_funcs)
+                )
             # Set aggregated input-paths for a for-each join
             foreach_step = next(
                 n for n in node.in_funcs if self.graph[n].is_inside_foreach
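`metaflow.plugins.argo.conditional_input_paths` is likewise new (+35 lines). Given the NOTEs above, a plausible reading of its contract is: decode the base64 paths, drop entries that still contain unresolved `{{...}}` expressions, and prefer executed conditional paths over skippable switch ones. A hypothetical sketch only:

```python
# Hypothetical contract, not the shipped code.
import base64

def filter_input_paths(encoded_paths: str, skippable_steps: str) -> str:
    paths = base64.b64decode(encoded_paths).decode().split(",")
    resolved = [p for p in paths if "{{" not in p]  # drop nil expressions
    skippable = set(skippable_steps.split(",")) if skippable_steps else set()
    preferred = [p for p in resolved if p.split("/")[-2] not in skippable]
    return ",".join(preferred or resolved)
```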
@@ -1667,6 +2145,8 @@
                     foreach_step,
                 )
             )
+        # NOTE: input-paths might be extremely lengthy so we dump these to disk instead of passing them directly to the cmd
+        step_cmds.append("echo %s >> /tmp/mf-input-paths" % input_paths)
         step = [
             "step",
             node.name,
@@ -1674,7 +2154,7 @@
             "--task-id %s" % task_id,
             "--retry-count %s" % retry_count,
             "--max-user-code-retries %d" % user_code_retries,
-            "--input-paths %s" % input_paths,
+            "--input-paths-filename /tmp/mf-input-paths",
         ]
         if node.parallel_step:
             step.append(
@@ -1749,6 +2229,7 @@
             **{
                 # These values are needed by Metaflow to set it's internal
                 # state appropriately.
+                "METAFLOW_CODE_METADATA": self.code_package_metadata,
                 "METAFLOW_CODE_URL": self.code_package_url,
                 "METAFLOW_CODE_SHA": self.code_package_sha,
                 "METAFLOW_CODE_DS": self.flow_datastore.TYPE,
@@ -1887,7 +2368,7 @@
                 [Parameter("num-parallel"), Parameter("task-id-entropy")]
             )
             else:
-                # append this only for joins of foreaches, not static splits
+                # append these only for joins of foreaches, not static splits
                 inputs.append(Parameter("split-cardinality"))
         # check if the node is a @parallel node.
         elif node.parallel_step:
@@ -1922,6 +2403,13 @@
         # are derived at runtime.
         if not (node.name == "end" or node.parallel_step):
             outputs = [Parameter("task-id").valueFrom({"path": "/mnt/out/task_id"})]
+
+            # If this step is a split-switch one, we need to output the switch step name
+            if node.type == "split-switch":
+                outputs.append(
+                    Parameter("switch-step").valueFrom({"path": "/mnt/out/switch_step"})
+                )
+
         if node.type == "foreach":
             # Emit split cardinality from foreach task
             outputs.append(
@@ -1955,17 +2443,10 @@
             and k not in set(ARGO_WORKFLOWS_ENV_VARS_TO_SKIP.split(","))
         }
 
-        # get initial configs
-        initial_configs = init_config()
-        for entry in ["OBP_PERIMETER", "OBP_INTEGRATIONS_URL"]:
-            if entry not in initial_configs:
-                raise ArgoWorkflowsException(
-                    f"{entry} was not found in metaflow config. Please make sure to run `outerbounds configure <...>` command which can be found on the Ourebounds UI or reach out to your Outerbounds support team."
-                )
-
+        # OBP configs
         additional_obp_configs = {
-            "OBP_PERIMETER": initial_configs["OBP_PERIMETER"],
-            "OBP_INTEGRATIONS_URL": initial_configs["OBP_INTEGRATIONS_URL"],
+            "OBP_PERIMETER": self.initial_configs["OBP_PERIMETER"],
+            "OBP_INTEGRATIONS_URL": self.initial_configs["OBP_INTEGRATIONS_URL"],
         }
 
         # Tmpfs variables
@@ -2021,6 +2502,7 @@
             namespace=resources["namespace"],
             image=resources["image"],
             image_pull_policy=resources["image_pull_policy"],
+            image_pull_secrets=resources["image_pull_secrets"],
             service_account=resources["service_account"],
             secrets=(
                 [
@@ -2166,8 +2648,13 @@
                 )
             )
         else:
+            template_name = self._sanitize(node.name)
+            if self._is_recursive_node(node):
+                # The recursive template has the original step name,
+                # this becomes a template within the recursive ones 'steps'
+                template_name = self._sanitize("recursive-%s" % node.name)
             yield (
-                Template(self._sanitize(node.name))
+                Template(template_name)
                 # Set @timeout values
                 .active_deadline_seconds(run_time_limit)
                 # Set service account
@@ -2209,6 +2696,17 @@
                 .node_selectors(resources.get("node_selector"))
                 # Set tolerations
                 .tolerations(resources.get("tolerations"))
+                # Set image pull secrets if present. We need to use pod_spec_patch due to Argo not supporting this on a template level.
+                .pod_spec_patch(
+                    {
+                        "imagePullSecrets": [
+                            {"name": secret}
+                            for secret in resources["image_pull_secrets"]
+                        ]
+                    }
+                    if resources["image_pull_secrets"]
+                    else None
+                )
                 # Set container
                 .container(
                     # TODO: Unify the logic with kubernetes.py
@@ -2356,40 +2854,190 @@ class ArgoWorkflows(object):
          templates.append(self._heartbeat_daemon_template())
          return templates

-     # Return exit hook templates for workflow execution notifications.
-     def _exit_hook_templates(self):
-         templates = []
+     # Return lifecycle hooks for workflow execution notifications.
+     def _lifecycle_hooks(self):
+         hooks = []
          if self.notify_on_error:
-             templates.append(self._slack_error_template())
-             templates.append(self._pager_duty_alert_template())
-             templates.append(self._incident_io_alert_template())
+             hooks.append(self._slack_error_template())
+             hooks.append(self._pager_duty_alert_template())
+             hooks.append(self._incident_io_alert_template())
          if self.notify_on_success:
-             templates.append(self._slack_success_template())
-             templates.append(self._pager_duty_change_template())
-             templates.append(self._incident_io_change_template())
+             hooks.append(self._slack_success_template())
+             hooks.append(self._pager_duty_change_template())
+             hooks.append(self._incident_io_change_template())
+
+         exit_hook_decos = self.flow._flow_decorators.get("exit_hook", [])
+
+         for deco in exit_hook_decos:
+             hooks.extend(self._lifecycle_hook_from_deco(deco))

          # Clean up None values from templates.
-         templates = list(filter(None, templates))
-
-         if self.notify_on_error or self.notify_on_success:
-             # Warning: terrible hack to workaround a bug in Argo Workflow where the
-             # templates listed above do not execute unless there is an
-             # explicit exit hook. as and when this bug is patched, we should
-             # remove this effectively no-op template.
-             # Note: We use the Http template because changing this to an actual no-op container had the side-effect of
-             # leaving LifecycleHooks in a pending state even when they have finished execution.
-             templates.append(
-                 Template("exit-hook-hack").http(
-                     Http("GET")
-                     .url(
+         hooks = list(filter(None, hooks))
+
+         if hooks:
+             hooks.append(
+                 ExitHookHack(
+                     url=(
                          self.notify_slack_webhook_url
                          or "https://events.pagerduty.com/v2/enqueue"
                      )
-                     .success_condition("true == true")
                  )
              )
+         return hooks
+
+     def _lifecycle_hook_from_deco(self, deco):
+         from kubernetes import client as kubernetes_sdk
+
+         start_step = [step for step in self.graph if step.name == "start"][0]
+         # We want to grab the base image used by the start step, as it is known
+         # to be pullable from within the cluster and may already contain the
+         # required libraries, allowing us to start up faster.
+         start_kube_deco = [
+             deco for deco in start_step.decorators if deco.name == "kubernetes"
+         ][0]
+         resources = dict(start_kube_deco.attributes)
+         kube_defaults = dict(start_kube_deco.defaults)
+
+         # OBP Configs
+         additional_obp_configs = {
+             "OBP_PERIMETER": self.initial_configs["OBP_PERIMETER"],
+             "OBP_INTEGRATIONS_URL": self.initial_configs["OBP_INTEGRATIONS_URL"],
+         }
+
+         run_id_template = "argo-{{workflow.name}}"
+         metaflow_version = self.environment.get_environment_info()
+         metaflow_version["flow_name"] = self.graph.name
+         metaflow_version["production_token"] = self.production_token
+         env = {
+             # These values are needed by Metaflow to set its internal
+             # state appropriately.
+             "METAFLOW_CODE_URL": self.code_package_url,
+             "METAFLOW_CODE_SHA": self.code_package_sha,
+             "METAFLOW_CODE_DS": self.flow_datastore.TYPE,
+             "METAFLOW_SERVICE_URL": SERVICE_INTERNAL_URL,
+             "METAFLOW_SERVICE_HEADERS": json.dumps(SERVICE_HEADERS),
+             "METAFLOW_USER": "argo-workflows",
+             "METAFLOW_DEFAULT_DATASTORE": self.flow_datastore.TYPE,
+             "METAFLOW_DEFAULT_METADATA": DEFAULT_METADATA,
+             "METAFLOW_OWNER": self.username,
+         }
+         # pass on the Run pathspec for the hook script
+         env["RUN_PATHSPEC"] = f"{self.graph.name}/{run_id_template}"
+
+         # support Metaflow sandboxes
+         env["METAFLOW_INIT_SCRIPT"] = KUBERNETES_SANDBOX_INIT_SCRIPT
+
+         # support fetching secrets
+         env.update(additional_obp_configs)
+
+         env["METAFLOW_WORKFLOW_NAME"] = "{{workflow.name}}"
+         env["METAFLOW_WORKFLOW_NAMESPACE"] = "{{workflow.namespace}}"
+         env = {
+             k: v
+             for k, v in env.items()
+             if v is not None
+             and k not in set(ARGO_WORKFLOWS_ENV_VARS_TO_SKIP.split(","))
+         }
+
+         def _cmd(fn_name):
+             mflog_expr = export_mflog_env_vars(
+                 datastore_type=self.flow_datastore.TYPE,
+                 stdout_path="$PWD/.logs/mflog_stdout",
+                 stderr_path="$PWD/.logs/mflog_stderr",
+                 flow_name=self.flow.name,
+                 run_id=run_id_template,
+                 step_name=f"_hook_{fn_name}",
+                 task_id="1",
+                 retry_count="0",
+             )
+             cmds = " && ".join(
+                 [
+                     # For supporting sandboxes, ensure that a custom script is executed
+                     # before anything else is executed. The script is passed in as an
+                     # env var.
+                     '${METAFLOW_INIT_SCRIPT:+eval \\"${METAFLOW_INIT_SCRIPT}\\"}',
+                     "mkdir -p $PWD/.logs",
+                     mflog_expr,
+                 ]
+                 + self.environment.get_package_commands(
+                     self.code_package_url, self.flow_datastore.TYPE
+                 )[:-1]
+                 # Replace the line 'Task is starting.'
+                 + [f"mflog 'Lifecycle hook {fn_name} is starting.'"]
+                 + [
+                     f"python -m metaflow.plugins.exit_hook.exit_hook_script {metaflow_version['script']} {fn_name} $RUN_PATHSPEC"
+                 ]
+             )
+
+             cmds = shlex.split('bash -c "%s"' % cmds)
+             return cmds
+
+         def _container(cmds):
+             return to_camelcase(
+                 kubernetes_sdk.V1Container(
+                     name="main",
+                     command=cmds,
+                     image=deco.attributes["options"].get("image", None)
+                     or resources["image"],
+                     env=[
+                         kubernetes_sdk.V1EnvVar(name=k, value=str(v))
+                         for k, v in env.items()
+                     ],
+                     env_from=[
+                         kubernetes_sdk.V1EnvFromSource(
+                             secret_ref=kubernetes_sdk.V1SecretEnvSource(
+                                 name=str(k),
+                                 # optional=True
+                             )
+                         )
+                         for k in list(
+                             []
+                             if not resources.get("secrets")
+                             else (
+                                 [resources.get("secrets")]
+                                 if isinstance(resources.get("secrets"), str)
+                                 else resources.get("secrets")
+                             )
+                         )
+                         + KUBERNETES_SECRETS.split(",")
+                         + ARGO_WORKFLOWS_KUBERNETES_SECRETS.split(",")
+                         if k
+                     ],
+                     resources=kubernetes_sdk.V1ResourceRequirements(
+                         requests={
+                             "cpu": str(kube_defaults["cpu"]),
+                             "memory": "%sM" % str(kube_defaults["memory"]),
+                         }
+                     ),
+                 ).to_dict()
+             )
+
+         # create lifecycle hooks from the decorator
+         hooks = []
+         for success_fn_name in deco.success_hooks:
+             hook = ContainerHook(
+                 name=f"success-{success_fn_name.replace('_', '-')}",
+                 container=_container(cmds=_cmd(success_fn_name)),
+                 service_account_name=resources["service_account"],
+                 on_success=True,
+             )
+             hooks.append(hook)
+
+         for error_fn_name in deco.error_hooks:
+             hook = ContainerHook(
+                 name=f"error-{error_fn_name.replace('_', '-')}",
+                 service_account_name=resources["service_account"],
+                 container=_container(cmds=_cmd(error_fn_name)),
+                 on_error=True,
+             )
+             hooks.append(hook)
+
+         return hooks
+
+     def _exit_hook_templates(self):
+         templates = []
          if self.enable_error_msg_capture:
              templates.extend(self._error_msg_capture_hook_templates())
+
          return templates

      def _error_msg_capture_hook_templates(self):
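For orientation, the _lifecycle_hook_from_deco machinery above is driven by a flow-level exit_hook decorator (see self.flow._flow_decorators.get("exit_hook", [])). A hedged usage sketch, assuming the decorator is exposed as @exit_hook taking on_success/on_error lists of plain functions; each function then runs in its own lifecycle-hook container via metaflow.plugins.exit_hook.exit_hook_script:

from metaflow import FlowSpec, step, exit_hook  # exit_hook import path assumed


def notify_success():
    # the hook container is handed the run via $RUN_PATHSPEC (see env above)
    print("run succeeded")


def notify_failure():
    print("run failed")


@exit_hook(on_success=[notify_success], on_error=[notify_failure])
class HookedFlow(FlowSpec):
    @step
    def start(self):
        self.next(self.end)

    @step
    def end(self):
        pass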
@@ -2430,7 +3078,9 @@ class ArgoWorkflows(object):
                  mflog_expr,
              ]
              + self.environment.get_package_commands(
-                 self.code_package_url, self.flow_datastore.TYPE
+                 self.code_package_url,
+                 self.flow_datastore.TYPE,
+                 self.code_package_metadata,
              )[:-1]
              # Replace the line 'Task is starting.'
              # FIXME: this can be brittle.
@@ -2450,6 +3100,7 @@ class ArgoWorkflows(object):
          env = {
              # These values are needed by Metaflow to set its internal
              # state appropriately.
+             "METAFLOW_CODE_METADATA": self.code_package_metadata,
              "METAFLOW_CODE_URL": self.code_package_url,
              "METAFLOW_CODE_SHA": self.code_package_sha,
              "METAFLOW_CODE_DS": self.flow_datastore.TYPE,
@@ -2538,30 +3189,30 @@ class ArgoWorkflows(object):
          # https://developer.pagerduty.com/docs/ZG9jOjExMDI5NTgx-send-an-alert-event
          if self.notify_pager_duty_integration_key is None:
              return None
-         return Template("notify-pager-duty-on-error").http(
-             Http("POST")
-             .url("https://events.pagerduty.com/v2/enqueue")
-             .header("Content-Type", "application/json")
-             .body(
-                 json.dumps(
-                     {
-                         "event_action": "trigger",
-                         "routing_key": self.notify_pager_duty_integration_key,
-                         # "dedup_key": self.flow.name, # TODO: Do we need deduplication?
-                         "payload": {
-                             "source": "{{workflow.name}}",
-                             "severity": "info",
-                             "summary": "Metaflow run %s/argo-{{workflow.name}} failed!"
-                             % self.flow.name,
-                             "custom_details": {
-                                 "Flow": self.flow.name,
-                                 "Run ID": "argo-{{workflow.name}}",
+         return HttpExitHook(
+             name="notify-pager-duty-on-error",
+             method="POST",
+             url="https://events.pagerduty.com/v2/enqueue",
+             headers={"Content-Type": "application/json"},
+             body=json.dumps(
+                 {
+                     "event_action": "trigger",
+                     "routing_key": self.notify_pager_duty_integration_key,
+                     # "dedup_key": self.flow.name, # TODO: Do we need deduplication?
+                     "payload": {
+                         "source": "{{workflow.name}}",
+                         "severity": "info",
+                         "summary": "Metaflow run %s/argo-{{workflow.name}} failed!"
+                         % self.flow.name,
+                         "custom_details": {
+                             "Flow": self.flow.name,
+                             "Run ID": "argo-{{workflow.name}}",
                          },
-                         "links": self._pager_duty_notification_links(),
-                     }
-                 )
-             )
+                     },
+                     "links": self._pager_duty_notification_links(),
+                 }
+             ),
+             on_error=True,
          )

      def _incident_io_alert_template(self):
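All of the notification templates in this region are being migrated from hand-built Template(...).http(Http(...)) chains to HttpExitHook objects. The class itself ships elsewhere in the plugin; below is a minimal sketch of its interface, inferred purely from the call sites in this diff (the real implementation may differ):

class HttpExitHookSketch:
    """Bundles an Argo HTTP template with the flag that registers it
    as a workflow-level on-success / on-error lifecycle hook."""

    def __init__(self, name, url, method="GET", headers=None, body=None,
                 on_success=False, on_error=False):
        self.name = name
        self.method = method
        self.url = url
        self.headers = headers or {}
        self.body = body
        self.on_success = on_success
        self.on_error = on_error

    def to_template(self):
        # shape per https://argoproj.github.io/argo-workflows/fields/#http
        http = {
            "method": self.method,
            "url": self.url,
            "headers": [{"name": k, "value": v} for k, v in self.headers.items()],
        }
        if self.body is not None:
            http["body"] = str(self.body)
        return {"name": self.name, "http": http}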
@@ -2572,50 +3223,52 @@ class ArgoWorkflows(object):
                  "Creating alerts for errors requires an alert source config ID."
              )
          ui_links = self._incident_io_ui_urls_for_run()
-         return Template("notify-incident-io-on-error").http(
-             Http("POST")
-             .url(
+         return HttpExitHook(
+             name="notify-incident-io-on-error",
+             method="POST",
+             url=(
                  "https://api.incident.io/v2/alert_events/http/%s"
                  % self.incident_io_alert_source_config_id
-             )
-             .header("Content-Type", "application/json")
-             .header("Authorization", "Bearer %s" % self.notify_incident_io_api_key)
-             .body(
-                 json.dumps(
-                     {
-                         "idempotency_key": "argo-{{workflow.name}}",  # use run id to deduplicate alerts.
-                         "status": "firing",
-                         "title": "Flow %s has failed." % self.flow.name,
-                         "description": "Metaflow run {run_pathspec} failed!{urls}".format(
-                             run_pathspec="%s/argo-{{workflow.name}}" % self.flow.name,
-                             urls=(
-                                 "\n\nSee details for the run at:\n\n"
-                                 + "\n\n".join(ui_links)
-                                 if ui_links
-                                 else ""
-                             ),
-                         ),
-                         "source_url": (
-                             "%s/%s/%s"
-                             % (
-                                 UI_URL.rstrip("/"),
-                                 self.flow.name,
-                                 "argo-{{workflow.name}}",
-                             )
-                             if UI_URL
-                             else None
+             ),
+             headers={
+                 "Content-Type": "application/json",
+                 "Authorization": "Bearer %s" % self.notify_incident_io_api_key,
+             },
+             body=json.dumps(
+                 {
+                     "idempotency_key": "argo-{{workflow.name}}",  # use run id to deduplicate alerts.
+                     "status": "firing",
+                     "title": "Flow %s has failed." % self.flow.name,
+                     "description": "Metaflow run {run_pathspec} failed!{urls}".format(
+                         run_pathspec="%s/argo-{{workflow.name}}" % self.flow.name,
+                         urls=(
+                             "\n\nSee details for the run at:\n\n"
+                             + "\n\n".join(ui_links)
+                             if ui_links
+                             else ""
                          ),
-                         "metadata": {
-                             **(self.incident_io_metadata or {}),
-                             **{
-                                 "run_status": "failed",
-                                 "flow_name": self.flow.name,
-                                 "run_id": "argo-{{workflow.name}}",
-                             },
+                     ),
+                     "source_url": (
+                         "%s/%s/%s"
+                         % (
+                             UI_URL.rstrip("/"),
+                             self.flow.name,
+                             "argo-{{workflow.name}}",
+                         )
+                         if UI_URL
+                         else None
+                     ),
+                     "metadata": {
+                         **(self.incident_io_metadata or {}),
+                         **{
+                             "run_status": "failed",
+                             "flow_name": self.flow.name,
+                             "run_id": "argo-{{workflow.name}}",
                          },
-                     }
-                 )
-             )
+                     },
+                 }
+             ),
+             on_error=True,
          )

      def _incident_io_change_template(self):
@@ -2626,50 +3279,52 @@ class ArgoWorkflows(object):
                  "Creating alerts for successes requires an alert source config ID."
              )
          ui_links = self._incident_io_ui_urls_for_run()
-         return Template("notify-incident-io-on-success").http(
-             Http("POST")
-             .url(
+         return HttpExitHook(
+             name="notify-incident-io-on-success",
+             method="POST",
+             url=(
                  "https://api.incident.io/v2/alert_events/http/%s"
                  % self.incident_io_alert_source_config_id
-             )
-             .header("Content-Type", "application/json")
-             .header("Authorization", "Bearer %s" % self.notify_incident_io_api_key)
-             .body(
-                 json.dumps(
-                     {
-                         "idempotency_key": "argo-{{workflow.name}}",  # use run id to deduplicate alerts.
-                         "status": "firing",
-                         "title": "Flow %s has succeeded." % self.flow.name,
-                         "description": "Metaflow run {run_pathspec} succeeded!{urls}".format(
-                             run_pathspec="%s/argo-{{workflow.name}}" % self.flow.name,
-                             urls=(
-                                 "\n\nSee details for the run at:\n\n"
-                                 + "\n\n".join(ui_links)
-                                 if ui_links
-                                 else ""
-                             ),
-                         ),
-                         "source_url": (
-                             "%s/%s/%s"
-                             % (
-                                 UI_URL.rstrip("/"),
-                                 self.flow.name,
-                                 "argo-{{workflow.name}}",
-                             )
-                             if UI_URL
-                             else None
+             ),
+             headers={
+                 "Content-Type": "application/json",
+                 "Authorization": "Bearer %s" % self.notify_incident_io_api_key,
+             },
+             body=json.dumps(
+                 {
+                     "idempotency_key": "argo-{{workflow.name}}",  # use run id to deduplicate alerts.
+                     "status": "firing",
+                     "title": "Flow %s has succeeded." % self.flow.name,
+                     "description": "Metaflow run {run_pathspec} succeeded!{urls}".format(
+                         run_pathspec="%s/argo-{{workflow.name}}" % self.flow.name,
+                         urls=(
+                             "\n\nSee details for the run at:\n\n"
+                             + "\n\n".join(ui_links)
+                             if ui_links
+                             else ""
                          ),
-                         "metadata": {
-                             **(self.incident_io_metadata or {}),
-                             **{
-                                 "run_status": "succeeded",
-                                 "flow_name": self.flow.name,
-                                 "run_id": "argo-{{workflow.name}}",
-                             },
+                     ),
+                     "source_url": (
+                         "%s/%s/%s"
+                         % (
+                             UI_URL.rstrip("/"),
+                             self.flow.name,
+                             "argo-{{workflow.name}}",
+                         )
+                         if UI_URL
+                         else None
+                     ),
+                     "metadata": {
+                         **(self.incident_io_metadata or {}),
+                         **{
+                             "run_status": "succeeded",
+                             "flow_name": self.flow.name,
+                             "run_id": "argo-{{workflow.name}}",
                          },
-                     }
-                 )
-             )
+                     },
+                 }
+             ),
+             on_success=True,
          )

      def _incident_io_ui_urls_for_run(self):
@@ -2694,27 +3349,27 @@ class ArgoWorkflows(object):
          # https://developer.pagerduty.com/docs/ZG9jOjExMDI5NTgy-send-a-change-event
          if self.notify_pager_duty_integration_key is None:
              return None
-         return Template("notify-pager-duty-on-success").http(
-             Http("POST")
-             .url("https://events.pagerduty.com/v2/change/enqueue")
-             .header("Content-Type", "application/json")
-             .body(
-                 json.dumps(
-                     {
-                         "routing_key": self.notify_pager_duty_integration_key,
-                         "payload": {
-                             "summary": "Metaflow run %s/argo-{{workflow.name}} Succeeded"
-                             % self.flow.name,
-                             "source": "{{workflow.name}}",
-                             "custom_details": {
-                                 "Flow": self.flow.name,
-                                 "Run ID": "argo-{{workflow.name}}",
-                             },
+         return HttpExitHook(
+             name="notify-pager-duty-on-success",
+             method="POST",
+             url="https://events.pagerduty.com/v2/change/enqueue",
+             headers={"Content-Type": "application/json"},
+             body=json.dumps(
+                 {
+                     "routing_key": self.notify_pager_duty_integration_key,
+                     "payload": {
+                         "summary": "Metaflow run %s/argo-{{workflow.name}} Succeeded"
+                         % self.flow.name,
+                         "source": "{{workflow.name}}",
+                         "custom_details": {
+                             "Flow": self.flow.name,
+                             "Run ID": "argo-{{workflow.name}}",
                          },
-                         "links": self._pager_duty_notification_links(),
-                     }
-                 )
-             )
+                     },
+                     "links": self._pager_duty_notification_links(),
+                 }
+             ),
+             on_success=True,
          )

      def _pager_duty_notification_links(self):
@@ -2836,8 +3491,12 @@ class ArgoWorkflows(object):
          blocks = self._get_slack_blocks(message)
          payload = {"text": message, "blocks": blocks}

-         return Template("notify-slack-on-error").http(
-             Http("POST").url(self.notify_slack_webhook_url).body(json.dumps(payload))
+         return HttpExitHook(
+             name="notify-slack-on-error",
+             method="POST",
+             url=self.notify_slack_webhook_url,
+             body=json.dumps(payload),
+             on_error=True,
          )

      def _slack_success_template(self):
@@ -2852,8 +3511,12 @@ class ArgoWorkflows(object):
          blocks = self._get_slack_blocks(message)
          payload = {"text": message, "blocks": blocks}

-         return Template("notify-slack-on-success").http(
-             Http("POST").url(self.notify_slack_webhook_url).body(json.dumps(payload))
+         return HttpExitHook(
+             name="notify-slack-on-success",
+             method="POST",
+             url=self.notify_slack_webhook_url,
+             body=json.dumps(payload),
+             on_success=True,
          )

      def _heartbeat_daemon_template(self):
@@ -2912,7 +3575,8 @@ class ArgoWorkflows(object):
                  mflog_expr,
              ]
              + self.environment.get_package_commands(
-                 self.code_package_url, self.flow_datastore.TYPE
+                 self.code_package_url,
+                 self.flow_datastore.TYPE,
              )[:-1]
              # Replace the line 'Task is starting.'
              # FIXME: this can be brittle.
@@ -2927,6 +3591,7 @@ class ArgoWorkflows(object):
          env = {
              # These values are needed by Metaflow to set its internal
              # state appropriately.
+             "METAFLOW_CODE_METADATA": self.code_package_metadata,
              "METAFLOW_CODE_URL": self.code_package_url,
              "METAFLOW_CODE_SHA": self.code_package_sha,
              "METAFLOW_CODE_DS": self.flow_datastore.TYPE,
@@ -3125,7 +3790,7 @@ class ArgoWorkflows(object):
              # Sensor metadata.
              ObjectMeta()
              .name(ArgoWorkflows._sensor_name(self.name))
-             .namespace(KUBERNETES_NAMESPACE)
+             .namespace(ARGO_EVENTS_SENSOR_NAMESPACE)
              .labels(self._base_labels)
              .label("app.kubernetes.io/name", "metaflow-sensor")
              .annotations(self._base_annotations)
@@ -3175,8 +3840,8 @@ class ArgoWorkflows(object):
                  Trigger().template(
                      TriggerTemplate(self.name)
                      # Trigger a deployed workflow template
-                     .argo_workflow_trigger(
-                         ArgoWorkflowTrigger()
+                     .k8s_trigger(
+                         StandardK8STrigger()
                          .source(
                              {
                                  "resource": {
@@ -3235,37 +3900,27 @@ class ArgoWorkflows(object):
                  # NOTE: We need the conditional logic in order to successfully fall back to the default value
                  # when the event payload does not contain a key for a parameter.
                  # NOTE: Keys might contain dashes, so use the safer 'get' for fetching the value
-                 data_template='{{ if (hasKey $.Input.body.payload "%s") }}{{- (get $.Input.body.payload "%s" %s) -}}{{- else -}}{{ (fail "use-default-instead") }}{{- end -}}'
+                 data_template='{{ if (hasKey $.Input.body.payload "%s") }}%s{{- else -}}{{ (fail "use-default-instead") }}{{- end -}}'
                  % (
-                     v,
                      v,
                      (
-                         "| toRawJson | squote"
+                         '{{- $pv:=(get $.Input.body.payload "%s") -}}{{ if kindIs "string" $pv }}{{- $pv | toRawJson -}}{{- else -}}{{ $pv | toRawJson | toRawJson }}{{- end -}}'
+                         % v
                          if self.parameters[
                              parameter_name
                          ]["type"]
                          == "JSON"
-                         else "| toRawJson"
+                         else '{{- (get $.Input.body.payload "%s" | toRawJson) -}}'
+                         % v
                      ),
                  ),
                  # Unfortunately the sensor needs to
                  # record the default values for
                  # the parameters - there doesn't seem
                  # to be any way for us to skip
-                 value=(
-                     json.dumps(
-                         self.parameters[parameter_name][
-                             "value"
-                         ]
-                     )
-                     if self.parameters[parameter_name][
-                         "type"
-                     ]
-                     == "JSON"
-                     else self.parameters[
-                         parameter_name
-                     ]["value"]
-                 ),
+                 value=self.parameters[parameter_name][
+                     "value"
+                 ],
                  )
                  .dest(
                      # this undocumented (mis?)feature in
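A worked example of what the new Go templates evaluate to, with a hypothetical event payload (sprig's toRawJson is approximated by json.dumps): a plain parameter is JSON-encoded once, while a JSON-typed parameter whose payload value is not already a string is encoded twice, so it reaches Argo as a quoted JSON literal.

import json

payload = {"alpha": "hello", "config": {"lr": 0.01}}

# string parameter: {{- (get $.Input.body.payload "alpha" | toRawJson) -}}
print(json.dumps(payload["alpha"]))               # -> "hello"

# JSON parameter, non-string payload value: $pv | toRawJson | toRawJson
print(json.dumps(json.dumps(payload["config"])))  # -> "{\"lr\": 0.01}"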
@@ -3482,6 +4137,10 @@ class WorkflowStep(object):
          self.payload["template"] = str(template)
          return self

+     def arguments(self, arguments):
+         self.payload["arguments"] = arguments.to_json()
+         return self
+
      def when(self, condition):
          self.payload["when"] = str(condition)
          return self
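A small usage sketch for the new arguments setter: it serializes any object exposing to_json() into the step payload. The stand-in class and values below are illustrative, not the module's real Arguments type, and the WorkflowStep constructor usage is assumed:

class _FakeArguments:
    def to_json(self):
        return {"parameters": [{"name": "alpha", "value": "42"}]}

step = WorkflowStep()  # constructor usage assumed
step.arguments(_FakeArguments())
# step.payload["arguments"] == {"parameters": [{"name": "alpha", "value": "42"}]}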
@@ -3774,6 +4433,14 @@ class Template(object):
              )
          return self

+     def pod_spec_patch(self, pod_spec_patch=None):
+         if pod_spec_patch is None:
+             return self
+
+         self.payload["podSpecPatch"] = json.dumps(pod_spec_patch)
+
+         return self
+
      def node_selectors(self, node_selectors):
          if "nodeSelector" not in self.payload:
              self.payload["nodeSelector"] = {}
@@ -3916,6 +4583,10 @@ class DAGTask(object):
          self.payload["dependencies"] = dependencies
          return self

+     def depends(self, depends: str):
+         self.payload["depends"] = depends
+         return self
+
      def template(self, template):
          # Template reference
          self.payload["template"] = template
@@ -3927,6 +4598,10 @@ class DAGTask(object):
          self.payload["inline"] = template.to_json()
          return self

+     def when(self, when: str):
+         self.payload["when"] = when
+         return self
+
      def with_param(self, with_param):
          self.payload["withParam"] = with_param
          return self
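Together, the two helpers added above let a DAG task use Argo's enhanced depends expressions (task names with result qualifiers such as .Succeeded or .Failed) alongside a when guard. An illustrative composition with made-up task names (DAGTask constructor usage assumed):

task = (
    DAGTask("join-branches")
    .template("join-branches")
    .depends("branch-a.Succeeded && branch-b.Succeeded")
    .when("{{workflow.parameters.enable-join}} == true")
)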
@@ -4146,6 +4821,10 @@ class TriggerTemplate(object):
          self.payload = tree()
          self.payload["name"] = name

+     def k8s_trigger(self, k8s_trigger):
+         self.payload["k8s"] = k8s_trigger.to_json()
+         return self
+
      def argo_workflow_trigger(self, argo_workflow_trigger):
          self.payload["argoWorkflow"] = argo_workflow_trigger.to_json()
          return self
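With k8s_trigger here and the StandardK8STrigger builder at the end of this diff, the sensor creates the Workflow object through Argo Events' generic Kubernetes trigger instead of the argoWorkflow trigger type. An illustrative composition (resource payload abbreviated):

trigger_template = TriggerTemplate("my-flow").k8s_trigger(
    StandardK8STrigger()  # payload defaults to operation "create"
    .group("argoproj.io")
    .version("v1alpha1")
    .resource("workflows")
    .source({"resource": {"kind": "Workflow", "metadata": {}, "spec": {}}})
)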
@@ -4222,51 +4901,51 @@ class TriggerParameter(object):
          return json.dumps(self.payload, indent=4)


- class Http(object):
-     # https://argoproj.github.io/argo-workflows/fields/#http
+ class StandardK8STrigger(object):
+     # https://pkg.go.dev/github.com/argoproj/argo-events/pkg/apis/sensor/v1alpha1#StandardK8STrigger

-     def __init__(self, method):
+     def __init__(self):
          tree = lambda: defaultdict(tree)
          self.payload = tree()
-         self.payload["method"] = method
-         self.payload["headers"] = []
+         self.payload["operation"] = "create"

-     def header(self, header, value):
-         self.payload["headers"].append({"name": header, "value": value})
+     def operation(self, operation):
+         self.payload["operation"] = operation
          return self

-     def body(self, body):
-         self.payload["body"] = str(body)
+     def group(self, group):
+         self.payload["group"] = group
          return self

-     def url(self, url):
-         self.payload["url"] = url
+     def version(self, version):
+         self.payload["version"] = version
          return self

-     def success_condition(self, success_condition):
-         self.payload["successCondition"] = success_condition
+     def resource(self, resource):
+         self.payload["resource"] = resource
          return self

-     def to_json(self):
-         return self.payload
-
-     def __str__(self):
-         return json.dumps(self.payload, indent=4)
-
+     def namespace(self, namespace):
+         self.payload["namespace"] = namespace
+         return self

- class LifecycleHook(object):
-     # https://argoproj.github.io/argo-workflows/fields/#lifecyclehook
+     def source(self, source):
+         self.payload["source"] = source
+         return self

-     def __init__(self):
-         tree = lambda: defaultdict(tree)
-         self.payload = tree()
+     def parameters(self, trigger_parameters):
+         if "parameters" not in self.payload:
+             self.payload["parameters"] = []
+         for trigger_parameter in trigger_parameters:
+             self.payload["parameters"].append(trigger_parameter.to_json())
+         return self

-     def expression(self, expression):
-         self.payload["expression"] = str(expression)
+     def live_object(self, live_object=True):
+         self.payload["liveObject"] = live_object
          return self

-     def template(self, template):
-         self.payload["template"] = template
+     def patch_strategy(self, patch_strategy):
+         self.payload["patchStrategy"] = patch_strategy
          return self

      def to_json(self):