ob-metaflow 2.15.13.1__py2.py3-none-any.whl → 2.19.7.1rc0__py2.py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- metaflow/__init__.py +10 -3
- metaflow/_vendor/imghdr/__init__.py +186 -0
- metaflow/_vendor/yaml/__init__.py +427 -0
- metaflow/_vendor/yaml/composer.py +139 -0
- metaflow/_vendor/yaml/constructor.py +748 -0
- metaflow/_vendor/yaml/cyaml.py +101 -0
- metaflow/_vendor/yaml/dumper.py +62 -0
- metaflow/_vendor/yaml/emitter.py +1137 -0
- metaflow/_vendor/yaml/error.py +75 -0
- metaflow/_vendor/yaml/events.py +86 -0
- metaflow/_vendor/yaml/loader.py +63 -0
- metaflow/_vendor/yaml/nodes.py +49 -0
- metaflow/_vendor/yaml/parser.py +589 -0
- metaflow/_vendor/yaml/reader.py +185 -0
- metaflow/_vendor/yaml/representer.py +389 -0
- metaflow/_vendor/yaml/resolver.py +227 -0
- metaflow/_vendor/yaml/scanner.py +1435 -0
- metaflow/_vendor/yaml/serializer.py +111 -0
- metaflow/_vendor/yaml/tokens.py +104 -0
- metaflow/cards.py +4 -0
- metaflow/cli.py +125 -21
- metaflow/cli_components/init_cmd.py +1 -0
- metaflow/cli_components/run_cmds.py +204 -40
- metaflow/cli_components/step_cmd.py +160 -4
- metaflow/client/__init__.py +1 -0
- metaflow/client/core.py +198 -130
- metaflow/client/filecache.py +59 -32
- metaflow/cmd/code/__init__.py +2 -1
- metaflow/cmd/develop/stub_generator.py +49 -18
- metaflow/cmd/develop/stubs.py +9 -27
- metaflow/cmd/make_wrapper.py +30 -0
- metaflow/datastore/__init__.py +1 -0
- metaflow/datastore/content_addressed_store.py +40 -9
- metaflow/datastore/datastore_set.py +10 -1
- metaflow/datastore/flow_datastore.py +124 -4
- metaflow/datastore/spin_datastore.py +91 -0
- metaflow/datastore/task_datastore.py +92 -6
- metaflow/debug.py +5 -0
- metaflow/decorators.py +331 -82
- metaflow/extension_support/__init__.py +414 -356
- metaflow/extension_support/_empty_file.py +2 -2
- metaflow/flowspec.py +322 -82
- metaflow/graph.py +178 -15
- metaflow/includefile.py +25 -3
- metaflow/lint.py +94 -3
- metaflow/meta_files.py +13 -0
- metaflow/metadata_provider/metadata.py +13 -2
- metaflow/metaflow_config.py +66 -4
- metaflow/metaflow_environment.py +91 -25
- metaflow/metaflow_profile.py +18 -0
- metaflow/metaflow_version.py +16 -1
- metaflow/package/__init__.py +673 -0
- metaflow/packaging_sys/__init__.py +880 -0
- metaflow/packaging_sys/backend.py +128 -0
- metaflow/packaging_sys/distribution_support.py +153 -0
- metaflow/packaging_sys/tar_backend.py +99 -0
- metaflow/packaging_sys/utils.py +54 -0
- metaflow/packaging_sys/v1.py +527 -0
- metaflow/parameters.py +6 -2
- metaflow/plugins/__init__.py +6 -0
- metaflow/plugins/airflow/airflow.py +11 -1
- metaflow/plugins/airflow/airflow_cli.py +16 -5
- metaflow/plugins/argo/argo_client.py +42 -20
- metaflow/plugins/argo/argo_events.py +6 -6
- metaflow/plugins/argo/argo_workflows.py +1023 -344
- metaflow/plugins/argo/argo_workflows_cli.py +396 -94
- metaflow/plugins/argo/argo_workflows_decorator.py +9 -0
- metaflow/plugins/argo/argo_workflows_deployer_objects.py +75 -49
- metaflow/plugins/argo/capture_error.py +5 -2
- metaflow/plugins/argo/conditional_input_paths.py +35 -0
- metaflow/plugins/argo/exit_hooks.py +209 -0
- metaflow/plugins/argo/param_val.py +19 -0
- metaflow/plugins/aws/aws_client.py +6 -0
- metaflow/plugins/aws/aws_utils.py +33 -1
- metaflow/plugins/aws/batch/batch.py +72 -5
- metaflow/plugins/aws/batch/batch_cli.py +24 -3
- metaflow/plugins/aws/batch/batch_decorator.py +57 -6
- metaflow/plugins/aws/step_functions/step_functions.py +28 -3
- metaflow/plugins/aws/step_functions/step_functions_cli.py +49 -4
- metaflow/plugins/aws/step_functions/step_functions_deployer.py +3 -0
- metaflow/plugins/aws/step_functions/step_functions_deployer_objects.py +30 -0
- metaflow/plugins/cards/card_cli.py +20 -1
- metaflow/plugins/cards/card_creator.py +24 -1
- metaflow/plugins/cards/card_datastore.py +21 -49
- metaflow/plugins/cards/card_decorator.py +58 -6
- metaflow/plugins/cards/card_modules/basic.py +38 -9
- metaflow/plugins/cards/card_modules/bundle.css +1 -1
- metaflow/plugins/cards/card_modules/chevron/renderer.py +1 -1
- metaflow/plugins/cards/card_modules/components.py +592 -3
- metaflow/plugins/cards/card_modules/convert_to_native_type.py +34 -5
- metaflow/plugins/cards/card_modules/json_viewer.py +232 -0
- metaflow/plugins/cards/card_modules/main.css +1 -0
- metaflow/plugins/cards/card_modules/main.js +56 -41
- metaflow/plugins/cards/card_modules/test_cards.py +22 -6
- metaflow/plugins/cards/component_serializer.py +1 -8
- metaflow/plugins/cards/metadata.py +22 -0
- metaflow/plugins/catch_decorator.py +9 -0
- metaflow/plugins/datastores/local_storage.py +12 -6
- metaflow/plugins/datastores/spin_storage.py +12 -0
- metaflow/plugins/datatools/s3/s3.py +49 -17
- metaflow/plugins/datatools/s3/s3op.py +113 -66
- metaflow/plugins/env_escape/client_modules.py +102 -72
- metaflow/plugins/events_decorator.py +127 -121
- metaflow/plugins/exit_hook/__init__.py +0 -0
- metaflow/plugins/exit_hook/exit_hook_decorator.py +46 -0
- metaflow/plugins/exit_hook/exit_hook_script.py +52 -0
- metaflow/plugins/kubernetes/kubernetes.py +12 -1
- metaflow/plugins/kubernetes/kubernetes_cli.py +11 -0
- metaflow/plugins/kubernetes/kubernetes_decorator.py +25 -6
- metaflow/plugins/kubernetes/kubernetes_job.py +12 -4
- metaflow/plugins/kubernetes/kubernetes_jobsets.py +31 -30
- metaflow/plugins/metadata_providers/local.py +76 -82
- metaflow/plugins/metadata_providers/service.py +13 -9
- metaflow/plugins/metadata_providers/spin.py +16 -0
- metaflow/plugins/package_cli.py +36 -24
- metaflow/plugins/parallel_decorator.py +11 -2
- metaflow/plugins/parsers.py +16 -0
- metaflow/plugins/pypi/bootstrap.py +7 -1
- metaflow/plugins/pypi/conda_decorator.py +41 -82
- metaflow/plugins/pypi/conda_environment.py +14 -6
- metaflow/plugins/pypi/micromamba.py +9 -1
- metaflow/plugins/pypi/pip.py +41 -5
- metaflow/plugins/pypi/pypi_decorator.py +4 -4
- metaflow/plugins/pypi/utils.py +22 -0
- metaflow/plugins/secrets/__init__.py +3 -0
- metaflow/plugins/secrets/secrets_decorator.py +14 -178
- metaflow/plugins/secrets/secrets_func.py +49 -0
- metaflow/plugins/secrets/secrets_spec.py +101 -0
- metaflow/plugins/secrets/utils.py +74 -0
- metaflow/plugins/test_unbounded_foreach_decorator.py +2 -2
- metaflow/plugins/timeout_decorator.py +0 -1
- metaflow/plugins/uv/bootstrap.py +29 -1
- metaflow/plugins/uv/uv_environment.py +5 -3
- metaflow/pylint_wrapper.py +5 -1
- metaflow/runner/click_api.py +79 -26
- metaflow/runner/deployer.py +208 -6
- metaflow/runner/deployer_impl.py +32 -12
- metaflow/runner/metaflow_runner.py +266 -33
- metaflow/runner/subprocess_manager.py +21 -1
- metaflow/runner/utils.py +27 -16
- metaflow/runtime.py +660 -66
- metaflow/task.py +255 -26
- metaflow/user_configs/config_options.py +33 -21
- metaflow/user_configs/config_parameters.py +220 -58
- metaflow/user_decorators/__init__.py +0 -0
- metaflow/user_decorators/common.py +144 -0
- metaflow/user_decorators/mutable_flow.py +512 -0
- metaflow/user_decorators/mutable_step.py +424 -0
- metaflow/user_decorators/user_flow_decorator.py +264 -0
- metaflow/user_decorators/user_step_decorator.py +749 -0
- metaflow/util.py +197 -7
- metaflow/vendor.py +23 -7
- metaflow/version.py +1 -1
- {ob_metaflow-2.15.13.1.data → ob_metaflow-2.19.7.1rc0.data}/data/share/metaflow/devtools/Makefile +13 -2
- {ob_metaflow-2.15.13.1.data → ob_metaflow-2.19.7.1rc0.data}/data/share/metaflow/devtools/Tiltfile +107 -7
- {ob_metaflow-2.15.13.1.data → ob_metaflow-2.19.7.1rc0.data}/data/share/metaflow/devtools/pick_services.sh +1 -0
- {ob_metaflow-2.15.13.1.dist-info → ob_metaflow-2.19.7.1rc0.dist-info}/METADATA +2 -3
- {ob_metaflow-2.15.13.1.dist-info → ob_metaflow-2.19.7.1rc0.dist-info}/RECORD +162 -121
- {ob_metaflow-2.15.13.1.dist-info → ob_metaflow-2.19.7.1rc0.dist-info}/WHEEL +1 -1
- metaflow/_vendor/v3_5/__init__.py +0 -1
- metaflow/_vendor/v3_5/importlib_metadata/__init__.py +0 -644
- metaflow/_vendor/v3_5/importlib_metadata/_compat.py +0 -152
- metaflow/_vendor/v3_5/zipp.py +0 -329
- metaflow/info_file.py +0 -25
- metaflow/package.py +0 -203
- metaflow/user_configs/config_decorators.py +0 -568
- {ob_metaflow-2.15.13.1.dist-info → ob_metaflow-2.19.7.1rc0.dist-info}/entry_points.txt +0 -0
- {ob_metaflow-2.15.13.1.dist-info → ob_metaflow-2.19.7.1rc0.dist-info}/licenses/LICENSE +0 -0
- {ob_metaflow-2.15.13.1.dist-info → ob_metaflow-2.19.7.1rc0.dist-info}/top_level.txt +0 -0
@@ -19,6 +19,7 @@ from metaflow.metaflow_config import (
     ARGO_EVENTS_EVENT_BUS,
     ARGO_EVENTS_EVENT_SOURCE,
     ARGO_EVENTS_INTERNAL_WEBHOOK_URL,
+    ARGO_EVENTS_SENSOR_NAMESPACE,
     ARGO_EVENTS_SERVICE_ACCOUNT,
     ARGO_EVENTS_WEBHOOK_AUTH,
     ARGO_WORKFLOWS_CAPTURE_ERROR_SCRIPT,
@@ -66,6 +67,7 @@ from metaflow.util import (
 )

 from .argo_client import ArgoClient
+from .exit_hooks import ExitHookHack, HttpExitHook, ContainerHook
 from metaflow.util import resolve_identity


@@ -73,6 +75,10 @@ class ArgoWorkflowsException(MetaflowException):
     headline = "Argo Workflows error"


+class ArgoWorkflowsSensorCleanupException(MetaflowException):
+    headline = "Argo Workflows sensor clean up error"
+
+
 class ArgoWorkflowsSchedulingException(MetaflowException):
     headline = "Argo Workflows scheduling error"

@@ -91,6 +97,7 @@ class ArgoWorkflows(object):
         name,
         graph: FlowGraph,
         flow,
+        code_package_metadata,
         code_package_sha,
         code_package_url,
         production_token,
@@ -115,6 +122,8 @@ class ArgoWorkflows(object):
         incident_io_metadata: List[str] = None,
         enable_heartbeat_daemon=True,
         enable_error_msg_capture=False,
+        workflow_title=None,
+        workflow_description=None,
     ):
         # Some high-level notes -
         #
@@ -140,9 +149,19 @@ class ArgoWorkflows(object):
         # ensure that your Argo Workflows controller doesn't restrict
         # templateReferencing.

+        # get initial configs
+        self.initial_configs = init_config()
+        for entry in ["OBP_PERIMETER", "OBP_INTEGRATIONS_URL"]:
+            if entry not in self.initial_configs:
+                raise ArgoWorkflowsException(
+                    f"{entry} was not found in metaflow config. Please make sure to run `outerbounds configure <...>` command which can be found on the Outerbounds UI or reach out to your Outerbounds support team."
+                )
+
         self.name = name
         self.graph = graph
+        self._parse_conditional_branches()
         self.flow = flow
+        self.code_package_metadata = code_package_metadata
         self.code_package_sha = code_package_sha
         self.code_package_url = code_package_url
         self.production_token = production_token
@@ -169,6 +188,8 @@ class ArgoWorkflows(object):
         )
         self.enable_heartbeat_daemon = enable_heartbeat_daemon
         self.enable_error_msg_capture = enable_error_msg_capture
+        self.workflow_title = workflow_title
+        self.workflow_description = workflow_description
         self.parameters = self._process_parameters()
         self.config_parameters = self._process_config_parameters()
         self.triggers, self.trigger_options = self._process_triggers()
@@ -183,6 +204,7 @@ class ArgoWorkflows(object):
         return str(self._workflow_template)

     def deploy(self):
+        self.cleanup_previous_sensors()
         try:
             # Register workflow template.
             ArgoClient(namespace=KUBERNETES_NAMESPACE).register_workflow_template(
@@ -191,6 +213,37 @@ class ArgoWorkflows(object):
         except Exception as e:
             raise ArgoWorkflowsException(str(e))

+    def cleanup_previous_sensors(self):
+        try:
+            client = ArgoClient(namespace=KUBERNETES_NAMESPACE)
+            # Check for existing deployment and do cleanup
+            old_template = client.get_workflow_template(self.name)
+            if not old_template:
+                return None
+            # Clean up old sensors
+            old_sensor_namespace = old_template["metadata"]["annotations"].get(
+                "metaflow/sensor_namespace"
+            )
+
+            if old_sensor_namespace is None:
+                # This workflow was created before sensor annotations
+                # and may have a sensor in the default namespace
+                # we will delete it and it'll get recreated if need be
+                old_sensor_name = ArgoWorkflows._sensor_name(self.name)
+                client.delete_sensor(old_sensor_name, client._namespace)
+            else:
+                # delete old sensor only if it was somewhere else, otherwise it'll get replaced
+                old_sensor_name = old_template["metadata"]["annotations"][
+                    "metaflow/sensor_name"
+                ]
+                if (
+                    not self._sensor
+                    or old_sensor_namespace != ARGO_EVENTS_SENSOR_NAMESPACE
+                ):
+                    client.delete_sensor(old_sensor_name, old_sensor_namespace)
+        except Exception as e:
+            raise ArgoWorkflowsSensorCleanupException(str(e))
+
     @staticmethod
     def _sanitize(name):
         # Metaflow allows underscores in node names, which are disallowed in Argo
@@ -205,28 +258,33 @@ class ArgoWorkflows(object):
         return name.replace(".", "-")

     @staticmethod
-    def list_templates(flow_name, all=False):
+    def list_templates(flow_name, all=False, page_size=100):
         client = ArgoClient(namespace=KUBERNETES_NAMESPACE)

-
-
-
-
-
-            template["metadata"]["name"]
-            for template in templates
-            if all
-            or flow_name
-            == template["metadata"]
-            .get("annotations", {})
-            .get("metaflow/flow_name", None)
-        ]
-        return template_names
+        for template in client.get_workflow_templates(page_size=page_size):
+            if all or flow_name == template["metadata"].get("annotations", {}).get(
+                "metaflow/flow_name", None
+            ):
+                yield template["metadata"]["name"]

     @staticmethod
     def delete(name):
         client = ArgoClient(namespace=KUBERNETES_NAMESPACE)

+        # the workflow template might not exist, but we still want to try clean up associated sensors and schedules.
+        workflow_template = client.get_workflow_template(name) or {}
+        workflow_annotations = workflow_template.get("metadata", {}).get(
+            "annotations", {}
+        )
+
+        sensor_name = ArgoWorkflows._sensor_name(
+            workflow_annotations.get("metaflow/sensor_name", name)
+        )
+        # if below is missing then it was deployed before custom sensor namespaces
+        sensor_namespace = workflow_annotations.get(
+            "metaflow/sensor_namespace", KUBERNETES_NAMESPACE
+        )
+
         # Always try to delete the schedule. Failure in deleting the schedule should not
         # be treated as an error, due to any of the following reasons
         # - there might not have been a schedule, or it was deleted by some other means
@@ -236,7 +294,7 @@ class ArgoWorkflows(object):

         # The workflow might have sensors attached to it, which consume actual resources.
         # Try to delete these as well.
-        sensor_deleted = client.delete_sensor(
+        sensor_deleted = client.delete_sensor(sensor_name, sensor_namespace)

         # After cleaning up related resources, delete the workflow in question.
         # Failure in deleting is treated as critical and will be made visible to the user
@@ -260,6 +318,7 @@ class ArgoWorkflows(object):
                 flow_name=flow_name, run_id=name
             )
         )
+        return True

     @staticmethod
     def get_workflow_status(flow_name, name):
@@ -384,6 +443,25 @@ class ArgoWorkflows(object):
                     "metaflow/project_flow_name": current.project_flow_name,
                 }
             )
+
+        # Add Argo Workflows title and description annotations
+        # https://argo-workflows.readthedocs.io/en/latest/title-and-description/
+        # Use CLI-provided values or auto-populate from metadata
+        title = (
+            (self.workflow_title.strip() if self.workflow_title else None)
+            or current.get("project_flow_name")
+            or self.flow.name
+        )
+
+        description = (
+            self.workflow_description.strip() if self.workflow_description else None
+        ) or (self.flow.__doc__.strip() if self.flow.__doc__ else None)
+
+        if title:
+            annotations["workflows.argoproj.io/title"] = title
+        if description:
+            annotations["workflows.argoproj.io/description"] = description
+
         return annotations

     def _get_schedule(self):
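For illustration only, the title/description precedence added in this hunk can be read as a small standalone helper. The function name and the sample values below are hypothetical; only the fallback order (explicit CLI value, then project flow name, then flow name / docstring) is taken from the diff.

```python
# Hypothetical sketch of the precedence above; not part of the package.
def resolve_title_and_description(
    workflow_title, workflow_description, project_flow_name, flow_name, flow_doc
):
    title = (
        (workflow_title.strip() if workflow_title else None)
        or project_flow_name
        or flow_name
    )
    description = (
        workflow_description.strip() if workflow_description else None
    ) or (flow_doc.strip() if flow_doc else None)

    annotations = {}
    if title:
        annotations["workflows.argoproj.io/title"] = title
    if description:
        annotations["workflows.argoproj.io/description"] = description
    return annotations


# Example: no CLI overrides, so the flow name and docstring win.
print(resolve_title_and_description(None, None, None, "HelloFlow", "Says hello."))
# {'workflows.argoproj.io/title': 'HelloFlow', 'workflows.argoproj.io/description': 'Says hello.'}
```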
@@ -404,11 +482,10 @@ class ArgoWorkflows(object):
             # Metaflow will overwrite any existing sensor.
             sensor_name = ArgoWorkflows._sensor_name(self.name)
             if self._sensor:
-
-
-
-
-                argo_client.delete_sensor(sensor_name)
+                # The new sensor will go into the sensor namespace specified
+                ArgoClient(namespace=ARGO_EVENTS_SENSOR_NAMESPACE).register_sensor(
+                    sensor_name, self._sensor.to_json(), ARGO_EVENTS_SENSOR_NAMESPACE
+                )
         except Exception as e:
             raise ArgoWorkflowsSchedulingException(str(e))

@@ -541,7 +618,16 @@ class ArgoWorkflows(object):
             # the JSON equivalent of None to please argo-workflows. Unfortunately it
             # has the side effect of casting the parameter value to string null during
             # execution - which needs to be fixed imminently.
-            if
+            if default_value is None:
+                default_value = json.dumps(None)
+            elif param_type == "JSON":
+                if not isinstance(default_value, str):
+                    # once to serialize the default value if needed.
+                    default_value = json.dumps(default_value)
+                # adds outer quotes to param
+                default_value = json.dumps(default_value)
+            else:
+                # Make argo sensors happy
                 default_value = json.dumps(default_value)

             parameters[param.name] = dict(
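A quick sketch of why JSON-typed defaults pass through `json.dumps` twice in the hunk above: the first call serializes a non-string default, the second adds the outer quotes that Argo expects. The example value is illustrative only.

```python
import json

default_value = {"alpha": 1}  # a JSON-typed parameter default
if not isinstance(default_value, str):
    default_value = json.dumps(default_value)  # '{"alpha": 1}'
default_value = json.dumps(default_value)      # '"{\\"alpha\\": 1}"' -- quoted for Argo

print(default_value)
```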
@@ -551,7 +637,7 @@ class ArgoWorkflows(object):
                 type=param_type,
                 description=param.kwargs.get("help"),
                 is_required=is_required,
-                **extra_attrs
+                **extra_attrs,
             )
         return parameters

@@ -735,6 +821,7 @@ class ArgoWorkflows(object):
        # references to them within the DAGTask.

        annotations = {}
+
        if self._schedule is not None:
            # timezone is an optional field and json dumps on None will result in null
            # hence configuring it to an empty string
@@ -757,7 +844,9 @@ class ArgoWorkflows(object):
                         {key: trigger.get(key) for key in ["name", "type"]}
                         for trigger in self.triggers
                     ]
-                )
+                ),
+                "metaflow/sensor_name": ArgoWorkflows._sensor_name(self.name),
+                "metaflow/sensor_namespace": ARGO_EVENTS_SENSOR_NAMESPACE,
             }
         )
         if self.notify_on_error:
@@ -796,6 +885,7 @@ class ArgoWorkflows(object):

         dag_annotation = {"metaflow/dag": json.dumps(graph_info)}

+        lifecycle_hooks = self._lifecycle_hooks()
         return (
             WorkflowTemplate()
             .metadata(
@@ -845,7 +935,16 @@ class ArgoWorkflows(object):
                 .annotations(
                     {
                         **annotations,
-                        **
+                        **{
+                            k: v
+                            for k, v in self._base_annotations.items()
+                            if k
+                            # Skip custom title/description for workflows as this makes it harder to find specific runs.
+                            not in [
+                                "workflows.argoproj.io/title",
+                                "workflows.argoproj.io/description",
+                            ]
+                        },
                         **{"metaflow/run_id": "argo-{{workflow.name}}"},
                     }
                 )
@@ -860,11 +959,7 @@ class ArgoWorkflows(object):
                 Arguments().parameters(
                     [
                         Parameter(parameter["name"])
-                        .value(
-                            "'%s'" % parameter["value"]
-                            if parameter["type"] == "JSON"
-                            else parameter["value"]
-                        )
+                        .value(parameter["value"])
                         .description(parameter.get("description"))
                         # TODO: Better handle IncludeFile in Argo Workflows UI.
                         for parameter in self.parameters.values()
@@ -904,97 +999,20 @@ class ArgoWorkflows(object):
                 if self.enable_error_msg_capture
                 else None
             )
-            # Set
+            # Set lifecycle hooks if notifications are enabled
             .hooks(
                 {
-
-
-
-                            "notify-slack-on-success": LifecycleHook()
-                            .expression("workflow.status == 'Succeeded'")
-                            .template("notify-slack-on-success"),
-                        }
-                        if self.notify_on_success and self.notify_slack_webhook_url
-                        else {}
-                    ),
-                    **(
-                        {
-                            # workflow status maps to Completed
-                            "notify-pager-duty-on-success": LifecycleHook()
-                            .expression("workflow.status == 'Succeeded'")
-                            .template("notify-pager-duty-on-success"),
-                        }
-                        if self.notify_on_success
-                        and self.notify_pager_duty_integration_key
-                        else {}
-                    ),
-                    **(
-                        {
-                            # workflow status maps to Completed
-                            "notify-incident-io-on-success": LifecycleHook()
-                            .expression("workflow.status == 'Succeeded'")
-                            .template("notify-incident-io-on-success"),
-                        }
-                        if self.notify_on_success
-                        and self.notify_incident_io_api_key
-                        else {}
-                    ),
-                    **(
-                        {
-                            # workflow status maps to Failed or Error
-                            "notify-slack-on-failure": LifecycleHook()
-                            .expression("workflow.status == 'Failed'")
-                            .template("notify-slack-on-error"),
-                            "notify-slack-on-error": LifecycleHook()
-                            .expression("workflow.status == 'Error'")
-                            .template("notify-slack-on-error"),
-                        }
-                        if self.notify_on_error and self.notify_slack_webhook_url
-                        else {}
-                    ),
-                    **(
-                        {
-                            # workflow status maps to Failed or Error
-                            "notify-pager-duty-on-failure": LifecycleHook()
-                            .expression("workflow.status == 'Failed'")
-                            .template("notify-pager-duty-on-error"),
-                            "notify-pager-duty-on-error": LifecycleHook()
-                            .expression("workflow.status == 'Error'")
-                            .template("notify-pager-duty-on-error"),
-                        }
-                        if self.notify_on_error
-                        and self.notify_pager_duty_integration_key
-                        else {}
-                    ),
-                    **(
-                        {
-                            # workflow status maps to Failed or Error
-                            "notify-incident-io-on-failure": LifecycleHook()
-                            .expression("workflow.status == 'Failed'")
-                            .template("notify-incident-io-on-error"),
-                            "notify-incident-io-on-error": LifecycleHook()
-                            .expression("workflow.status == 'Error'")
-                            .template("notify-incident-io-on-error"),
-                        }
-                        if self.notify_on_error and self.notify_incident_io_api_key
-                        else {}
-                    ),
-                    # Warning: terrible hack to workaround a bug in Argo Workflow
-                    # where the hooks listed above do not execute unless
-                    # there is an explicit exit hook. as and when this
-                    # bug is patched, we should remove this effectively
-                    # no-op hook.
-                    **(
-                        {"exit": LifecycleHook().template("exit-hook-hack")}
-                        if self.notify_on_error or self.notify_on_success
-                        else {}
-                    ),
+                    lifecycle.name: lifecycle
+                    for hook in lifecycle_hooks
+                    for lifecycle in hook.lifecycle_hooks
                 }
             )
             # Top-level DAG template(s)
             .templates(self._dag_templates())
             # Container templates
             .templates(self._container_templates())
+            # Lifecycle hook template(s)
+            .templates([hook.template for hook in lifecycle_hooks])
             # Exit hook template(s)
             .templates(self._exit_hook_templates())
             # Sidecar templates (Daemon Containers)
@@ -1002,6 +1020,199 @@ class ArgoWorkflows(object):
             )
         )

+    # Visit every node and record information on conditional step structure
+    def _parse_conditional_branches(self):
+        self.conditional_nodes = set()
+        self.conditional_join_nodes = set()
+        self.matching_conditional_join_dict = {}
+        self.recursive_nodes = set()
+
+        node_conditional_parents = {}
+        node_conditional_branches = {}
+
+        def _visit(node, conditional_branch, conditional_parents=None):
+            if not node.type == "split-switch" and not (
+                conditional_branch and conditional_parents
+            ):
+                # skip regular non-conditional nodes entirely
+                return
+
+            if node.type == "split-switch":
+                conditional_branch = conditional_branch + [node.name]
+                c_br = node_conditional_branches.get(node.name, [])
+                node_conditional_branches[node.name] = c_br + [
+                    b for b in conditional_branch if b not in c_br
+                ]
+
+                conditional_parents = (
+                    [node.name]
+                    if not conditional_parents
+                    else conditional_parents + [node.name]
+                )
+                node_conditional_parents[node.name] = conditional_parents
+
+                # check for recursion. this split is recursive if any of its out functions are itself.
+                if any(
+                    out_func for out_func in node.out_funcs if out_func == node.name
+                ):
+                    self.recursive_nodes.add(node.name)
+
+            if conditional_parents and not node.type == "split-switch":
+                node_conditional_parents[node.name] = conditional_parents
+                conditional_branch = conditional_branch + [node.name]
+                c_br = node_conditional_branches.get(node.name, [])
+                node_conditional_branches[node.name] = c_br + [
+                    b for b in conditional_branch if b not in c_br
+                ]
+
+                self.conditional_nodes.add(node.name)
+
+            if conditional_branch and conditional_parents:
+                for n in node.out_funcs:
+                    child = self.graph[n]
+                    if child.name == node.name:
+                        continue
+                    _visit(child, conditional_branch, conditional_parents)
+
+        # First we visit all nodes to determine conditional parents and branches
+        for n in self.graph:
+            _visit(n, [])
+
+        # helper to clean up conditional info for all children of a node, until a new split-switch is encountered.
+        def _cleanup_conditional_status(node_name, seen):
+            if self.graph[node_name].type == "split-switch":
+                # stop recursive cleanup if we hit a new split-switch
+                return
+            if node_name in self.conditional_nodes:
+                self.conditional_nodes.remove(node_name)
+            node_conditional_parents[node_name] = []
+            node_conditional_branches[node_name] = []
+            for p in self.graph[node_name].out_funcs:
+                if p not in seen:
+                    _cleanup_conditional_status(p, seen + [p])
+
+        # Then we traverse again in order to determine conditional join nodes, and matching conditional join info
+        for node in self.graph:
+            if node_conditional_parents.get(node.name, False):
+                # do the required postprocessing for anything requiring node.in_funcs
+
+                # check that in previous parsing we have not closed all conditional in_funcs.
+                # If so, this step can not be conditional either
+                is_conditional = any(
+                    in_func in self.conditional_nodes
+                    or self.graph[in_func].type == "split-switch"
+                    for in_func in node.in_funcs
+                )
+                if is_conditional:
+                    self.conditional_nodes.add(node.name)
+                else:
+                    if node.name in self.conditional_nodes:
+                        self.conditional_nodes.remove(node.name)
+
+                # does this node close the latest conditional parent branches?
+                conditional_in_funcs = [
+                    in_func
+                    for in_func in node.in_funcs
+                    if node_conditional_branches.get(in_func, False)
+                ]
+                closed_conditional_parents = []
+                for last_split_switch in node_conditional_parents.get(node.name, [])[
+                    ::-1
+                ]:
+                    last_conditional_split_nodes = self.graph[
+                        last_split_switch
+                    ].out_funcs
+                    # NOTE: How do we define a conditional join step?
+                    # The idea here is that we check if the conditional branches(e.g. chains of conditional steps leading to) of all the in_funcs
+                    # manage to tick off every step name that follows a split-switch
+                    # For example, consider the following structure
+                    # switch_step -> A, B, C
+                    # A -> A2 -> A3 -> A4 -> B2
+                    # B -> B2 -> B3 -> C3
+                    # C -> C2 -> C3 -> end
+                    #
+                    # if we look at the in_funcs for C3, they are (C2, B3)
+                    # B3 closes off branches started by A and B
+                    # C3 closes off branches started by C
+                    # therefore C3 is a conditional join step for the 'switch_step'
+                    # NOTE: Then what about a skip step?
+                    # some switch cases might not introduce any distinct steps of their own, opting to instead skip ahead to a later common step.
+                    # Example:
+                    # switch_step -> A, B, C
+                    # A -> A1 -> B2 -> C
+                    # B -> B1 -> B2 -> C
+                    #
+                    # In this case, C is a skip step as it does not add any conditional branching of its own.
+                    # C is also a conditional join, as it closes all branches started by 'switch_step'
+
+                    closes_branches = all(
+                        (
+                            # branch_root_node_name needs to be in at least one conditional_branch for it to be closed.
+                            any(
+                                branch_root_node_name
+                                in node_conditional_branches.get(in_func, [])
+                                for in_func in conditional_in_funcs
+                            )
+                            # need to account for a switch case skipping completely, not having a conditional-branch of its own.
+                            if branch_root_node_name != node.name
+                            else True
+                        )
+                        for branch_root_node_name in last_conditional_split_nodes
+                    )
+                    if closes_branches:
+                        closed_conditional_parents.append(last_split_switch)
+
+                        self.conditional_join_nodes.add(node.name)
+                        self.matching_conditional_join_dict[last_split_switch] = (
+                            node.name
+                        )
+
+                # Did we close all conditionals? Then this branch and all its children are not conditional anymore (unless a new conditional branch is encountered).
+                if not [
+                    p
+                    for p in node_conditional_parents.get(node.name, [])
+                    if p not in closed_conditional_parents
+                ]:
+                    _cleanup_conditional_status(node.name, [])
+
+    def _is_conditional_node(self, node):
+        return node.name in self.conditional_nodes
+
+    def _is_conditional_skip_node(self, node):
+        return (
+            self._is_conditional_node(node)
+            and any(
+                self.graph[in_func].type == "split-switch" for in_func in node.in_funcs
+            )
+            and len(
+                [
+                    in_func
+                    for in_func in node.in_funcs
+                    if self._is_conditional_node(self.graph[in_func])
+                    or self.graph[in_func].type == "split-switch"
+                ]
+            )
+            > 1
+        )
+
+    def _is_conditional_join_node(self, node):
+        return node.name in self.conditional_join_nodes
+
+    def _many_in_funcs_all_conditional(self, node):
+        cond_in_funcs = [
+            in_func
+            for in_func in node.in_funcs
+            if self._is_conditional_node(self.graph[in_func])
+        ]
+        return len(cond_in_funcs) > 1 and len(cond_in_funcs) == len(node.in_funcs)
+
+    def _is_recursive_node(self, node):
+        return node.name in self.recursive_nodes
+
+    def _matching_conditional_join(self, node):
+        # If no earlier conditional join step is found during parsing, then 'end' is always one.
+        return self.matching_conditional_join_dict.get(node.name, "end")
+
     # Visit every node and yield the uber DAGTemplate(s).
     def _dag_templates(self):
         def _visit(
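The join detection described in the NOTE comments above can be illustrated in isolation. The sketch below is not the plugin's code; it simply replays the commented example (switch_step -> A, B, C) with plain dicts in place of the flow graph and checks that C3 "ticks off" every branch root opened by the switch step.

```python
# Branch roots opened by the switch step in the commented example.
branch_roots = ["A", "B", "C"]

# Conditional branch (chain of conditional steps) recorded for each in_func of C3,
# mirroring: A -> A2 -> A3 -> A4 -> B2, B -> B2 -> B3, C -> C2 -> C3.
conditional_branches = {
    "B3": ["A", "A2", "A3", "A4", "B", "B2", "B3"],
    "C2": ["C", "C2"],
}
in_funcs_of_c3 = ["C2", "B3"]

# A node is a conditional join when every branch root appears in at least one
# of its in_funcs' conditional branches.
closes_all = all(
    any(root in conditional_branches.get(in_func, []) for in_func in in_funcs_of_c3)
    for root in branch_roots
)
print(closes_all)  # True -> C3 is the conditional join for switch_step
```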
@@ -1010,6 +1221,7 @@ class ArgoWorkflows(object):
             templates=None,
             dag_tasks=None,
             parent_foreach=None,
+            seen=None,
         ):  # Returns Tuple[List[Template], List[DAGTask]]
             """ """
             # Every for-each node results in a separate subDAG and an equivalent
@@ -1019,18 +1231,28 @@ class ArgoWorkflows(object):
             # of the for-each node.

             # Emit if we have reached the end of the sub workflow
+            if seen is None:
+                seen = []
             if dag_tasks is None:
                 dag_tasks = []
             if templates is None:
                 templates = []
+
             if exit_node is not None and exit_node is node.name:
                 return templates, dag_tasks
+            if node.name in seen:
+                return templates, dag_tasks
+
+            seen.append(node.name)
+
+            # helper variable for recursive conditional inputs
+            has_foreach_inputs = False
             if node.name == "start":
                 # Start node has no dependencies.
                 dag_task = DAGTask(self._sanitize(node.name)).template(
                     self._sanitize(node.name)
                 )
-
+            if (
                 node.is_inside_foreach
                 and self.graph[node.in_funcs[0]].type == "foreach"
                 and not self.graph[node.in_funcs[0]].parallel_foreach
@@ -1164,23 +1387,89 @@ class ArgoWorkflows(object):
                     ]
                 )

+                conditional_deps = [
+                    "%s.Succeeded" % self._sanitize(in_func)
+                    for in_func in node.in_funcs
+                    if self._is_conditional_node(self.graph[in_func])
+                    or self.graph[in_func].type == "split-switch"
+                ]
+                required_deps = [
+                    "%s.Succeeded" % self._sanitize(in_func)
+                    for in_func in node.in_funcs
+                    if not self._is_conditional_node(self.graph[in_func])
+                    and self.graph[in_func].type != "split-switch"
+                ]
+                if self._is_conditional_skip_node(
+                    node
+                ) or self._many_in_funcs_all_conditional(node):
+                    # skip nodes need unique condition handling
+                    conditional_deps = [
+                        "%s.Succeeded" % self._sanitize(in_func)
+                        for in_func in node.in_funcs
+                    ]
+                    required_deps = []
+
+                both_conditions = required_deps and conditional_deps
+
+                depends_str = "{required}{_and}{conditional}".format(
+                    required=("(%s)" if both_conditions else "%s")
+                    % " && ".join(required_deps),
+                    _and=" && " if both_conditions else "",
+                    conditional=("(%s)" if both_conditions else "%s")
+                    % " || ".join(conditional_deps),
+                )
                 dag_task = (
                     DAGTask(self._sanitize(node.name))
-                    .
-                    [self._sanitize(in_func) for in_func in node.in_funcs]
-                    )
+                    .depends(depends_str)
                     .template(self._sanitize(node.name))
                     .arguments(Arguments().parameters(parameters))
                 )

+                # Add conditional if this is the first step in a conditional branch
+                switch_in_funcs = [
+                    in_func
+                    for in_func in node.in_funcs
+                    if self.graph[in_func].type == "split-switch"
+                ]
+                if (
+                    self._is_conditional_node(node)
+                    or self._is_conditional_skip_node(node)
+                    or self._is_conditional_join_node(node)
+                ) and switch_in_funcs:
+                    conditional_when = "||".join(
+                        [
+                            "{{tasks.%s.outputs.parameters.switch-step}}==%s"
+                            % (self._sanitize(switch_in_func), node.name)
+                            for switch_in_func in switch_in_funcs
+                        ]
+                    )
+
+                    non_switch_in_funcs = [
+                        in_func
+                        for in_func in node.in_funcs
+                        if in_func not in switch_in_funcs
+                    ]
+                    status_when = ""
+                    if non_switch_in_funcs:
+                        status_when = "||".join(
+                            [
+                                "{{tasks.%s.status}}==Succeeded"
+                                % self._sanitize(in_func)
+                                for in_func in non_switch_in_funcs
+                            ]
+                        )
+
+                    total_when = (
+                        f"({status_when}) || ({conditional_when})"
+                        if status_when
+                        else conditional_when
+                    )
+                    dag_task.when(total_when)
+
             dag_tasks.append(dag_task)
             # End the workflow if we have reached the end of the flow
             if node.type == "end":
-                return
-                    Template(self.flow.name).dag(
-                        DAGTemplate().fail_fast().tasks(dag_tasks)
-                    )
-                ] + templates, dag_tasks
+                return templates, dag_tasks
             # For split nodes traverse all the children
             if node.type == "split":
                 for n in node.out_funcs:
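The `depends_str` format above combines hard dependencies (joined with `&&`) and conditional ones (joined with `||`). A standalone sketch with made-up step names shows the resulting Argo `depends` expression:

```python
required_deps = ["prep.Succeeded"]                                # non-conditional in_funcs
conditional_deps = ["branch-a.Succeeded", "branch-b.Succeeded"]   # conditional in_funcs

both = required_deps and conditional_deps
depends_str = "{required}{_and}{conditional}".format(
    required=("(%s)" if both else "%s") % " && ".join(required_deps),
    _and=" && " if both else "",
    conditional=("(%s)" if both else "%s") % " || ".join(conditional_deps),
)
print(depends_str)  # (prep.Succeeded) && (branch-a.Succeeded || branch-b.Succeeded)
```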
@@ -1190,6 +1479,7 @@ class ArgoWorkflows(object):
                         templates,
                         dag_tasks,
                         parent_foreach,
+                        seen,
                     )
                 return _visit(
                     self.graph[node.matching_join],
@@ -1197,6 +1487,118 @@ class ArgoWorkflows(object):
                     templates,
                     dag_tasks,
                     parent_foreach,
+                    seen,
+                )
+            elif node.type == "split-switch":
+                if self._is_recursive_node(node):
+                    # we need an additional recursive template if the step is recursive
+                    # NOTE: in the recursive case, the original step is renamed in the container templates to 'recursive-<step_name>'
+                    # so that we do not have to touch the step references in the DAG.
+                    #
+                    # NOTE: The way that recursion in Argo Workflows is achieved is with the following structure:
+                    # - the usual 'example-step' template which would match example_step in flow code is renamed to 'recursive-example-step'
+                    # - templates has another template with the original task name: 'example-step'
+                    # - the template 'example-step' in turn has steps
+                    # - 'example-step-internal' which uses the metaflow step executing template 'recursive-example-step'
+                    # - 'example-step-recursion' which calls the parent template 'example-step' if switch-step output from 'example-step-internal' matches the condition.
+                    sanitized_name = self._sanitize(node.name)
+                    templates.append(
+                        Template(sanitized_name)
+                        .steps(
+                            [
+                                WorkflowStep()
+                                .name("%s-internal" % sanitized_name)
+                                .template("recursive-%s" % sanitized_name)
+                                .arguments(
+                                    Arguments().parameters(
+                                        [
+                                            Parameter("input-paths").value(
+                                                "{{inputs.parameters.input-paths}}"
+                                            )
+                                        ]
+                                        # Add the additional inputs required by specific node types.
+                                        # We do not need to cover joins or @parallel, as a split-switch step can not be either one of these.
+                                        + (
+                                            [
+                                                Parameter("split-index").value(
+                                                    "{{inputs.parameters.split-index}}"
+                                                )
+                                            ]
+                                            if has_foreach_inputs
+                                            else []
+                                        )
+                                    )
+                                )
+                            ]
+                        )
+                        .steps(
+                            [
+                                WorkflowStep()
+                                .name("%s-recursion" % sanitized_name)
+                                .template(sanitized_name)
+                                .when(
+                                    "{{steps.%s-internal.outputs.parameters.switch-step}}==%s"
+                                    % (sanitized_name, node.name)
+                                )
+                                .arguments(
+                                    Arguments().parameters(
+                                        [
+                                            Parameter("input-paths").value(
+                                                "argo-{{workflow.name}}/%s/{{steps.%s-internal.outputs.parameters.task-id}}"
+                                                % (node.name, sanitized_name)
+                                            )
+                                        ]
+                                        + (
+                                            [
+                                                Parameter("split-index").value(
+                                                    "{{inputs.parameters.split-index}}"
+                                                )
+                                            ]
+                                            if has_foreach_inputs
+                                            else []
+                                        )
+                                    )
+                                ),
+                            ]
+                        )
+                        .inputs(Inputs().parameters(parameters))
+                        .outputs(
+                            # NOTE: We try to read the output parameters from the recursive template call first (<step>-recursion), and the internal step second (<step>-internal).
+                            # This guarantees that we always get the output parameters of the last recursive step that executed.
+                            Outputs().parameters(
+                                [
+                                    Parameter("task-id").valueFrom(
+                                        {
+                                            "expression": "(steps['%s-recursion']?.outputs ?? steps['%s-internal']?.outputs).parameters['task-id']"
+                                            % (sanitized_name, sanitized_name)
+                                        }
+                                    ),
+                                    Parameter("switch-step").valueFrom(
+                                        {
+                                            "expression": "(steps['%s-recursion']?.outputs ?? steps['%s-internal']?.outputs).parameters['switch-step']"
+                                            % (sanitized_name, sanitized_name)
+                                        }
+                                    ),
+                                ]
+                            )
+                        )
+                    )
+                for n in node.out_funcs:
+                    _visit(
+                        self.graph[n],
+                        self._matching_conditional_join(node),
+                        templates,
+                        dag_tasks,
+                        parent_foreach,
+                        seen,
+                    )
+                return _visit(
+                    self.graph[self._matching_conditional_join(node)],
+                    exit_node,
+                    templates,
+                    dag_tasks,
+                    parent_foreach,
+                    seen,
                 )
             # For foreach nodes generate a new sub DAGTemplate
             # We do this for "regular" foreaches (ie. `self.next(self.a, foreach=)`)
@@ -1225,7 +1627,7 @@ class ArgoWorkflows(object):
             #
             foreach_task = (
                 DAGTask(foreach_template_name)
-                .
+                .depends(f"{self._sanitize(node.name)}.Succeeded")
                 .template(foreach_template_name)
                 .arguments(
                     Arguments().parameters(
@@ -1270,6 +1672,16 @@ class ArgoWorkflows(object):
                         % self._sanitize(node.name)
                     )
                 )
+            # Add conditional if this is the first step in a conditional branch
+            if self._is_conditional_node(node) and not any(
+                self._is_conditional_node(self.graph[in_func])
+                for in_func in node.in_funcs
+            ):
+                in_func = node.in_funcs[0]
+                foreach_task.when(
+                    "{{tasks.%s.outputs.parameters.switch-step}}==%s"
+                    % (self._sanitize(in_func), node.name)
+                )
             dag_tasks.append(foreach_task)
             templates, dag_tasks_1 = _visit(
                 self.graph[node.out_funcs[0]],
@@ -1277,6 +1689,7 @@ class ArgoWorkflows(object):
                 templates,
                 [],
                 node.name,
+                seen,
             )

             # How do foreach's work on Argo:
@@ -1313,7 +1726,22 @@ class ArgoWorkflows(object):
                             self.graph[node.matching_join].in_funcs[0]
                         )
                     }
-
+                    if not self._is_conditional_join_node(
+                        self.graph[node.matching_join]
+                    )
+                    else
+                    # Note: If the nodes leading to the join are conditional, then we need to use an expression to pick the outputs from the task that executed.
+                    # ref for operators: https://github.com/expr-lang/expr/blob/master/docs/language-definition.md
+                    {
+                        "expression": "get((%s)?.parameters, 'task-id')"
+                        % " ?? ".join(
+                            f"tasks['{self._sanitize(func)}']?.outputs"
+                            for func in self.graph[
+                                node.matching_join
+                            ].in_funcs
+                        )
+                    }
+                ),
                 ]
                 if not node.parallel_foreach
                 else [
@@ -1346,7 +1774,7 @@ class ArgoWorkflows(object):
             join_foreach_task = (
                 DAGTask(self._sanitize(self.graph[node.matching_join].name))
                 .template(self._sanitize(self.graph[node.matching_join].name))
-                .
+                .depends(f"{foreach_template_name}.Succeeded")
                 .arguments(
                     Arguments().parameters(
                         (
@@ -1395,6 +1823,7 @@ class ArgoWorkflows(object):
                 templates,
                 dag_tasks,
                 parent_foreach,
+                seen,
             )
         # For linear nodes continue traversing to the next node
         if node.type in ("linear", "join", "start"):
@@ -1404,6 +1833,7 @@ class ArgoWorkflows(object):
                 templates,
                 dag_tasks,
                 parent_foreach,
+                seen,
             )
         else:
             raise ArgoWorkflowsException(
@@ -1417,7 +1847,11 @@ class ArgoWorkflows(object):
             for daemon_template in self._daemon_templates()
         ]

-        templates,
+        templates, dag_tasks = _visit(node=self.graph["start"], dag_tasks=daemon_tasks)
+        # Add the DAG template only after fully traversing the graph so we are guaranteed to have all the dag_tasks collected.
+        templates.append(
+            Template(self.flow.name).dag(DAGTemplate().fail_fast().tasks(dag_tasks))
+        )
         return templates

     # Visit every node and yield ContainerTemplates.
@@ -1473,6 +1907,18 @@ class ArgoWorkflows(object):
             input_paths_expr = (
                 "export INPUT_PATHS={{inputs.parameters.input-paths}}"
             )
+            if (
+                self._is_conditional_join_node(node)
+                or self._many_in_funcs_all_conditional(node)
+                or self._is_conditional_skip_node(node)
+            ):
+                # NOTE: Argo template expressions that fail to resolve, output the expression itself as a value.
+                # With conditional steps, some of the input-paths are therefore 'broken' due to containing a nil expression
+                # e.g. "{{ tasks['A'].outputs.parameters.task-id }}" when task A never executed.
+                # We base64 encode the input-paths in order to not pollute the execution environment with templating expressions.
+                # NOTE: Adding conditionals that check if a key exists or not does not work either, due to an issue with how Argo
+                # handles tasks in a nested foreach (withParam template) leading to all such expressions getting evaluated as false.
+                input_paths_expr = "export INPUT_PATHS={{=toBase64(inputs.parameters['input-paths'])}}"
             input_paths = "$(echo $INPUT_PATHS)"
             if any(self.graph[n].type == "foreach" for n in node.in_funcs):
                 task_idx = "{{inputs.parameters.split-index}}"
@@ -1488,7 +1934,6 @@ class ArgoWorkflows(object):
                 # foreaches
                 task_idx = "{{inputs.parameters.split-index}}"
                 root_input = "{{inputs.parameters.root-input-path}}"
-
             # Task string to be hashed into an ID
             task_str = "-".join(
                 [
@@ -1571,7 +2016,9 @@ class ArgoWorkflows(object):
                     mflog_expr,
                 ]
                 + self.environment.get_package_commands(
-                    self.code_package_url,
+                    self.code_package_url,
+                    self.flow_datastore.TYPE,
+                    self.code_package_metadata,
                 )
             )
             step_cmds = self.environment.bootstrap_commands(
@@ -1583,6 +2030,7 @@ class ArgoWorkflows(object):
                     decorator.make_decorator_spec()
                     for decorator in node.decorators
                     if not decorator.statically_defined
+                    and decorator.inserted_by is None
                 ]
             }
             # FlowDecorators can define their own top-level options. They are
@@ -1620,7 +2068,7 @@ class ArgoWorkflows(object):
                     # {{foo.bar['param_name']}}.
                     # https://argoproj.github.io/argo-events/tutorials/02-parameterization/
                     # http://masterminds.github.io/sprig/strings.html
-                    "--%s
+                    "--%s=\\\"$(python -m metaflow.plugins.argo.param_val {{=toBase64(workflow.parameters['%s'])}})\\\""
                     % (parameter["name"], parameter["name"])
                     for parameter in self.parameters.values()
                 ]
@@ -1642,10 +2090,40 @@ class ArgoWorkflows(object):
                     ]
                 )
                 input_paths = "%s/_parameters/%s" % (run_id, task_id_params)
+            # Only for static joins and conditional_joins
+            elif (
+                self._is_conditional_join_node(node)
+                or self._many_in_funcs_all_conditional(node)
+                or self._is_conditional_skip_node(node)
+            ) and not (
+                node.type == "join"
+                and self.graph[node.split_parents[-1]].type == "foreach"
+            ):
+                # we need to pass in the set of conditional in_funcs to the pathspec generating script as in the case of split-switch skipping cases,
+                # non-conditional input-paths need to be ignored in favour of conditional ones when they have executed.
+                skippable_input_steps = ",".join(
+                    [
+                        in_func
+                        for in_func in node.in_funcs
+                        if self.graph[in_func].type == "split-switch"
+                    ]
+                )
+                input_paths = (
+                    "$(python -m metaflow.plugins.argo.conditional_input_paths %s %s)"
+                    % (input_paths, skippable_input_steps)
+                )
             elif (
                 node.type == "join"
                 and self.graph[node.split_parents[-1]].type == "foreach"
             ):
+                # foreach-joins straight out of conditional branches are not yet supported
+                if self._is_conditional_join_node(node) and len(node.in_funcs) > 1:
+                    raise ArgoWorkflowsException(
+                        "Conditional steps inside a foreach that transition directly into a join step are not currently supported.\n"
+                        "As a workaround, add a common step after the conditional steps %s "
+                        "that will transition to a join."
+                        % ", ".join("*%s*" % f for f in node.in_funcs)
+                    )
                 # Set aggregated input-paths for a for-each join
                 foreach_step = next(
                     n for n in node.in_funcs if self.graph[n].is_inside_foreach
@@ -1667,6 +2145,8 @@ class ArgoWorkflows(object):
                         foreach_step,
                     )
                 )
+            # NOTE: input-paths might be extremely lengthy so we dump these to disk instead of passing them directly to the cmd
+            step_cmds.append("echo %s >> /tmp/mf-input-paths" % input_paths)
             step = [
                 "step",
                 node.name,
@@ -1674,7 +2154,7 @@ class ArgoWorkflows(object):
                 "--task-id %s" % task_id,
                 "--retry-count %s" % retry_count,
                 "--max-user-code-retries %d" % user_code_retries,
-                "--input-paths
+                "--input-paths-filename /tmp/mf-input-paths",
             ]
             if node.parallel_step:
                 step.append(
@@ -1749,6 +2229,7 @@ class ArgoWorkflows(object):
                 **{
                     # These values are needed by Metaflow to set it's internal
                     # state appropriately.
+                    "METAFLOW_CODE_METADATA": self.code_package_metadata,
                     "METAFLOW_CODE_URL": self.code_package_url,
                     "METAFLOW_CODE_SHA": self.code_package_sha,
                     "METAFLOW_CODE_DS": self.flow_datastore.TYPE,
@@ -1887,7 +2368,7 @@ class ArgoWorkflows(object):
                         [Parameter("num-parallel"), Parameter("task-id-entropy")]
                     )
                 else:
-                    # append
+                    # append these only for joins of foreaches, not static splits
                     inputs.append(Parameter("split-cardinality"))
             # check if the node is a @parallel node.
             elif node.parallel_step:
@@ -1922,6 +2403,13 @@ class ArgoWorkflows(object):
|
|
|
1922
2403
|
# are derived at runtime.
|
|
1923
2404
|
if not (node.name == "end" or node.parallel_step):
|
|
1924
2405
|
outputs = [Parameter("task-id").valueFrom({"path": "/mnt/out/task_id"})]
|
|
2406
|
+
|
|
2407
|
+
# If this step is a split-switch one, we need to output the switch step name
|
|
2408
|
+
if node.type == "split-switch":
|
|
2409
|
+
outputs.append(
|
|
2410
|
+
Parameter("switch-step").valueFrom({"path": "/mnt/out/switch_step"})
|
|
2411
|
+
)
|
|
2412
|
+
|
|
1925
2413
|
if node.type == "foreach":
|
|
1926
2414
|
# Emit split cardinality from foreach task
|
|
1927
2415
|
outputs.append(
|
|
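Every split-switch template now exports the branch it selected as an Argo output parameter sourced from `/mnt/out/switch_step`, alongside the existing `task-id` output. A hedged sketch of the producing side, assuming the task writes the chosen step name to that path before exiting:

```python
# Sketch: the split-switch task persists its decision so that Argo can expose
# it as an output parameter and downstream tasks can gate on it with `when`.
def emit_switch_choice(chosen_step: str, out_path: str = "/mnt/out/switch_step") -> None:
    with open(out_path, "w") as f:
        f.write(chosen_step)
```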
@@ -1955,17 +2443,10 @@ class ArgoWorkflows(object):
             and k not in set(ARGO_WORKFLOWS_ENV_VARS_TO_SKIP.split(","))
         }

-        #
-        initial_configs = init_config()
-        for entry in ["OBP_PERIMETER", "OBP_INTEGRATIONS_URL"]:
-            if entry not in initial_configs:
-                raise ArgoWorkflowsException(
-                    f"{entry} was not found in metaflow config. Please make sure to run `outerbounds configure <...>` command which can be found on the Ourebounds UI or reach out to your Outerbounds support team."
-                )
-
+        # OBP configs
         additional_obp_configs = {
-            "OBP_PERIMETER": initial_configs["OBP_PERIMETER"],
-            "OBP_INTEGRATIONS_URL": initial_configs["OBP_INTEGRATIONS_URL"],
+            "OBP_PERIMETER": self.initial_configs["OBP_PERIMETER"],
+            "OBP_INTEGRATIONS_URL": self.initial_configs["OBP_INTEGRATIONS_URL"],
         }

         # Tmpfs variables
@@ -2021,6 +2502,7 @@ class ArgoWorkflows(object):
                     namespace=resources["namespace"],
                     image=resources["image"],
                     image_pull_policy=resources["image_pull_policy"],
+                    image_pull_secrets=resources["image_pull_secrets"],
                     service_account=resources["service_account"],
                     secrets=(
                         [
@@ -2166,8 +2648,13 @@ class ArgoWorkflows(object):
                     )
                 )
             else:
+                template_name = self._sanitize(node.name)
+                if self._is_recursive_node(node):
+                    # The recursive template has the original step name,
+                    # this becomes a template within the recursive ones 'steps'
+                    template_name = self._sanitize("recursive-%s" % node.name)
                 yield (
-                    Template(
+                    Template(template_name)
                     # Set @timeout values
                     .active_deadline_seconds(run_time_limit)
                     # Set service account
@@ -2209,6 +2696,17 @@ class ArgoWorkflows(object):
                     .node_selectors(resources.get("node_selector"))
                     # Set tolerations
                     .tolerations(resources.get("tolerations"))
+                    # Set image pull secrets if present. We need to use pod_spec_patch due to Argo not supporting this on a template level.
+                    .pod_spec_patch(
+                        {
+                            "imagePullSecrets": [
+                                {"name": secret}
+                                for secret in resources["image_pull_secrets"]
+                            ]
+                        }
+                        if resources["image_pull_secrets"]
+                        else None
+                    )
                     # Set container
                     .container(
                         # TODO: Unify the logic with kubernetes.py
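Argo Workflows does not expose `imagePullSecrets` on an individual template, so the template now injects them through a pod spec patch, serialized by the `Template.pod_spec_patch` builder added later in this diff. For an assumed `image_pull_secrets=["regcred"]` the patch would serialize roughly as follows:

```python
import json

# What .pod_spec_patch(...) above would store under "podSpecPatch" for a
# single pull secret named "regcred" (an assumed example value).
patch = {"imagePullSecrets": [{"name": "regcred"}]}
print(json.dumps(patch))  # {"imagePullSecrets": [{"name": "regcred"}]}
```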
@@ -2356,40 +2854,190 @@ class ArgoWorkflows(object):
         templates.append(self._heartbeat_daemon_template())
         return templates

-    # Return
-    def
-
+    # Return lifecycle hooks for workflow execution notifications.
+    def _lifecycle_hooks(self):
+        hooks = []
         if self.notify_on_error:
-
-
-
+            hooks.append(self._slack_error_template())
+            hooks.append(self._pager_duty_alert_template())
+            hooks.append(self._incident_io_alert_template())
         if self.notify_on_success:
-
-
-
+            hooks.append(self._slack_success_template())
+            hooks.append(self._pager_duty_change_template())
+            hooks.append(self._incident_io_change_template())
+
+        exit_hook_decos = self.flow._flow_decorators.get("exit_hook", [])
+
+        for deco in exit_hook_decos:
+            hooks.extend(self._lifecycle_hook_from_deco(deco))

         # Clean up None values from templates.
-
-
-        if
-
-
-
-            # remove this effectively no-op template.
-            # Note: We use the Http template because changing this to an actual no-op container had the side-effect of
-            # leaving LifecycleHooks in a pending state even when they have finished execution.
-            templates.append(
-                Template("exit-hook-hack").http(
-                    Http("GET")
-                    .url(
+        hooks = list(filter(None, hooks))
+
+        if hooks:
+            hooks.append(
+                ExitHookHack(
+                    url=(
                         self.notify_slack_webhook_url
                         or "https://events.pagerduty.com/v2/enqueue"
                     )
-                    .success_condition("true == true")
                 )
             )
+        return hooks
+
+    def _lifecycle_hook_from_deco(self, deco):
+        from kubernetes import client as kubernetes_sdk
+
+        start_step = [step for step in self.graph if step.name == "start"][0]
+        # We want to grab the base image used by the start step, as this is known to be pullable from within the cluster,
+        # and it might contain the required libraries, allowing us to start up faster.
+        start_kube_deco = [
+            deco for deco in start_step.decorators if deco.name == "kubernetes"
+        ][0]
+        resources = dict(start_kube_deco.attributes)
+        kube_defaults = dict(start_kube_deco.defaults)
+
+        # OBP Configs
+        additional_obp_configs = {
+            "OBP_PERIMETER": self.initial_configs["OBP_PERIMETER"],
+            "OBP_INTEGRATIONS_URL": self.initial_configs["OBP_INTEGRATIONS_URL"],
+        }
+
+        run_id_template = "argo-{{workflow.name}}"
+        metaflow_version = self.environment.get_environment_info()
+        metaflow_version["flow_name"] = self.graph.name
+        metaflow_version["production_token"] = self.production_token
+        env = {
+            # These values are needed by Metaflow to set it's internal
+            # state appropriately.
+            "METAFLOW_CODE_URL": self.code_package_url,
+            "METAFLOW_CODE_SHA": self.code_package_sha,
+            "METAFLOW_CODE_DS": self.flow_datastore.TYPE,
+            "METAFLOW_SERVICE_URL": SERVICE_INTERNAL_URL,
+            "METAFLOW_SERVICE_HEADERS": json.dumps(SERVICE_HEADERS),
+            "METAFLOW_USER": "argo-workflows",
+            "METAFLOW_DEFAULT_DATASTORE": self.flow_datastore.TYPE,
+            "METAFLOW_DEFAULT_METADATA": DEFAULT_METADATA,
+            "METAFLOW_OWNER": self.username,
+        }
+        # pass on the Run pathspec for script
+        env["RUN_PATHSPEC"] = f"{self.graph.name}/{run_id_template}"
+
+        # support Metaflow sandboxes
+        env["METAFLOW_INIT_SCRIPT"] = KUBERNETES_SANDBOX_INIT_SCRIPT
+
+        # support fetching secrets
+        env.update(additional_obp_configs)
+
+        env["METAFLOW_WORKFLOW_NAME"] = "{{workflow.name}}"
+        env["METAFLOW_WORKFLOW_NAMESPACE"] = "{{workflow.namespace}}"
+        env = {
+            k: v
+            for k, v in env.items()
+            if v is not None
+            and k not in set(ARGO_WORKFLOWS_ENV_VARS_TO_SKIP.split(","))
+        }
+
+        def _cmd(fn_name):
+            mflog_expr = export_mflog_env_vars(
+                datastore_type=self.flow_datastore.TYPE,
+                stdout_path="$PWD/.logs/mflog_stdout",
+                stderr_path="$PWD/.logs/mflog_stderr",
+                flow_name=self.flow.name,
+                run_id=run_id_template,
+                step_name=f"_hook_{fn_name}",
+                task_id="1",
+                retry_count="0",
+            )
+            cmds = " && ".join(
+                [
+                    # For supporting sandboxes, ensure that a custom script is executed
+                    # before anything else is executed. The script is passed in as an
+                    # env var.
+                    '${METAFLOW_INIT_SCRIPT:+eval \\"${METAFLOW_INIT_SCRIPT}\\"}',
+                    "mkdir -p $PWD/.logs",
+                    mflog_expr,
+                ]
+                + self.environment.get_package_commands(
+                    self.code_package_url, self.flow_datastore.TYPE
+                )[:-1]
+                # Replace the line 'Task in starting'
+                + [f"mflog 'Lifecycle hook {fn_name} is starting.'"]
+                + [
+                    f"python -m metaflow.plugins.exit_hook.exit_hook_script {metaflow_version['script']} {fn_name} $RUN_PATHSPEC"
+                ]
+            )
+
+            cmds = shlex.split('bash -c "%s"' % cmds)
+            return cmds
+
+        def _container(cmds):
+            return to_camelcase(
+                kubernetes_sdk.V1Container(
+                    name="main",
+                    command=cmds,
+                    image=deco.attributes["options"].get("image", None)
+                    or resources["image"],
+                    env=[
+                        kubernetes_sdk.V1EnvVar(name=k, value=str(v))
+                        for k, v in env.items()
+                    ],
+                    env_from=[
+                        kubernetes_sdk.V1EnvFromSource(
+                            secret_ref=kubernetes_sdk.V1SecretEnvSource(
+                                name=str(k),
+                                # optional=True
+                            )
+                        )
+                        for k in list(
+                            []
+                            if not resources.get("secrets")
+                            else (
+                                [resources.get("secrets")]
+                                if isinstance(resources.get("secrets"), str)
+                                else resources.get("secrets")
+                            )
+                        )
+                        + KUBERNETES_SECRETS.split(",")
+                        + ARGO_WORKFLOWS_KUBERNETES_SECRETS.split(",")
+                        if k
+                    ],
+                    resources=kubernetes_sdk.V1ResourceRequirements(
+                        requests={
+                            "cpu": str(kube_defaults["cpu"]),
+                            "memory": "%sM" % str(kube_defaults["memory"]),
+                        }
+                    ),
+                ).to_dict()
+            )
+
+        # create lifecycle hooks from deco
+        hooks = []
+        for success_fn_name in deco.success_hooks:
+            hook = ContainerHook(
+                name=f"success-{success_fn_name.replace('_', '-')}",
+                container=_container(cmds=_cmd(success_fn_name)),
+                service_account_name=resources["service_account"],
+                on_success=True,
+            )
+            hooks.append(hook)
+
+        for error_fn_name in deco.error_hooks:
+            hook = ContainerHook(
+                name=f"error-{error_fn_name.replace('_', '-')}",
+                service_account_name=resources["service_account"],
+                container=_container(cmds=_cmd(error_fn_name)),
+                on_error=True,
+            )
+            hooks.append(hook)
+
+        return hooks
+
+    def _exit_hook_templates(self):
+        templates = []
         if self.enable_error_msg_capture:
             templates.extend(self._error_msg_capture_hook_templates())
+
         return templates

     def _error_msg_capture_hook_templates(self):
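The `_lifecycle_hook_from_deco` machinery turns a flow-level `exit_hook` decorator into `ContainerHook` objects that run `metaflow.plugins.exit_hook.exit_hook_script <script> <function> <run pathspec>` after the workflow succeeds or fails. A hedged usage sketch; the user-facing decorator is assumed here to take `on_success` / `on_error` lists that populate the `success_hooks` / `error_hooks` attributes referenced above, and the hook signature shown is illustrative:

```python
# Assumed user-facing usage; the exact exit_hook signature/export is not part of this diff.
from metaflow import FlowSpec, step, exit_hook


def notify_ok(run):  # signature assumed; the hook script passes the run pathspec
    print(f"run {run} finished successfully")


def notify_fail(run):
    print(f"run {run} failed")


@exit_hook(on_success=[notify_ok], on_error=[notify_fail])
class HookedFlow(FlowSpec):
    @step
    def start(self):
        self.next(self.end)

    @step
    def end(self):
        pass
```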
@@ -2430,7 +3078,9 @@ class ArgoWorkflows(object):
                     mflog_expr,
                 ]
                 + self.environment.get_package_commands(
-                    self.code_package_url,
+                    self.code_package_url,
+                    self.flow_datastore.TYPE,
+                    self.code_package_metadata,
                 )[:-1]
                 # Replace the line 'Task in starting'
                 # FIXME: this can be brittle.
@@ -2450,6 +3100,7 @@ class ArgoWorkflows(object):
             env = {
                 # These values are needed by Metaflow to set it's internal
                 # state appropriately.
+                "METAFLOW_CODE_METADATA": self.code_package_metadata,
                 "METAFLOW_CODE_URL": self.code_package_url,
                 "METAFLOW_CODE_SHA": self.code_package_sha,
                 "METAFLOW_CODE_DS": self.flow_datastore.TYPE,
@@ -2538,30 +3189,30 @@ class ArgoWorkflows(object):
         # https://developer.pagerduty.com/docs/ZG9jOjExMDI5NTgx-send-an-alert-event
         if self.notify_pager_duty_integration_key is None:
             return None
-        return
-
-
-        .
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-            },
+        return HttpExitHook(
+            name="notify-pager-duty-on-error",
+            method="POST",
+            url="https://events.pagerduty.com/v2/enqueue",
+            headers={"Content-Type": "application/json"},
+            body=json.dumps(
+                {
+                    "event_action": "trigger",
+                    "routing_key": self.notify_pager_duty_integration_key,
+                    # "dedup_key": self.flow.name, # TODO: Do we need deduplication?
+                    "payload": {
+                        "source": "{{workflow.name}}",
+                        "severity": "info",
+                        "summary": "Metaflow run %s/argo-{{workflow.name}} failed!"
+                        % self.flow.name,
+                        "custom_details": {
+                            "Flow": self.flow.name,
+                            "Run ID": "argo-{{workflow.name}}",
                         },
-
-
-
-            )
+                    },
+                    "links": self._pager_duty_notification_links(),
+                }
+            ),
+            on_error=True,
         )

     def _incident_io_alert_template(self):
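For reference, the Events API v2 request that this hook issues when the workflow fails is roughly equivalent to the following sketch (values are illustrative, with `{{workflow.name}}` already resolved to a concrete run name):

```python
import json
import urllib.request

# Illustrative stand-in for the notify-pager-duty-on-error hook's HTTP call.
payload = {
    "event_action": "trigger",
    "routing_key": "<pagerduty-integration-key>",
    "payload": {
        "source": "myflow-abc123",
        "severity": "info",
        "summary": "Metaflow run MyFlow/argo-myflow-abc123 failed!",
        "custom_details": {"Flow": "MyFlow", "Run ID": "argo-myflow-abc123"},
    },
    "links": [],
}
req = urllib.request.Request(
    "https://events.pagerduty.com/v2/enqueue",
    data=json.dumps(payload).encode(),
    headers={"Content-Type": "application/json"},
    method="POST",
)
# urllib.request.urlopen(req)  # uncomment to actually send the alert
```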
@@ -2572,50 +3223,52 @@ class ArgoWorkflows(object):
                 "Creating alerts for errors requires a alert source config ID."
             )
         ui_links = self._incident_io_ui_urls_for_run()
-        return
-
-
+        return HttpExitHook(
+            name="notify-incident-io-on-error",
+            method="POST",
+            url=(
                 "https://api.incident.io/v2/alert_events/http/%s"
                 % self.incident_io_alert_source_config_id
-        )
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-                    ),
-                    "source_url": (
-                        "%s/%s/%s"
-                        % (
-                            UI_URL.rstrip("/"),
-                            self.flow.name,
-                            "argo-{{workflow.name}}",
-                        )
-                        if UI_URL
-                        else None
+            ),
+            headers={
+                "Content-Type": "application/json",
+                "Authorization": "Bearer %s" % self.notify_incident_io_api_key,
+            },
+            body=json.dumps(
+                {
+                    "idempotency_key": "argo-{{workflow.name}}",  # use run id to deduplicate alerts.
+                    "status": "firing",
+                    "title": "Flow %s has failed." % self.flow.name,
+                    "description": "Metaflow run {run_pathspec} failed!{urls}".format(
+                        run_pathspec="%s/argo-{{workflow.name}}" % self.flow.name,
+                        urls=(
+                            "\n\nSee details for the run at:\n\n"
+                            + "\n\n".join(ui_links)
+                            if ui_links
+                            else ""
                         ),
-
-
-
-
-
-
-                    },
+                    ),
+                    "source_url": (
+                        "%s/%s/%s"
+                        % (
+                            UI_URL.rstrip("/"),
+                            self.flow.name,
+                            "argo-{{workflow.name}}",
+                        )
+                        if UI_URL
+                        else None
+                    ),
+                    "metadata": {
+                        **(self.incident_io_metadata or {}),
+                        **{
+                            "run_status": "failed",
+                            "flow_name": self.flow.name,
+                            "run_id": "argo-{{workflow.name}}",
                         },
-                }
-
-                )
+                    },
+                }
+            ),
+            on_error=True,
         )

     def _incident_io_change_template(self):
@@ -2626,50 +3279,52 @@ class ArgoWorkflows(object):
                 "Creating alerts for successes requires an alert source config ID."
             )
         ui_links = self._incident_io_ui_urls_for_run()
-        return
-
-
+        return HttpExitHook(
+            name="notify-incident-io-on-success",
+            method="POST",
+            url=(
                 "https://api.incident.io/v2/alert_events/http/%s"
                 % self.incident_io_alert_source_config_id
-        )
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-                    ),
-                    "source_url": (
-                        "%s/%s/%s"
-                        % (
-                            UI_URL.rstrip("/"),
-                            self.flow.name,
-                            "argo-{{workflow.name}}",
-                        )
-                        if UI_URL
-                        else None
+            ),
+            headers={
+                "Content-Type": "application/json",
+                "Authorization": "Bearer %s" % self.notify_incident_io_api_key,
+            },
+            body=json.dumps(
+                {
+                    "idempotency_key": "argo-{{workflow.name}}",  # use run id to deduplicate alerts.
+                    "status": "firing",
+                    "title": "Flow %s has succeeded." % self.flow.name,
+                    "description": "Metaflow run {run_pathspec} succeeded!{urls}".format(
+                        run_pathspec="%s/argo-{{workflow.name}}" % self.flow.name,
+                        urls=(
+                            "\n\nSee details for the run at:\n\n"
+                            + "\n\n".join(ui_links)
+                            if ui_links
+                            else ""
                         ),
-
-
-
-
-
-
-                    },
+                    ),
+                    "source_url": (
+                        "%s/%s/%s"
+                        % (
+                            UI_URL.rstrip("/"),
+                            self.flow.name,
+                            "argo-{{workflow.name}}",
+                        )
+                        if UI_URL
+                        else None
+                    ),
+                    "metadata": {
+                        **(self.incident_io_metadata or {}),
+                        **{
+                            "run_status": "succeeded",
+                            "flow_name": self.flow.name,
+                            "run_id": "argo-{{workflow.name}}",
                         },
-                }
-
-                )
+                    },
+                }
+            ),
+            on_success=True,
         )

     def _incident_io_ui_urls_for_run(self):
@@ -2694,27 +3349,27 @@ class ArgoWorkflows(object):
         # https://developer.pagerduty.com/docs/ZG9jOjExMDI5NTgy-send-a-change-event
         if self.notify_pager_duty_integration_key is None:
             return None
-        return
-
-
-        .
-
-
-
-
-
-
-
-
-
-
-            },
+        return HttpExitHook(
+            name="notify-pager-duty-on-success",
+            method="POST",
+            url="https://events.pagerduty.com/v2/change/enqueue",
+            headers={"Content-Type": "application/json"},
+            body=json.dumps(
+                {
+                    "routing_key": self.notify_pager_duty_integration_key,
+                    "payload": {
+                        "summary": "Metaflow run %s/argo-{{workflow.name}} Succeeded"
+                        % self.flow.name,
+                        "source": "{{workflow.name}}",
+                        "custom_details": {
+                            "Flow": self.flow.name,
+                            "Run ID": "argo-{{workflow.name}}",
                         },
-
-
-
-            )
+                    },
+                    "links": self._pager_duty_notification_links(),
+                }
+            ),
+            on_success=True,
         )

     def _pager_duty_notification_links(self):
@@ -2836,8 +3491,12 @@ class ArgoWorkflows(object):
         blocks = self._get_slack_blocks(message)
         payload = {"text": message, "blocks": blocks}

-        return
-
+        return HttpExitHook(
+            name="notify-slack-on-error",
+            method="POST",
+            url=self.notify_slack_webhook_url,
+            body=json.dumps(payload),
+            on_error=True,
         )

     def _slack_success_template(self):
@@ -2852,8 +3511,12 @@ class ArgoWorkflows(object):
         blocks = self._get_slack_blocks(message)
         payload = {"text": message, "blocks": blocks}

-        return
-
+        return HttpExitHook(
+            name="notify-slack-on-success",
+            method="POST",
+            url=self.notify_slack_webhook_url,
+            body=json.dumps(payload),
+            on_success=True,
        )

     def _heartbeat_daemon_template(self):
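Both Slack templates now collapse to a declarative `HttpExitHook` that POSTs the prepared `{"text": ..., "blocks": ...}` payload to the configured webhook. A minimal sketch of the equivalent request, handy for verifying a webhook outside of Argo (the helper name is made up):

```python
import json
import urllib.request


def post_to_slack(webhook_url: str, message: str, blocks=None) -> int:
    # Same payload shape the notify-slack-on-* hooks serialize with json.dumps.
    payload = {"text": message, "blocks": blocks or []}
    req = urllib.request.Request(
        webhook_url,
        data=json.dumps(payload).encode(),
        headers={"Content-Type": "application/json"},
        method="POST",
    )
    with urllib.request.urlopen(req) as resp:
        return resp.status
```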
@@ -2912,7 +3575,8 @@ class ArgoWorkflows(object):
                     mflog_expr,
                 ]
                 + self.environment.get_package_commands(
-                    self.code_package_url,
+                    self.code_package_url,
+                    self.flow_datastore.TYPE,
                 )[:-1]
                 # Replace the line 'Task in starting'
                 # FIXME: this can be brittle.
@@ -2927,6 +3591,7 @@ class ArgoWorkflows(object):
             env = {
                 # These values are needed by Metaflow to set it's internal
                 # state appropriately.
+                "METAFLOW_CODE_METADATA": self.code_package_metadata,
                 "METAFLOW_CODE_URL": self.code_package_url,
                 "METAFLOW_CODE_SHA": self.code_package_sha,
                 "METAFLOW_CODE_DS": self.flow_datastore.TYPE,
@@ -3125,7 +3790,7 @@ class ArgoWorkflows(object):
             # Sensor metadata.
             ObjectMeta()
             .name(ArgoWorkflows._sensor_name(self.name))
-            .namespace(
+            .namespace(ARGO_EVENTS_SENSOR_NAMESPACE)
             .labels(self._base_labels)
             .label("app.kubernetes.io/name", "metaflow-sensor")
             .annotations(self._base_annotations)
@@ -3175,8 +3840,8 @@ class ArgoWorkflows(object):
             Trigger().template(
                 TriggerTemplate(self.name)
                 # Trigger a deployed workflow template
-                .
-
+                .k8s_trigger(
+                    StandardK8STrigger()
                     .source(
                         {
                             "resource": {
@@ -3235,37 +3900,27 @@ class ArgoWorkflows(object):
                                 # NOTE: We need the conditional logic in order to successfully fall back to the default value
                                 # when the event payload does not contain a key for a parameter.
                                 # NOTE: Keys might contain dashes, so use the safer 'get' for fetching the value
-                                data_template='{{ if (hasKey $.Input.body.payload "%s") }}
+                                data_template='{{ if (hasKey $.Input.body.payload "%s") }}%s{{- else -}}{{ (fail "use-default-instead") }}{{- end -}}'
                                 % (
-                                    v,
                                     v,
                                     (
-                                        "| toRawJson |
+                                        '{{- $pv:=(get $.Input.body.payload "%s") -}}{{ if kindIs "string" $pv }}{{- $pv | toRawJson -}}{{- else -}}{{ $pv | toRawJson | toRawJson }}{{- end -}}'
+                                        % v
                                         if self.parameters[
                                             parameter_name
                                         ]["type"]
                                         == "JSON"
-                                        else "| toRawJson
+                                        else '{{- (get $.Input.body.payload "%s" | toRawJson) -}}'
+                                        % v
                                     ),
                                 ),
                                 # Unfortunately the sensor needs to
                                 # record the default values for
                                 # the parameters - there doesn't seem
                                 # to be any way for us to skip
-                                value=
-
-
-                                    "value"
-                                ]
-                                )
-                                if self.parameters[parameter_name][
-                                    "type"
-                                ]
-                                == "JSON"
-                                else self.parameters[
-                                    parameter_name
-                                ]["value"]
-                                ),
+                                value=self.parameters[parameter_name][
+                                    "value"
+                                ],
                                 )
                                 .dest(
                                 # this undocumented (mis?)feature in
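The rewritten `data_template` keeps the `hasKey` fallback but now JSON-encodes the extracted payload value differently for string and JSON parameters (a double `toRawJson` for non-strings). A small reconstruction of the two template strings being composed, for a hypothetical parameter key `alpha`:

```python
# Reconstruction of the Sprig template strings built above, for key v = "alpha".
v = "alpha"
outer = (
    '{{ if (hasKey $.Input.body.payload "%s") }}%s'
    '{{- else -}}{{ (fail "use-default-instead") }}{{- end -}}'
)
json_inner = (
    '{{- $pv:=(get $.Input.body.payload "%s") -}}'
    '{{ if kindIs "string" $pv }}{{- $pv | toRawJson -}}'
    '{{- else -}}{{ $pv | toRawJson | toRawJson }}{{- end -}}' % v
)
string_inner = '{{- (get $.Input.body.payload "%s" | toRawJson) -}}' % v

print(outer % (v, json_inner))    # rendering used for JSON-typed parameters
print(outer % (v, string_inner))  # rendering used for all other parameters
```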
@@ -3482,6 +4137,10 @@ class WorkflowStep(object):
         self.payload["template"] = str(template)
         return self

+    def arguments(self, arguments):
+        self.payload["arguments"] = arguments.to_json()
+        return self
+
     def when(self, condition):
         self.payload["when"] = str(condition)
         return self
@@ -3774,6 +4433,14 @@ class Template(object):
         )
         return self

+    def pod_spec_patch(self, pod_spec_patch=None):
+        if pod_spec_patch is None:
+            return self
+
+        self.payload["podSpecPatch"] = json.dumps(pod_spec_patch)
+
+        return self
+
     def node_selectors(self, node_selectors):
         if "nodeSelector" not in self.payload:
             self.payload["nodeSelector"] = {}
@@ -3916,6 +4583,10 @@ class DAGTask(object):
         self.payload["dependencies"] = dependencies
         return self

+    def depends(self, depends: str):
+        self.payload["depends"] = depends
+        return self
+
     def template(self, template):
         # Template reference
         self.payload["template"] = template
@@ -3927,6 +4598,10 @@ class DAGTask(object):
         self.payload["inline"] = template.to_json()
         return self

+    def when(self, when: str):
+        self.payload["when"] = when
+        return self
+
     def with_param(self, with_param):
         self.payload["withParam"] = with_param
         return self
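`DAGTask` gains `depends` (Argo's enhanced dependency expression) and `when` (conditional execution), which the conditional-branch support above relies on. A hedged sketch of how the builders compose; the step names and the switch-step output parameter are hypothetical:

```python
# Illustrative only; assumes DAGTask is constructed with a task name as in
# the rest of this module.
task = (
    DAGTask("branch-a")
    .template("branch-a")
    .depends("switch-step.Succeeded")
    .when("{{tasks.switch-step.outputs.parameters.switch-step}} == branch_a")
)
```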
@@ -4146,6 +4821,10 @@ class TriggerTemplate(object):
         self.payload = tree()
         self.payload["name"] = name

+    def k8s_trigger(self, k8s_trigger):
+        self.payload["k8s"] = k8s_trigger.to_json()
+        return self
+
     def argo_workflow_trigger(self, argo_workflow_trigger):
         self.payload["argoWorkflow"] = argo_workflow_trigger.to_json()
         return self
@@ -4222,51 +4901,51 @@ class TriggerParameter(object):
         return json.dumps(self.payload, indent=4)


-class
-    # https://
+class StandardK8STrigger(object):
+    # https://pkg.go.dev/github.com/argoproj/argo-events/pkg/apis/sensor/v1alpha1#StandardK8STrigger

-    def __init__(self
+    def __init__(self):
         tree = lambda: defaultdict(tree)
         self.payload = tree()
-        self.payload["
-        self.payload["headers"] = []
+        self.payload["operation"] = "create"

-    def
-        self.payload["
+    def operation(self, operation):
+        self.payload["operation"] = operation
         return self

-    def
-        self.payload["
+    def group(self, group):
+        self.payload["group"] = group
         return self

-    def
-        self.payload["
+    def version(self, version):
+        self.payload["version"] = version
         return self

-    def
-        self.payload["
+    def resource(self, resource):
+        self.payload["resource"] = resource
         return self

-    def
-
-
-    def __str__(self):
-        return json.dumps(self.payload, indent=4)
-
+    def namespace(self, namespace):
+        self.payload["namespace"] = namespace
+        return self

-
-
+    def source(self, source):
+        self.payload["source"] = source
+        return self

-    def
-
-
+    def parameters(self, trigger_parameters):
+        if "parameters" not in self.payload:
+            self.payload["parameters"] = []
+        for trigger_parameter in trigger_parameters:
+            self.payload["parameters"].append(trigger_parameter.to_json())
+        return self

-    def
-        self.payload["
+    def live_object(self, live_object=True):
+        self.payload["liveObject"] = live_object
         return self

-    def
-        self.payload["
+    def patch_strategy(self, patch_strategy):
+        self.payload["patchStrategy"] = patch_strategy
         return self

     def to_json(self):