metaflow 2.12.8__py2.py3-none-any.whl → 2.12.9__py2.py3-none-any.whl
This diff shows the changes between two publicly released versions of the package, as published to their respective public registries. It is provided for informational purposes only.
- metaflow/__init__.py +2 -0
- metaflow/cli.py +12 -4
- metaflow/extension_support/plugins.py +1 -0
- metaflow/flowspec.py +8 -1
- metaflow/lint.py +13 -0
- metaflow/metaflow_current.py +0 -8
- metaflow/plugins/__init__.py +12 -0
- metaflow/plugins/argo/argo_workflows.py +462 -42
- metaflow/plugins/argo/argo_workflows_cli.py +60 -3
- metaflow/plugins/argo/argo_workflows_decorator.py +38 -7
- metaflow/plugins/argo/argo_workflows_deployer.py +290 -0
- metaflow/plugins/argo/jobset_input_paths.py +16 -0
- metaflow/plugins/aws/batch/batch_decorator.py +16 -13
- metaflow/plugins/aws/step_functions/step_functions_cli.py +45 -3
- metaflow/plugins/aws/step_functions/step_functions_deployer.py +251 -0
- metaflow/plugins/cards/card_cli.py +1 -1
- metaflow/plugins/kubernetes/kubernetes.py +279 -52
- metaflow/plugins/kubernetes/kubernetes_cli.py +26 -8
- metaflow/plugins/kubernetes/kubernetes_client.py +0 -1
- metaflow/plugins/kubernetes/kubernetes_decorator.py +56 -44
- metaflow/plugins/kubernetes/kubernetes_job.py +6 -6
- metaflow/plugins/kubernetes/kubernetes_jobsets.py +510 -272
- metaflow/plugins/parallel_decorator.py +108 -8
- metaflow/plugins/secrets/secrets_decorator.py +12 -3
- metaflow/plugins/test_unbounded_foreach_decorator.py +39 -4
- metaflow/runner/deployer.py +386 -0
- metaflow/runner/metaflow_runner.py +1 -20
- metaflow/runner/nbdeploy.py +130 -0
- metaflow/runner/nbrun.py +4 -28
- metaflow/runner/utils.py +49 -0
- metaflow/runtime.py +246 -134
- metaflow/version.py +1 -1
- {metaflow-2.12.8.dist-info → metaflow-2.12.9.dist-info}/METADATA +2 -2
- {metaflow-2.12.8.dist-info → metaflow-2.12.9.dist-info}/RECORD +38 -32
- {metaflow-2.12.8.dist-info → metaflow-2.12.9.dist-info}/WHEEL +1 -1
- {metaflow-2.12.8.dist-info → metaflow-2.12.9.dist-info}/LICENSE +0 -0
- {metaflow-2.12.8.dist-info → metaflow-2.12.9.dist-info}/entry_points.txt +0 -0
- {metaflow-2.12.8.dist-info → metaflow-2.12.9.dist-info}/top_level.txt +0 -0
metaflow/plugins/argo/argo_workflows_cli.py

@@ -5,7 +5,9 @@ import re
 import sys
 from hashlib import sha1
 
-from metaflow import JSONType, current, decorators, parameters
+from metaflow import Run, JSONType, current, decorators, parameters
+from metaflow.client.core import get_metadata
+from metaflow.exception import MetaflowNotFound
 from metaflow._vendor import click
 from metaflow.exception import MetaflowException, MetaflowInternalError
 from metaflow.metaflow_config import (
@@ -165,6 +167,14 @@ def argo_workflows(obj, name=None):
     default="",
     help="PagerDuty Events API V2 Integration key for workflow success/failure notifications.",
 )
+@click.option(
+    "--deployer-attribute-file",
+    default=None,
+    show_default=True,
+    type=str,
+    help="Write the workflow name to the file specified. Used internally for Metaflow's Deployer API.",
+    hidden=True,
+)
 @click.pass_obj
 def create(
     obj,
@@ -182,9 +192,21 @@ def create(
     notify_on_success=False,
     notify_slack_webhook_url=None,
     notify_pager_duty_integration_key=None,
+    deployer_attribute_file=None,
 ):
     validate_tags(tags)
 
+    if deployer_attribute_file:
+        with open(deployer_attribute_file, "w") as f:
+            json.dump(
+                {
+                    "name": obj.workflow_name,
+                    "flow_name": obj.flow.name,
+                    "metadata": get_metadata(),
+                },
+                f,
+            )
+
     obj.echo("Deploying *%s* to Argo Workflows..." % obj.workflow_name, bold=True)
 
     if SERVICE_VERSION_CHECK:
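For context, the file written by the hidden --deployer-attribute-file option on `create` is a small JSON document that the Deployer API reads back. A sketch of its shape, with made-up values (the exact workflow name and metadata string depend on the deployment):

{
    "name": "helloflow.prod.helloflow",
    "flow_name": "HelloFlow",
    "metadata": "service@https://metadata.example.com"
}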
@@ -563,8 +585,16 @@ def resolve_token(
     type=str,
     help="Write the ID of this run to the file specified.",
 )
+@click.option(
+    "--deployer-attribute-file",
+    default=None,
+    show_default=True,
+    type=str,
+    help="Write the metadata and pathspec of this run to the file specified.\nUsed internally for Metaflow's Deployer API.",
+    hidden=True,
+)
 @click.pass_obj
-def trigger(obj, run_id_file=None, **kwargs):
+def trigger(obj, run_id_file=None, deployer_attribute_file=None, **kwargs):
     def _convert_value(param):
         # Swap `-` with `_` in parameter name to match click's behavior
         val = kwargs.get(param.name.replace("-", "_").lower())
@@ -587,6 +617,17 @@ def trigger(obj, run_id_file=None, **kwargs):
         with open(run_id_file, "w") as f:
             f.write(str(run_id))
 
+    if deployer_attribute_file:
+        with open(deployer_attribute_file, "w") as f:
+            json.dump(
+                {
+                    "name": obj.workflow_name,
+                    "metadata": get_metadata(),
+                    "pathspec": "/".join((obj.flow.name, run_id)),
+                },
+                f,
+            )
+
     obj.echo(
         "Workflow *{name}* triggered on Argo Workflows "
         "(run-id *{run_id}*).".format(name=obj.workflow_name, run_id=run_id),
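The trigger-side file has the same shape but swaps "flow_name" for a "pathspec" ("<flow name>/<run id>") that can be handed to the Metaflow client. Illustrative contents, values made up:

{
    "name": "helloflow",
    "metadata": "service@https://metadata.example.com",
    "pathspec": "HelloFlow/argo-helloflow-xyz123"
}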
@@ -786,6 +827,20 @@ def validate_token(name, token_prefix, authorize, instructions_fn=None):
     return True
 
 
+def get_run_object(pathspec: str):
+    try:
+        return Run(pathspec, _namespace_check=False)
+    except MetaflowNotFound:
+        return None
+
+
+def get_status_considering_run_object(status, run_obj):
+    remapped_status = remap_status(status)
+    if remapped_status == "Running" and run_obj is None:
+        return "Pending"
+    return remapped_status
+
+
 @argo_workflows.command(help="Fetch flow execution status on Argo Workflows.")
 @click.argument("run-id", required=True, type=str)
 @click.pass_obj
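A quick sketch of how these two helpers compose (the flow name and run id are made up): when Argo reports the workflow as running but the run has not yet been registered with the metadata service, the CLI now reports it as pending instead.

run_obj = get_run_object("HelloFlow/argo-helloflow-xyz123")   # None until the run is registered
print(get_status_considering_run_object("Running", run_obj))
# -> "Pending" while run_obj is None, otherwise the remapped Argo status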
@@ -803,8 +858,10 @@ def status(obj, run_id):
     # Trim prefix from run_id
     name = run_id[5:]
     status = ArgoWorkflows.get_workflow_status(obj.flow.name, name)
+    run_obj = get_run_object("/".join((obj.flow.name, run_id)))
     if status is not None:
-        obj.echo_always(status)
+        status = get_status_considering_run_object(status, run_obj)
+        obj.echo_always(status)
 
 
 @argo_workflows.command(help="Terminate flow execution on Argo Workflows.")
metaflow/plugins/argo/argo_workflows_decorator.py

@@ -2,12 +2,14 @@ import json
 import os
 import time
 
+
 from metaflow import current
 from metaflow.decorators import StepDecorator
 from metaflow.events import Trigger
 from metaflow.metadata import MetaDatum
 from metaflow.metaflow_config import ARGO_EVENTS_WEBHOOK_URL
-
+from metaflow.graph import DAGNode, FlowGraph
+from metaflow.flowspec import FlowSpec
 from .argo_events import ArgoEvent
 
 
@@ -83,7 +85,13 @@ class ArgoWorkflowsInternalDecorator(StepDecorator):
         metadata.register_metadata(run_id, step_name, task_id, entries)
 
     def task_finished(
-        self, step_name, flow, graph, is_task_ok, retry_count, max_user_code_retries
+        self,
+        step_name,
+        flow: FlowSpec,
+        graph: FlowGraph,
+        is_task_ok,
+        retry_count,
+        max_user_code_retries,
     ):
         if not is_task_ok:
             # The task finished with an exception - execution won't
@@ -100,16 +108,39 @@ class ArgoWorkflowsInternalDecorator(StepDecorator):
         # we run pods with a security context. We work around this constraint by
         # mounting an emptyDir volume.
         if graph[step_name].type == "foreach":
+            # A DAGNode is considered a `parallel_step` if it is annotated by the @parallel decorator.
+            # A DAGNode is considered a `parallel_foreach` if it contains a `num_parallel` kwarg provided to the
+            # `next` method of that DAGNode.
+            # At this moment in the code we care if a node is marked as a `parallel_foreach` so that we can pass down the
+            # value of `num_parallel` to the subsequent steps.
+            # For @parallel, the implmentation uses 1 jobset object. That one jobset
+            # object internally creates 'num_parallel' jobs. So, we set foreach_num_splits
+            # to 1 here for @parallel. The parallelism of jobset is handled in
+            # kubernetes_job.py.
+            if graph[step_name].parallel_foreach:
+                with open("/mnt/out/num_parallel", "w") as f:
+                    json.dump(flow._parallel_ubf_iter.num_parallel, f)
+                flow._foreach_num_splits = 1
+                with open("/mnt/out/task_id_entropy", "w") as file:
+                    import uuid
+
+                    file.write(uuid.uuid4().hex[:6])
+
             with open("/mnt/out/splits", "w") as file:
                 json.dump(list(range(flow._foreach_num_splits)), file)
             with open("/mnt/out/split_cardinality", "w") as file:
                 json.dump(flow._foreach_num_splits, file)
 
-        # Unfortunately, we can't always use pod names as task-ids since the pod names
-        # are not static across retries. We write the task-id to a file that is read
-        # by the next task here.
-        with open("/mnt/out/task_id", "w") as file:
-            file.write(self.task_id)
+        # for steps that have a `@parallel` decorator set to them, we will be relying on Jobsets
+        # to run the task. In this case, we cannot set anything in the
+        # `/mnt/out` directory, since such form of output mounts are not available to jobset execution as
+        # argo just treats it like A K8s resource that it throws in the cluster.
+        if not graph[step_name].parallel_step:
+            # Unfortunately, we can't always use pod names as task-ids since the pod names
+            # are not static across retries. We write the task-id to a file that is read
+            # by the next task here.
+            with open("/mnt/out/task_id", "w") as file:
+                file.write(self.task_id)
 
         # Emit Argo Events given that the flow has succeeded. Given that we only
         # emit events when the task succeeds, we can piggy back on this decorator
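A minimal local sketch of the /mnt/out handshake the block above performs for a `num_parallel` foreach; this is not Metaflow code, and the directory and values are illustrative:

import json, os, tempfile, uuid

out_dir = tempfile.mkdtemp()   # stands in for /mnt/out
num_parallel = 4               # value of the `num_parallel` kwarg passed to self.next()

with open(os.path.join(out_dir, "num_parallel"), "w") as f:
    json.dump(num_parallel, f)
with open(os.path.join(out_dir, "task_id_entropy"), "w") as f:
    f.write(uuid.uuid4().hex[:6])
# @parallel maps onto a single jobset, so the foreach fans out to one split
with open(os.path.join(out_dir, "splits"), "w") as f:
    json.dump(list(range(1)), f)
with open(os.path.join(out_dir, "split_cardinality"), "w") as f:
    json.dump(1, f)

print(sorted(os.listdir(out_dir)))
# ['num_parallel', 'split_cardinality', 'splits', 'task_id_entropy']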
metaflow/plugins/argo/argo_workflows_deployer.py (new file)

@@ -0,0 +1,290 @@
+import sys
+import tempfile
+from typing import Optional, ClassVar
+
+from metaflow.plugins.argo.argo_workflows import ArgoWorkflows
+from metaflow.runner.deployer import (
+    DeployerImpl,
+    DeployedFlow,
+    TriggeredRun,
+    get_lower_level_group,
+    handle_timeout,
+)
+
+
+def suspend(instance: TriggeredRun, **kwargs):
+    """
+    Suspend the running workflow.
+
+    Parameters
+    ----------
+    **kwargs : Any
+        Additional arguments to pass to the suspend command.
+
+    Returns
+    -------
+    bool
+        True if the command was successful, False otherwise.
+    """
+    _, run_id = instance.pathspec.split("/")
+
+    # every subclass needs to have `self.deployer_kwargs`
+    command = get_lower_level_group(
+        instance.deployer.api,
+        instance.deployer.top_level_kwargs,
+        instance.deployer.TYPE,
+        instance.deployer.deployer_kwargs,
+    ).suspend(run_id=run_id, **kwargs)
+
+    pid = instance.deployer.spm.run_command(
+        [sys.executable, *command],
+        env=instance.deployer.env_vars,
+        cwd=instance.deployer.cwd,
+        show_output=instance.deployer.show_output,
+    )
+
+    command_obj = instance.deployer.spm.get(pid)
+    return command_obj.process.returncode == 0
+
+
+def unsuspend(instance: TriggeredRun, **kwargs):
+    """
+    Unsuspend the suspended workflow.
+
+    Parameters
+    ----------
+    **kwargs : Any
+        Additional arguments to pass to the unsuspend command.
+
+    Returns
+    -------
+    bool
+        True if the command was successful, False otherwise.
+    """
+    _, run_id = instance.pathspec.split("/")
+
+    # every subclass needs to have `self.deployer_kwargs`
+    command = get_lower_level_group(
+        instance.deployer.api,
+        instance.deployer.top_level_kwargs,
+        instance.deployer.TYPE,
+        instance.deployer.deployer_kwargs,
+    ).unsuspend(run_id=run_id, **kwargs)
+
+    pid = instance.deployer.spm.run_command(
+        [sys.executable, *command],
+        env=instance.deployer.env_vars,
+        cwd=instance.deployer.cwd,
+        show_output=instance.deployer.show_output,
+    )
+
+    command_obj = instance.deployer.spm.get(pid)
+    return command_obj.process.returncode == 0
+
+
+def terminate(instance: TriggeredRun, **kwargs):
+    """
+    Terminate the running workflow.
+
+    Parameters
+    ----------
+    **kwargs : Any
+        Additional arguments to pass to the terminate command.
+
+    Returns
+    -------
+    bool
+        True if the command was successful, False otherwise.
+    """
+    _, run_id = instance.pathspec.split("/")
+
+    # every subclass needs to have `self.deployer_kwargs`
+    command = get_lower_level_group(
+        instance.deployer.api,
+        instance.deployer.top_level_kwargs,
+        instance.deployer.TYPE,
+        instance.deployer.deployer_kwargs,
+    ).terminate(run_id=run_id, **kwargs)
+
+    pid = instance.deployer.spm.run_command(
+        [sys.executable, *command],
+        env=instance.deployer.env_vars,
+        cwd=instance.deployer.cwd,
+        show_output=instance.deployer.show_output,
+    )
+
+    command_obj = instance.deployer.spm.get(pid)
+    return command_obj.process.returncode == 0
+
+
+def status(instance: TriggeredRun):
+    """
+    Get the status of the triggered run.
+
+    Returns
+    -------
+    str, optional
+        The status of the workflow considering the run object, or None if the status could not be retrieved.
+    """
+    from metaflow.plugins.argo.argo_workflows_cli import (
+        get_status_considering_run_object,
+    )
+
+    flow_name, run_id = instance.pathspec.split("/")
+    name = run_id[5:]
+    status = ArgoWorkflows.get_workflow_status(flow_name, name)
+    if status is not None:
+        return get_status_considering_run_object(status, instance.run)
+    return None
+
+
+def production_token(instance: DeployedFlow):
+    """
+    Get the production token for the deployed flow.
+
+    Returns
+    -------
+    str, optional
+        The production token, None if it cannot be retrieved.
+    """
+    try:
+        _, production_token = ArgoWorkflows.get_existing_deployment(
+            instance.deployer.name
+        )
+        return production_token
+    except TypeError:
+        return None
+
+
+def delete(instance: DeployedFlow, **kwargs):
+    """
+    Delete the deployed flow.
+
+    Parameters
+    ----------
+    **kwargs : Any
+        Additional arguments to pass to the delete command.
+
+    Returns
+    -------
+    bool
+        True if the command was successful, False otherwise.
+    """
+    command = get_lower_level_group(
+        instance.deployer.api,
+        instance.deployer.top_level_kwargs,
+        instance.deployer.TYPE,
+        instance.deployer.deployer_kwargs,
+    ).delete(**kwargs)
+
+    pid = instance.deployer.spm.run_command(
+        [sys.executable, *command],
+        env=instance.deployer.env_vars,
+        cwd=instance.deployer.cwd,
+        show_output=instance.deployer.show_output,
+    )
+
+    command_obj = instance.deployer.spm.get(pid)
+    return command_obj.process.returncode == 0
+
+
+def trigger(instance: DeployedFlow, **kwargs):
+    """
+    Trigger a new run for the deployed flow.
+
+    Parameters
+    ----------
+    **kwargs : Any
+        Additional arguments to pass to the trigger command, `Parameters` in particular
+
+    Returns
+    -------
+    ArgoWorkflowsTriggeredRun
+        The triggered run instance.
+
+    Raises
+    ------
+    Exception
+        If there is an error during the trigger process.
+    """
+    with tempfile.TemporaryDirectory() as temp_dir:
+        tfp_runner_attribute = tempfile.NamedTemporaryFile(dir=temp_dir, delete=False)
+
+        # every subclass needs to have `self.deployer_kwargs`
+        command = get_lower_level_group(
+            instance.deployer.api,
+            instance.deployer.top_level_kwargs,
+            instance.deployer.TYPE,
+            instance.deployer.deployer_kwargs,
+        ).trigger(deployer_attribute_file=tfp_runner_attribute.name, **kwargs)
+
+        pid = instance.deployer.spm.run_command(
+            [sys.executable, *command],
+            env=instance.deployer.env_vars,
+            cwd=instance.deployer.cwd,
+            show_output=instance.deployer.show_output,
+        )
+
+        command_obj = instance.deployer.spm.get(pid)
+        content = handle_timeout(tfp_runner_attribute, command_obj)
+
+        if command_obj.process.returncode == 0:
+            triggered_run = TriggeredRun(deployer=instance.deployer, content=content)
+            triggered_run._enrich_object(
+                {
+                    "status": property(status),
+                    "terminate": terminate,
+                    "suspend": suspend,
+                    "unsuspend": unsuspend,
+                }
+            )
+            return triggered_run
+
+    raise Exception(
+        "Error triggering %s on %s for %s"
+        % (instance.deployer.name, instance.deployer.TYPE, instance.deployer.flow_file)
+    )
+
+
+class ArgoWorkflowsDeployer(DeployerImpl):
+    """
+    Deployer implementation for Argo Workflows.
+
+    Attributes
+    ----------
+    TYPE : ClassVar[Optional[str]]
+        The type of the deployer, which is "argo-workflows".
+    """
+
+    TYPE: ClassVar[Optional[str]] = "argo-workflows"
+
+    def __init__(self, deployer_kwargs, **kwargs):
+        """
+        Initialize the ArgoWorkflowsDeployer.
+
+        Parameters
+        ----------
+        deployer_kwargs : dict
+            The deployer-specific keyword arguments.
+        **kwargs : Any
+            Additional arguments to pass to the superclass constructor.
+        """
+        self.deployer_kwargs = deployer_kwargs
+        super().__init__(**kwargs)
+
+    def _enrich_deployed_flow(self, deployed_flow: DeployedFlow):
+        """
+        Enrich the DeployedFlow object with additional properties and methods.
+
+        Parameters
+        ----------
+        deployed_flow : DeployedFlow
+            The deployed flow object to enrich.
+        """
+        deployed_flow._enrich_object(
+            {
+                "production_token": property(production_token),
+                "trigger": trigger,
+                "delete": delete,
+            }
+        )
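A minimal usage sketch of the Deployer API this module plugs into. It assumes the `Deployer` entry point is exported from the top-level `metaflow` package (consistent with the `metaflow/__init__.py` change in this release); the flow file and parameter names are made up:

from metaflow import Deployer

# Deploy the flow to Argo Workflows, then drive the deployment programmatically.
deployed = Deployer("hello_flow.py").argo_workflows().create()
print(deployed.production_token)   # property attached by _enrich_deployed_flow

run = deployed.trigger(alpha=0.5)  # returns a TriggeredRun enriched with the helpers above
print(run.status)                  # "Pending" until the Run is registered
run.suspend()
run.unsuspend()
run.terminate()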
metaflow/plugins/argo/jobset_input_paths.py (new file)

@@ -0,0 +1,16 @@
+import sys
+from hashlib import md5
+
+
+def generate_input_paths(run_id, step_name, task_id_entropy, num_parallel):
+    # => run_id/step/:foo,bar
+    control_id = "control-{}-0".format(task_id_entropy)
+    worker_ids = [
+        "worker-{}-{}".format(task_id_entropy, i) for i in range(int(num_parallel) - 1)
+    ]
+    ids = [control_id] + worker_ids
+    return "{}/{}/:{}".format(run_id, step_name, ",".join(ids))
+
+
+if __name__ == "__main__":
+    print(generate_input_paths(sys.argv[1], sys.argv[2], sys.argv[3], sys.argv[4]))
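For reference, what generate_input_paths produces (the arguments here are illustrative):

from metaflow.plugins.argo.jobset_input_paths import generate_input_paths

print(generate_input_paths("argo-hello-abc", "train", "a1b2c3", 4))
# -> "argo-hello-abc/train/:control-a1b2c3-0,worker-a1b2c3-0,worker-a1b2c3-1,worker-a1b2c3-2"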
metaflow/plugins/aws/batch/batch_decorator.py

@@ -261,8 +261,8 @@ class BatchDecorator(StepDecorator):
         # metadata. A rudimentary way to detect non-local execution is to
         # check for the existence of AWS_BATCH_JOB_ID environment variable.
 
+        meta = {}
         if "AWS_BATCH_JOB_ID" in os.environ:
-            meta = {}
             meta["aws-batch-job-id"] = os.environ["AWS_BATCH_JOB_ID"]
             meta["aws-batch-job-attempt"] = os.environ["AWS_BATCH_JOB_ATTEMPT"]
             meta["aws-batch-ce-name"] = os.environ["AWS_BATCH_CE_NAME"]
@@ -290,18 +290,6 @@ class BatchDecorator(StepDecorator):
             instance_meta = get_ec2_instance_metadata()
             meta.update(instance_meta)
 
-            entries = [
-                MetaDatum(
-                    field=k,
-                    value=v,
-                    type=k,
-                    tags=["attempt_id:{0}".format(retry_count)],
-                )
-                for k, v in meta.items()
-            ]
-            # Register book-keeping metadata for debugging.
-            metadata.register_metadata(run_id, step_name, task_id, entries)
-
         self._save_logs_sidecar = Sidecar("save_logs_periodically")
         self._save_logs_sidecar.start()
 
@@ -322,6 +310,21 @@ class BatchDecorator(StepDecorator):
 
         if num_parallel >= 1:
             _setup_multinode_environment()
+            # current.parallel.node_index will be correctly available over here.
+            meta.update({"parallel-node-index": current.parallel.node_index})
+
+        if len(meta) > 0:
+            entries = [
+                MetaDatum(
+                    field=k,
+                    value=v,
+                    type=k,
+                    tags=["attempt_id:{0}".format(retry_count)],
+                )
+                for k, v in meta.items()
+            ]
+            # Register book-keeping metadata for debugging.
+            metadata.register_metadata(run_id, step_name, task_id, entries)
 
     def task_finished(
         self, step_name, flow, graph, is_task_ok, retry_count, max_retries
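Net effect of the three batch_decorator hunks above: the `meta` dict is built unconditionally, the node index is added once the multi-node environment is set up, and the MetaDatum entries are registered in a single place. Illustrative shape of the dict for a parallel AWS Batch task (values are made up):

meta = {
    "aws-batch-job-id": "d3c8a1e0-1234-5678-9abc-def012345678",
    "aws-batch-job-attempt": "1",
    "aws-batch-ce-name": "my-compute-env",
    "parallel-node-index": 0,
}
# each key/value pair becomes one MetaDatum tagged with the current attempt id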
metaflow/plugins/aws/step_functions/step_functions_cli.py

@@ -4,6 +4,7 @@ import re
 from hashlib import sha1
 
 from metaflow import JSONType, current, decorators, parameters
+from metaflow.client.core import get_metadata
 from metaflow._vendor import click
 from metaflow.exception import MetaflowException, MetaflowInternalError
 from metaflow.metaflow_config import (
@@ -130,6 +131,14 @@ def step_functions(obj, name=None):
     help="Use AWS Step Functions Distributed Map instead of Inline Map for "
     "defining foreach tasks in Amazon State Language.",
 )
+@click.option(
+    "--deployer-attribute-file",
+    default=None,
+    show_default=True,
+    type=str,
+    help="Write the workflow name to the file specified. Used internally for Metaflow's Deployer API.",
+    hidden=True,
+)
 @click.pass_obj
 def create(
     obj,
@@ -143,9 +152,21 @@ def create(
     workflow_timeout=None,
     log_execution_history=False,
     use_distributed_map=False,
+    deployer_attribute_file=None,
 ):
     validate_tags(tags)
 
+    if deployer_attribute_file:
+        with open(deployer_attribute_file, "w") as f:
+            json.dump(
+                {
+                    "name": obj.state_machine_name,
+                    "flow_name": obj.flow.name,
+                    "metadata": get_metadata(),
+                },
+                f,
+            )
+
     obj.echo(
         "Deploying *%s* to AWS Step Functions..." % obj.state_machine_name, bold=True
     )
@@ -231,8 +252,10 @@ def check_metadata_service_version(obj):
 
 
 def resolve_state_machine_name(obj, name):
-    def attach_prefix(name):
-        if SFN_STATE_MACHINE_PREFIX is not None:
+    def attach_prefix(name: str):
+        if SFN_STATE_MACHINE_PREFIX is not None and (
+            not name.startswith(SFN_STATE_MACHINE_PREFIX)
+        ):
             return SFN_STATE_MACHINE_PREFIX + "_" + name
         return name
 
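The attach_prefix change prevents the prefix from being applied twice. Assuming SFN_STATE_MACHINE_PREFIX is set to "prod" (illustrative):

attach_prefix("myflow")        # -> "prod_myflow"
attach_prefix("prod_myflow")   # -> "prod_myflow"  (previously "prod_prod_myflow")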
@@ -440,8 +463,16 @@ def resolve_token(
     type=str,
     help="Write the ID of this run to the file specified.",
 )
+@click.option(
+    "--deployer-attribute-file",
+    default=None,
+    show_default=True,
+    type=str,
+    help="Write the metadata and pathspec of this run to the file specified.\nUsed internally for Metaflow's Deployer API.",
+    hidden=True,
+)
 @click.pass_obj
-def trigger(obj, run_id_file=None, **kwargs):
+def trigger(obj, run_id_file=None, deployer_attribute_file=None, **kwargs):
     def _convert_value(param):
         # Swap `-` with `_` in parameter name to match click's behavior
         val = kwargs.get(param.name.replace("-", "_").lower())
@@ -466,6 +497,17 @@ def trigger(obj, run_id_file=None, **kwargs):
         with open(run_id_file, "w") as f:
             f.write(str(run_id))
 
+    if deployer_attribute_file:
+        with open(deployer_attribute_file, "w") as f:
+            json.dump(
+                {
+                    "name": obj.state_machine_name,
+                    "metadata": get_metadata(),
+                    "pathspec": "/".join((obj.flow.name, run_id)),
+                },
+                f,
+            )
+
     obj.echo(
         "Workflow *{name}* triggered on AWS Step Functions "
         "(run-id *{run_id}*).".format(name=obj.state_machine_name, run_id=run_id),
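For completeness, a sketch of how a caller such as the Deployer machinery might read back the attribute file written above; in practice a temporary file is passed via --deployer-attribute-file, and the path here is illustrative:

import json

with open("/tmp/sfn_deployer_attributes.json") as f:
    attrs = json.load(f)

print(attrs["name"])       # the state machine name
print(attrs["pathspec"])   # "<flow name>/<run id>", usable with the Metaflow client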