ob-metaflow 2.17.3.1__py2.py3-none-any.whl → 2.18.0.1__py2.py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of ob-metaflow might be problematic. Click here for more details.
- metaflow/datastore/task_datastore.py +3 -0
- metaflow/graph.py +3 -1
- metaflow/lint.py +28 -0
- metaflow/plugins/argo/argo_workflows.py +136 -4
- metaflow/plugins/cards/card_modules/main.js +29 -29
- metaflow/plugins/package_cli.py +1 -1
- metaflow/runtime.py +166 -26
- metaflow/task.py +70 -3
- metaflow/user_configs/config_parameters.py +3 -1
- metaflow/user_decorators/user_step_decorator.py +7 -1
- metaflow/version.py +1 -1
- {ob_metaflow-2.17.3.1.dist-info → ob_metaflow-2.18.0.1.dist-info}/METADATA +2 -2
- {ob_metaflow-2.17.3.1.dist-info → ob_metaflow-2.18.0.1.dist-info}/RECORD +20 -20
- {ob_metaflow-2.17.3.1.data → ob_metaflow-2.18.0.1.data}/data/share/metaflow/devtools/Makefile +0 -0
- {ob_metaflow-2.17.3.1.data → ob_metaflow-2.18.0.1.data}/data/share/metaflow/devtools/Tiltfile +0 -0
- {ob_metaflow-2.17.3.1.data → ob_metaflow-2.18.0.1.data}/data/share/metaflow/devtools/pick_services.sh +0 -0
- {ob_metaflow-2.17.3.1.dist-info → ob_metaflow-2.18.0.1.dist-info}/WHEEL +0 -0
- {ob_metaflow-2.17.3.1.dist-info → ob_metaflow-2.18.0.1.dist-info}/entry_points.txt +0 -0
- {ob_metaflow-2.17.3.1.dist-info → ob_metaflow-2.18.0.1.dist-info}/licenses/LICENSE +0 -0
- {ob_metaflow-2.17.3.1.dist-info → ob_metaflow-2.18.0.1.dist-info}/top_level.txt +0 -0
metaflow/datastore/task_datastore.py
CHANGED
|
@@ -222,6 +222,9 @@ class TaskDataStore(object):
|
|
|
222
222
|
@property
|
|
223
223
|
def pathspec_index(self):
|
|
224
224
|
idxstr = ",".join(map(str, (f.index for f in self["_foreach_stack"])))
|
|
225
|
+
if "_iteration_stack" in self:
|
|
226
|
+
itrstr = ",".join(map(str, (f for f in self["_iteration_stack"])))
|
|
227
|
+
return "%s/%s[%s][%s]" % (self._run_id, self._step_name, idxstr, itrstr)
|
|
225
228
|
return "%s/%s[%s]" % (self._run_id, self._step_name, idxstr)
|
|
226
229
|
|
|
227
230
|
@property
|
metaflow/graph.py
CHANGED
|
@@ -478,7 +478,9 @@ class FlowGraph(object):
|
|
|
478
478
|
cur_name = cur_node.matching_join
|
|
479
479
|
elif node_type == "split-switch":
|
|
480
480
|
all_paths = [
|
|
481
|
-
populate_block(s, end_name)
|
|
481
|
+
populate_block(s, end_name)
|
|
482
|
+
for s in cur_node.out_funcs
|
|
483
|
+
if s != cur_name
|
|
482
484
|
]
|
|
483
485
|
resulting_list.append(all_paths)
|
|
484
486
|
cur_name = end_name
|
metaflow/lint.py
CHANGED
|
@@ -175,6 +175,8 @@ def check_for_acyclicity(graph):
|
|
|
175
175
|
|
|
176
176
|
def check_path(node, seen):
|
|
177
177
|
for n in node.out_funcs:
|
|
178
|
+
if node.type == "split-switch" and n == node.name:
|
|
179
|
+
continue
|
|
178
180
|
if n in seen:
|
|
179
181
|
path = "->".join(seen + [n])
|
|
180
182
|
raise LintWarn(
|
|
@@ -241,6 +243,8 @@ def check_split_join_balance(graph):
|
|
|
241
243
|
elif node.type == "split-switch":
|
|
242
244
|
# For a switch, continue traversal down each path with the same stack
|
|
243
245
|
for n in node.out_funcs:
|
|
246
|
+
if node.type == "split-switch" and n == node.name:
|
|
247
|
+
continue
|
|
244
248
|
traverse(graph[n], split_stack)
|
|
245
249
|
return
|
|
246
250
|
elif node.type == "end":
|
|
@@ -293,6 +297,8 @@ def check_split_join_balance(graph):
|
|
|
293
297
|
new_stack = split_stack
|
|
294
298
|
|
|
295
299
|
for n in node.out_funcs:
|
|
300
|
+
if node.type == "split-switch" and n == node.name:
|
|
301
|
+
continue
|
|
296
302
|
traverse(graph[n], new_stack)
|
|
297
303
|
|
|
298
304
|
traverse(graph["start"], [])
|
|
@@ -410,3 +416,25 @@ def check_nested_foreach(graph):
|
|
|
410
416
|
if node.type == "foreach":
|
|
411
417
|
if any(graph[p].type == "foreach" for p in node.split_parents):
|
|
412
418
|
raise LintWarn(msg.format(node), node.func_lineno, node.source_file)
|
|
419
|
+
|
|
420
|
+
|
|
421
|
+
@linter.ensure_static_graph
|
|
422
|
+
@linter.check
|
|
423
|
+
def check_ambiguous_joins(graph):
|
|
424
|
+
for node in graph:
|
|
425
|
+
if node.type == "join":
|
|
426
|
+
problematic_parents = [
|
|
427
|
+
p_name
|
|
428
|
+
for p_name in node.in_funcs
|
|
429
|
+
if graph[p_name].type == "split-switch"
|
|
430
|
+
]
|
|
431
|
+
if problematic_parents:
|
|
432
|
+
msg = (
|
|
433
|
+
"A conditional path cannot lead directly to a join step.\n"
|
|
434
|
+
"In your conditional step(s) {parents}, one or more of the possible paths transition directly to the join step {join_name}.\n"
|
|
435
|
+
"As a workaround, please introduce an intermediate, unconditional step on that specific path before joining."
|
|
436
|
+
).format(
|
|
437
|
+
parents=", ".join("*%s*" % p for p in problematic_parents),
|
|
438
|
+
join_name="*%s*" % node.name,
|
|
439
|
+
)
|
|
440
|
+
raise LintWarn(msg, node.func_lineno, node.source_file)
|
metaflow/plugins/argo/argo_workflows.py
CHANGED
|
@@ -935,6 +935,7 @@ class ArgoWorkflows(object):
|
|
|
935
935
|
self.conditional_nodes = set()
|
|
936
936
|
self.conditional_join_nodes = set()
|
|
937
937
|
self.matching_conditional_join_dict = {}
|
|
938
|
+
self.recursive_nodes = set()
|
|
938
939
|
|
|
939
940
|
node_conditional_parents = {}
|
|
940
941
|
node_conditional_branches = {}
|
|
@@ -957,6 +958,12 @@ class ArgoWorkflows(object):
|
|
|
957
958
|
)
|
|
958
959
|
node_conditional_parents[node.name] = conditional_parents
|
|
959
960
|
|
|
961
|
+
# check for recursion. this split is recursive if any of its out functions are itself.
|
|
962
|
+
if any(
|
|
963
|
+
out_func for out_func in node.out_funcs if out_func == node.name
|
|
964
|
+
):
|
|
965
|
+
self.recursive_nodes.add(node.name)
|
|
966
|
+
|
|
960
967
|
if conditional_parents and not node.type == "split-switch":
|
|
961
968
|
node_conditional_parents[node.name] = conditional_parents
|
|
962
969
|
conditional_branch = conditional_branch + [node.name]
|
|
@@ -1042,6 +1049,9 @@ class ArgoWorkflows(object):
|
|
|
1042
1049
|
def _is_conditional_join_node(self, node):
|
|
1043
1050
|
return node.name in self.conditional_join_nodes
|
|
1044
1051
|
|
|
1052
|
+
def _is_recursive_node(self, node):
|
|
1053
|
+
return node.name in self.recursive_nodes
|
|
1054
|
+
|
|
1045
1055
|
def _matching_conditional_join(self, node):
|
|
1046
1056
|
return self.matching_conditional_join_dict.get(node.name, None)
|
|
1047
1057
|
|
|
@@ -1053,6 +1063,7 @@ class ArgoWorkflows(object):
|
|
|
1053
1063
|
templates=None,
|
|
1054
1064
|
dag_tasks=None,
|
|
1055
1065
|
parent_foreach=None,
|
|
1066
|
+
seen=None,
|
|
1056
1067
|
): # Returns Tuple[List[Template], List[DAGTask]]
|
|
1057
1068
|
""" """
|
|
1058
1069
|
# Every for-each node results in a separate subDAG and an equivalent
|
|
@@ -1062,6 +1073,8 @@ class ArgoWorkflows(object):
|
|
|
1062
1073
|
# of the for-each node.
|
|
1063
1074
|
|
|
1064
1075
|
# Emit if we have reached the end of the sub workflow
|
|
1076
|
+
if seen is None:
|
|
1077
|
+
seen = []
|
|
1065
1078
|
if dag_tasks is None:
|
|
1066
1079
|
dag_tasks = []
|
|
1067
1080
|
if templates is None:
|
|
@@ -1069,6 +1082,13 @@ class ArgoWorkflows(object):
|
|
|
1069
1082
|
|
|
1070
1083
|
if exit_node is not None and exit_node is node.name:
|
|
1071
1084
|
return templates, dag_tasks
|
|
1085
|
+
if node.name in seen:
|
|
1086
|
+
return templates, dag_tasks
|
|
1087
|
+
|
|
1088
|
+
seen.append(node.name)
|
|
1089
|
+
|
|
1090
|
+
# helper variable for recursive conditional inputs
|
|
1091
|
+
has_foreach_inputs = False
|
|
1072
1092
|
if node.name == "start":
|
|
1073
1093
|
# Start node has no dependencies.
|
|
1074
1094
|
dag_task = DAGTask(self._sanitize(node.name)).template(
|
|
@@ -1082,9 +1102,10 @@ class ArgoWorkflows(object):
|
|
|
1082
1102
|
# vs what is a "num_parallel" based foreach (i.e. something that follows gang semantics.)
|
|
1083
1103
|
# A `regular` foreach is basically any arbitrary kind of foreach.
|
|
1084
1104
|
):
|
|
1105
|
+
# helper variable for recursive conditional inputs
|
|
1106
|
+
has_foreach_inputs = True
|
|
1085
1107
|
# Child of a foreach node needs input-paths as well as split-index
|
|
1086
1108
|
# This child is the first node of the sub workflow and has no dependency
|
|
1087
|
-
|
|
1088
1109
|
parameters = [
|
|
1089
1110
|
Parameter("input-paths").value("{{inputs.parameters.input-paths}}"),
|
|
1090
1111
|
Parameter("split-index").value("{{inputs.parameters.split-index}}"),
|
|
@@ -1262,6 +1283,7 @@ class ArgoWorkflows(object):
|
|
|
1262
1283
|
templates,
|
|
1263
1284
|
dag_tasks,
|
|
1264
1285
|
parent_foreach,
|
|
1286
|
+
seen,
|
|
1265
1287
|
)
|
|
1266
1288
|
return _visit(
|
|
1267
1289
|
self.graph[node.matching_join],
|
|
@@ -1269,8 +1291,102 @@ class ArgoWorkflows(object):
|
|
|
1269
1291
|
templates,
|
|
1270
1292
|
dag_tasks,
|
|
1271
1293
|
parent_foreach,
|
|
1294
|
+
seen,
|
|
1272
1295
|
)
|
|
1273
1296
|
elif node.type == "split-switch":
|
|
1297
|
+
if self._is_recursive_node(node):
|
|
1298
|
+
# we need an additional recursive template if the step is recursive
|
|
1299
|
+
# NOTE: in the recursive case, the original step is renamed in the container templates to 'recursive-<step_name>'
|
|
1300
|
+
# so that we do not have to touch the step references in the DAG.
|
|
1301
|
+
#
|
|
1302
|
+
# NOTE: The way that recursion in Argo Workflows is achieved is with the following structure:
|
|
1303
|
+
# - the usual 'example-step' template which would match example_step in flow code is renamed to 'recursive-example-step'
|
|
1304
|
+
# - templates has another template with the original task name: 'example-step'
|
|
1305
|
+
# - the template 'example-step' in turn has steps
|
|
1306
|
+
# - 'example-step-internal' which uses the metaflow step executing template 'recursive-example-step'
|
|
1307
|
+
# - 'example-step-recursion' which calls the parent template 'example-step' if switch-step output from 'example-step-internal' matches the condition.
|
|
1308
|
+
sanitized_name = self._sanitize(node.name)
|
|
1309
|
+
templates.append(
|
|
1310
|
+
Template(sanitized_name)
|
|
1311
|
+
.steps(
|
|
1312
|
+
[
|
|
1313
|
+
WorkflowStep()
|
|
1314
|
+
.name("%s-internal" % sanitized_name)
|
|
1315
|
+
.template("recursive-%s" % sanitized_name)
|
|
1316
|
+
.arguments(
|
|
1317
|
+
Arguments().parameters(
|
|
1318
|
+
[
|
|
1319
|
+
Parameter("input-paths").value(
|
|
1320
|
+
"{{inputs.parameters.input-paths}}"
|
|
1321
|
+
)
|
|
1322
|
+
]
|
|
1323
|
+
# Add the additional inputs required by specific node types.
|
|
1324
|
+
# We do not need to cover joins or @parallel, as a split-switch step can not be either one of these.
|
|
1325
|
+
+ (
|
|
1326
|
+
[
|
|
1327
|
+
Parameter("split-index").value(
|
|
1328
|
+
"{{inputs.parameters.split-index}}"
|
|
1329
|
+
)
|
|
1330
|
+
]
|
|
1331
|
+
if has_foreach_inputs
|
|
1332
|
+
else []
|
|
1333
|
+
)
|
|
1334
|
+
)
|
|
1335
|
+
)
|
|
1336
|
+
]
|
|
1337
|
+
)
|
|
1338
|
+
.steps(
|
|
1339
|
+
[
|
|
1340
|
+
WorkflowStep()
|
|
1341
|
+
.name("%s-recursion" % sanitized_name)
|
|
1342
|
+
.template(sanitized_name)
|
|
1343
|
+
.when(
|
|
1344
|
+
"{{steps.%s-internal.outputs.parameters.switch-step}}==%s"
|
|
1345
|
+
% (sanitized_name, node.name)
|
|
1346
|
+
)
|
|
1347
|
+
.arguments(
|
|
1348
|
+
Arguments().parameters(
|
|
1349
|
+
[
|
|
1350
|
+
Parameter("input-paths").value(
|
|
1351
|
+
"argo-{{workflow.name}}/%s/{{steps.%s-internal.outputs.parameters.task-id}}"
|
|
1352
|
+
% (node.name, sanitized_name)
|
|
1353
|
+
)
|
|
1354
|
+
]
|
|
1355
|
+
+ (
|
|
1356
|
+
[
|
|
1357
|
+
Parameter("split-index").value(
|
|
1358
|
+
"{{inputs.parameters.split-index}}"
|
|
1359
|
+
)
|
|
1360
|
+
]
|
|
1361
|
+
if has_foreach_inputs
|
|
1362
|
+
else []
|
|
1363
|
+
)
|
|
1364
|
+
)
|
|
1365
|
+
),
|
|
1366
|
+
]
|
|
1367
|
+
)
|
|
1368
|
+
.inputs(Inputs().parameters(parameters))
|
|
1369
|
+
.outputs(
|
|
1370
|
+
# NOTE: We try to read the output parameters from the recursive template call first (<step>-recursion), and the internal step second (<step>-internal).
|
|
1371
|
+
# This guarantees that we always get the output parameters of the last recursive step that executed.
|
|
1372
|
+
Outputs().parameters(
|
|
1373
|
+
[
|
|
1374
|
+
Parameter("task-id").valueFrom(
|
|
1375
|
+
{
|
|
1376
|
+
"expression": "(steps['%s-recursion']?.outputs ?? steps['%s-internal']?.outputs).parameters['task-id']"
|
|
1377
|
+
% (sanitized_name, sanitized_name)
|
|
1378
|
+
}
|
|
1379
|
+
),
|
|
1380
|
+
Parameter("switch-step").valueFrom(
|
|
1381
|
+
{
|
|
1382
|
+
"expression": "(steps['%s-recursion']?.outputs ?? steps['%s-internal']?.outputs).parameters['switch-step']"
|
|
1383
|
+
% (sanitized_name, sanitized_name)
|
|
1384
|
+
}
|
|
1385
|
+
),
|
|
1386
|
+
]
|
|
1387
|
+
)
|
|
1388
|
+
)
|
|
1389
|
+
)
|
|
1274
1390
|
for n in node.out_funcs:
|
|
1275
1391
|
_visit(
|
|
1276
1392
|
self.graph[n],
|
|
@@ -1278,6 +1394,7 @@ class ArgoWorkflows(object):
|
|
|
1278
1394
|
templates,
|
|
1279
1395
|
dag_tasks,
|
|
1280
1396
|
parent_foreach,
|
|
1397
|
+
seen,
|
|
1281
1398
|
)
|
|
1282
1399
|
|
|
1283
1400
|
return _visit(
|
|
@@ -1286,6 +1403,7 @@ class ArgoWorkflows(object):
|
|
|
1286
1403
|
templates,
|
|
1287
1404
|
dag_tasks,
|
|
1288
1405
|
parent_foreach,
|
|
1406
|
+
seen,
|
|
1289
1407
|
)
|
|
1290
1408
|
# For foreach nodes generate a new sub DAGTemplate
|
|
1291
1409
|
# We do this for "regular" foreaches (ie. `self.next(self.a, foreach=)`)
|
|
@@ -1376,6 +1494,7 @@ class ArgoWorkflows(object):
|
|
|
1376
1494
|
templates,
|
|
1377
1495
|
[],
|
|
1378
1496
|
node.name,
|
|
1497
|
+
seen,
|
|
1379
1498
|
)
|
|
1380
1499
|
|
|
1381
1500
|
# How do foreach's work on Argo:
|
|
@@ -1509,6 +1628,7 @@ class ArgoWorkflows(object):
|
|
|
1509
1628
|
templates,
|
|
1510
1629
|
dag_tasks,
|
|
1511
1630
|
parent_foreach,
|
|
1631
|
+
seen,
|
|
1512
1632
|
)
|
|
1513
1633
|
# For linear nodes continue traversing to the next node
|
|
1514
1634
|
if node.type in ("linear", "join", "start"):
|
|
@@ -1518,6 +1638,7 @@ class ArgoWorkflows(object):
|
|
|
1518
1638
|
templates,
|
|
1519
1639
|
dag_tasks,
|
|
1520
1640
|
parent_foreach,
|
|
1641
|
+
seen,
|
|
1521
1642
|
)
|
|
1522
1643
|
else:
|
|
1523
1644
|
raise ArgoWorkflowsException(
|
|
@@ -1782,8 +1903,10 @@ class ArgoWorkflows(object):
|
|
|
1782
1903
|
# foreach-joins straight out of conditional branches are not yet supported
|
|
1783
1904
|
if self._is_conditional_join_node(node):
|
|
1784
1905
|
raise ArgoWorkflowsException(
|
|
1785
|
-
"
|
|
1786
|
-
"As a workaround,
|
|
1906
|
+
"Conditional steps inside a foreach that transition directly into a join step are not currently supported.\n"
|
|
1907
|
+
"As a workaround, add a common step after the conditional steps %s "
|
|
1908
|
+
"that will transition to a join."
|
|
1909
|
+
% ", ".join("*%s*" % f for f in node.in_funcs)
|
|
1787
1910
|
)
|
|
1788
1911
|
# Set aggregated input-paths for a for-each join
|
|
1789
1912
|
foreach_step = next(
|
|
@@ -2307,8 +2430,13 @@ class ArgoWorkflows(object):
|
|
|
2307
2430
|
)
|
|
2308
2431
|
)
|
|
2309
2432
|
else:
|
|
2433
|
+
template_name = self._sanitize(node.name)
|
|
2434
|
+
if self._is_recursive_node(node):
|
|
2435
|
+
# The recursive template has the original step name,
|
|
2436
|
+
# this becomes a template within the recursive ones 'steps'
|
|
2437
|
+
template_name = self._sanitize("recursive-%s" % node.name)
|
|
2310
2438
|
yield (
|
|
2311
|
-
Template(
|
|
2439
|
+
Template(template_name)
|
|
2312
2440
|
# Set @timeout values
|
|
2313
2441
|
.active_deadline_seconds(run_time_limit)
|
|
2314
2442
|
# Set service account
|
|
@@ -3801,6 +3929,10 @@ class WorkflowStep(object):
|
|
|
3801
3929
|
self.payload["template"] = str(template)
|
|
3802
3930
|
return self
|
|
3803
3931
|
|
|
3932
|
+
def arguments(self, arguments):
|
|
3933
|
+
self.payload["arguments"] = arguments.to_json()
|
|
3934
|
+
return self
|
|
3935
|
+
|
|
3804
3936
|
def when(self, condition):
|
|
3805
3937
|
self.payload["when"] = str(condition)
|
|
3806
3938
|
return self
|