metaflow 2.12.7__py2.py3-none-any.whl → 2.12.9__py2.py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- metaflow/__init__.py +2 -0
- metaflow/cli.py +12 -4
- metaflow/extension_support/plugins.py +1 -0
- metaflow/flowspec.py +8 -1
- metaflow/lint.py +13 -0
- metaflow/metaflow_current.py +0 -8
- metaflow/plugins/__init__.py +12 -0
- metaflow/plugins/argo/argo_workflows.py +462 -42
- metaflow/plugins/argo/argo_workflows_cli.py +60 -3
- metaflow/plugins/argo/argo_workflows_decorator.py +38 -7
- metaflow/plugins/argo/argo_workflows_deployer.py +290 -0
- metaflow/plugins/argo/jobset_input_paths.py +16 -0
- metaflow/plugins/aws/batch/batch_decorator.py +16 -13
- metaflow/plugins/aws/step_functions/step_functions_cli.py +45 -3
- metaflow/plugins/aws/step_functions/step_functions_deployer.py +251 -0
- metaflow/plugins/cards/card_cli.py +1 -1
- metaflow/plugins/kubernetes/kubernetes.py +279 -52
- metaflow/plugins/kubernetes/kubernetes_cli.py +26 -8
- metaflow/plugins/kubernetes/kubernetes_client.py +0 -1
- metaflow/plugins/kubernetes/kubernetes_decorator.py +56 -44
- metaflow/plugins/kubernetes/kubernetes_job.py +6 -6
- metaflow/plugins/kubernetes/kubernetes_jobsets.py +510 -272
- metaflow/plugins/parallel_decorator.py +108 -8
- metaflow/plugins/pypi/bootstrap.py +1 -1
- metaflow/plugins/pypi/micromamba.py +1 -1
- metaflow/plugins/secrets/secrets_decorator.py +12 -3
- metaflow/plugins/test_unbounded_foreach_decorator.py +39 -4
- metaflow/runner/deployer.py +386 -0
- metaflow/runner/metaflow_runner.py +1 -20
- metaflow/runner/nbdeploy.py +130 -0
- metaflow/runner/nbrun.py +4 -28
- metaflow/runner/utils.py +49 -0
- metaflow/runtime.py +246 -134
- metaflow/version.py +1 -1
- {metaflow-2.12.7.dist-info → metaflow-2.12.9.dist-info}/METADATA +2 -2
- {metaflow-2.12.7.dist-info → metaflow-2.12.9.dist-info}/RECORD +40 -34
- {metaflow-2.12.7.dist-info → metaflow-2.12.9.dist-info}/WHEEL +1 -1
- {metaflow-2.12.7.dist-info → metaflow-2.12.9.dist-info}/LICENSE +0 -0
- {metaflow-2.12.7.dist-info → metaflow-2.12.9.dist-info}/entry_points.txt +0 -0
- {metaflow-2.12.7.dist-info → metaflow-2.12.9.dist-info}/top_level.txt +0 -0
metaflow/runtime.py
CHANGED
@@ -16,6 +16,7 @@ from functools import partial
 from concurrent import futures
 
 from metaflow.datastore.exceptions import DataException
+from contextlib import contextmanager
 
 from . import get_namespace
 from .metadata import MetaDatum
@@ -109,6 +110,8 @@ class NativeRuntime(object):
         self._clone_run_id = clone_run_id
         self._clone_only = clone_only
         self._clone_steps = {} if clone_steps is None else clone_steps
+        self._cloned_tasks = []
+        self._cloned_task_index = set()
         self._reentrant = reentrant
         self._run_url = None
 
@@ -203,6 +206,22 @@ class NativeRuntime(object):
 
         self._is_cloned[self._params_task.path] = self._params_task.is_cloned
 
+    def should_skip_clone_only_execution(self):
+        (
+            should_skip_clone_only_execution,
+            skip_reason,
+        ) = self._should_skip_clone_only_execution()
+        if should_skip_clone_only_execution:
+            self._logger(skip_reason, system_msg=True)
+            return True
+        return False
+
+    @contextmanager
+    def run_heartbeat(self):
+        self._metadata.start_run_heartbeat(self._flow.name, self._run_id)
+        yield
+        self._metadata.stop_heartbeat()
+
     def print_workflow_info(self):
         self._run_url = (
             "%s/%s/%s" % (UI_URL.rstrip("/"), self._flow.name, self._run_id)
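Note: the two methods added above replace heartbeat bookkeeping that was previously inlined in clone_original_run and execute (removed in the hunks below). A minimal usage sketch, assuming a NativeRuntime instance named runtime (hypothetical caller code, not part of this diff):

    # The context manager scopes the run heartbeat to a block, starting it on
    # entry and stopping it on a clean exit, instead of pairing
    # start_run_heartbeat/stop_heartbeat calls by hand.
    with runtime.run_heartbeat():
        if not runtime.should_skip_clone_only_execution():
            runtime.clone_original_run(generate_task_obj=True)
            runtime.execute()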
@@ -235,41 +254,58 @@ class NativeRuntime(object):
         )
         return False, None
 
-    def clone_task(
-        self
+    def clone_task(
+        self,
+        step_name,
+        task_id,
+        pathspec_index,
+        ubf_context,
+        generate_task_obj,
+        verbose=False,
+    ):
+        try:
+            new_task_id = task_id
+            if generate_task_obj:
+                task = self._new_task(step_name, pathspec_index=pathspec_index)
+                if ubf_context:
+                    task.ubf_context = ubf_context
+                new_task_id = task.task_id
+                self._cloned_tasks.append(task)
+                self._cloned_task_index.add(task.task_index)
+
+            if verbose:
+                self._logger(
+                    "Cloning task from {}/{}/{}/{} to {}/{}/{}/{}".format(
+                        self._flow.name,
+                        self._clone_run_id,
+                        step_name,
+                        task_id,
+                        self._flow.name,
+                        self._run_id,
+                        step_name,
+                        new_task_id,
+                    ),
+                    system_msg=True,
+                )
+            clone_task_helper(
                 self._flow.name,
                 self._clone_run_id,
-            step_name,
-            task_id,
-            self._flow.name,
                 self._run_id,
                 step_name,
-            task_id,
-            self._flow_datastore,
-            self._metadata,
-            origin_ds_set=self._origin_ds_set,
-        )
+                task_id,  # origin_task_id
+                new_task_id,
+                self._flow_datastore,
+                self._metadata,
+                origin_ds_set=self._origin_ds_set,
+            )
+        except Exception as e:
+            self._logger(
+                "Cloning task from {}/{}/{} failed with error: {}".format(
+                    self._clone_run_id, step_name, task_id, str(e)
+                )
+            )
 
-    def clone_original_run(self):
-        (
-            should_skip_clone_only_execution,
-            skip_reason,
-        ) = self._should_skip_clone_only_execution()
-        if should_skip_clone_only_execution:
-            self._logger(skip_reason, system_msg=True)
-            return
-        self._metadata.start_run_heartbeat(self._flow.name, self._run_id)
+    def clone_original_run(self, generate_task_obj=False, verbose=True):
         self._logger(
             "Start cloning original run: {}/{}".format(
                 self._flow.name, self._clone_run_id
@@ -279,43 +315,106 @@ class NativeRuntime(object):
 
         inputs = []
 
+        ubf_mapper_tasks_to_clone = []
+        # We only clone ubf mapper tasks if the control task is complete.
+        # Here we need to check which control tasks are complete, and then get the
+        # corresponding mapper tasks.
         for task_ds in self._origin_ds_set:
             _, step_name, task_id = task_ds.pathspec.split("/")
+            pathspec_index = task_ds.pathspec_index
+            if task_ds["_task_ok"] and step_name != "_parameters":
+                # Only control tasks can have _control_mapper_tasks. We then store
+                # the corresponding mapper task pathspecs.
+                control_mapper_tasks = (
+                    []
+                    if "_control_mapper_tasks" not in task_ds
+                    else task_ds["_control_mapper_tasks"]
+                )
+                ubf_mapper_tasks_to_clone.extend(control_mapper_tasks)
+
+        for task_ds in self._origin_ds_set:
+            _, step_name, task_id = task_ds.pathspec.split("/")
+            pathspec_index = task_ds.pathspec_index
+
             if task_ds["_task_ok"] and step_name != "_parameters":
+                # "_unbounded_foreach" is a special flag to indicate that the transition
+                # is an unbounded foreach. Both the parent and the split children tasks
+                # will have this flag set. The split control/mapper tasks have no
+                # "foreach_param" because UBF is always followed by a join step.
+                is_ubf_task = (
+                    "_unbounded_foreach" in task_ds and task_ds["_unbounded_foreach"]
+                ) and (self._graph[step_name].foreach_param is None)
+
+                # Only the control task has the "_control_mapper_tasks" artifact.
+                is_ubf_control_task = (
+                    is_ubf_task
+                    and ("_control_mapper_tasks" in task_ds)
+                    and task_ds["_control_mapper_tasks"]
+                )
+                is_ubf_mapper_tasks = is_ubf_task and (not is_ubf_control_task)
+                if is_ubf_mapper_tasks and (
+                    task_ds.pathspec not in ubf_mapper_tasks_to_clone
+                ):
+                    # Skip copying UBF mapper tasks if the control task is incomplete.
+                    continue
+
+                ubf_context = None
+                if is_ubf_task:
+                    ubf_context = "ubf_test" if is_ubf_mapper_tasks else "ubf_control"
+                inputs.append(
+                    (
+                        step_name,
+                        task_id,
+                        pathspec_index,
+                        is_ubf_mapper_tasks,
+                        ubf_context,
+                    )
+                )
 
         with futures.ThreadPoolExecutor(max_workers=self._max_workers) as executor:
             all_tasks = [
-                executor.submit(
+                executor.submit(
+                    self.clone_task,
+                    step_name,
+                    task_id,
+                    pathspec_index,
+                    ubf_context=ubf_context,
+                    generate_task_obj=generate_task_obj and (not is_ubf_mapper_tasks),
+                    verbose=verbose,
+                )
+                for (
+                    step_name,
+                    task_id,
+                    pathspec_index,
+                    is_ubf_mapper_tasks,
+                    ubf_context,
+                ) in inputs
             ]
             _, _ = futures.wait(all_tasks)
         self._logger("Cloning original run is done", system_msg=True)
         self._params_task.mark_resume_done()
-        self._metadata.stop_heartbeat()
 
     def execute(self):
-        (
-            should_skip_clone_only_execution,
-            skip_reason,
-        ) = self._should_skip_clone_only_execution()
-        if should_skip_clone_only_execution:
-            self._logger(skip_reason, system_msg=True)
-            return
-        self._metadata.start_run_heartbeat(self._flow.name, self._run_id)
-
-        if self._params_task:
-            self._queue_push("start", {"input_paths": [self._params_task.path]})
+        if len(self._cloned_tasks) > 0:
+            # mutable list storing the cloned tasks.
+            self._run_queue = []
+            self._active_tasks[0] = 0
         else:
-            self._queue_push("start", {})
+            if self._params_task:
+                self._queue_push("start", {"input_paths": [self._params_task.path]})
+            else:
+                self._queue_push("start", {})
         progress_tstamp = time.time()
         try:
             # main scheduling loop
             exception = None
-            while self._run_queue or self._active_tasks[0] > 0:
+            while self._run_queue or self._active_tasks[0] > 0 or self._cloned_tasks:
                 # 1. are any of the current workers finished?
-                finished_tasks = list(self._poll_workers())
+                if self._cloned_tasks:
+                    finished_tasks = self._cloned_tasks
+                    # reset the list of cloned tasks and let poll_workers handle
+                    # the remaining transition
+                    self._cloned_tasks = []
+                else:
+                    finished_tasks = list(self._poll_workers())
                 # 2. push new tasks triggered by the finished tasks to the queue
                 self._queue_tasks(finished_tasks)
                 # 3. if there are available worker slots, pop and start tasks
@@ -381,8 +480,6 @@ class NativeRuntime(object):
             for deco in step.decorators:
                 deco.runtime_finished(exception)
 
-        self._metadata.stop_heartbeat()
-
         # assert that end was executed and it was successful
         if ("end", ()) in self._finished:
             if self._run_url:
@@ -432,9 +529,41 @@ class NativeRuntime(object):
         for _ in range(3):
             list(self._poll_workers())
 
+    # Given the current task information (task_index), the type of transition,
+    # and the split index, return the new task index.
+    def _translate_index(self, task, next_step, type, split_index=None):
+        import re
+
+        match = re.match(r"^(.+)\[(.*)\]$", task.task_index)
+        if match:
+            _, foreach_index = match.groups()
+            # Convert foreach_index to a list of indices
+            if len(foreach_index) > 0:
+                foreach_index = foreach_index.split(",")
+            else:
+                foreach_index = []
+        else:
+            raise ValueError(
+                "Index not in the format of {run_id}/{step_name}[{foreach_index}]"
+            )
+        if type == "linear":
+            return "%s[%s]" % (next_step, ",".join(foreach_index))
+        elif type == "join":
+            indices = []
+            if len(foreach_index) > 0:
+                indices = foreach_index[:-1]
+            return "%s[%s]" % (next_step, ",".join(indices))
+        elif type == "split":
+            foreach_index.append(str(split_index))
+            return "%s[%s]" % (next_step, ",".join(foreach_index))
+
     # Store the parameters needed for task creation, so that pushing on items
     # onto the run_queue is an inexpensive operation.
-    def _queue_push(self, step, task_kwargs):
+    def _queue_push(self, step, task_kwargs, index=None):
+        # If the to-be-pushed task is already cloned before, we don't need
+        # to re-run it.
+        if index and index in self._cloned_task_index:
+            return
         self._run_queue.insert(0, (step, task_kwargs))
         # For foreaches, this will happen multiple time but is ok, becomes a no-op
         self._unprocessed_steps.discard(step)
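_translate_index (added above) derives a deterministic task index of the form {step_name}[{comma-separated foreach indices}], which _queue_push compares against _cloned_task_index so that already-cloned tasks are not re-queued. A standalone sketch of the translation scheme, with the task object replaced by a plain string (a hypothetical helper mirroring the method above, not part of the diff):

    import re

    def translate_index(task_index, next_step, transition, split_index=None):
        # task_index looks like "b[0,2]"; the bracketed part is the foreach stack
        _, raw = re.match(r"^(.+)\[(.*)\]$", task_index).groups()
        stack = raw.split(",") if raw else []
        if transition == "linear":  # same foreach depth; the stack carries over
            return "%s[%s]" % (next_step, ",".join(stack))
        if transition == "join":    # pop one foreach level
            return "%s[%s]" % (next_step, ",".join(stack[:-1]))
        if transition == "split":   # push the new split index
            return "%s[%s]" % (next_step, ",".join(stack + [str(split_index)]))

    assert translate_index("start[]", "a", "split", 0) == "a[0]"
    assert translate_index("a[0]", "b", "linear") == "b[0]"
    assert translate_index("b[0]", "join_ab", "join") == "join_ab[]"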
@@ -493,30 +622,19 @@ class NativeRuntime(object):
             )
             num_splits = len(mapper_tasks)
             self._control_num_splits[task.path] = num_splits
-            )
-            else:
-                # Update _finished since these tasks were successfully
-                # run elsewhere so that join will be unblocked.
-                _, foreach_stack = task.finished_id
-                top = foreach_stack[-1]
-                bottom = list(foreach_stack[:-1])
-                for i in range(num_splits):
-                    s = tuple(bottom + [top._replace(index=i)])
-                    self._finished[(task.step, s)] = mapper_tasks[i]
-                    self._is_cloned[mapper_tasks[i]] = False
+
+            # If the control task is cloned, all mapper tasks should have been cloned
+            # as well, so we no longer need to handle cloning of mapper tasks in runtime.
+
+            # Update _finished since these tasks were successfully
+            # run elsewhere so that join will be unblocked.
+            _, foreach_stack = task.finished_id
+            top = foreach_stack[-1]
+            bottom = list(foreach_stack[:-1])
+            for i in range(num_splits):
+                s = tuple(bottom + [top._replace(index=i)])
+                self._finished[(task.step, s)] = mapper_tasks[i]
+                self._is_cloned[mapper_tasks[i]] = False
 
         # Find and check status of control task and retrieve its pathspec
         # for retrieving unbounded foreach cardinality.
@@ -541,16 +659,18 @@ class NativeRuntime(object):
                 required_tasks.append(self._finished.get((task.step, s)))
 
             if all(required_tasks):
+                index = self._translate_index(task, next_step, "join")
                 # all tasks to be joined are ready. Schedule the next join step.
                 self._queue_push(
                     next_step,
                     {"input_paths": required_tasks, "join_type": "foreach"},
+                    index,
                 )
         else:
             # matching_split is the split-parent of the finished task
             matching_split = self._graph[self._graph[next_step].split_parents[-1]]
             _, foreach_stack = task.finished_id
-
+            index = ""
             if matching_split.type == "foreach":
                 # next step is a foreach join
 
@@ -565,6 +685,7 @@ class NativeRuntime(object):
                     self._finished.get((task.step, s)) for s in siblings(foreach_stack)
                 ]
                 join_type = "foreach"
+                index = self._translate_index(task, next_step, "join")
             else:
                 # next step is a split
                 # required tasks are all branches joined by the next step
@@ -573,11 +694,14 @@ class NativeRuntime(object):
                     for step in self._graph[next_step].in_funcs
                 ]
                 join_type = "linear"
+                index = self._translate_index(task, next_step, "linear")
 
             if all(required_tasks):
                 # all tasks to be joined are ready. Schedule the next join step.
                 self._queue_push(
-                    next_step,
+                    next_step,
+                    {"input_paths": required_tasks, "join_type": join_type},
+                    index,
                 )
 
     def _queue_task_foreach(self, task, next_steps):
@@ -598,6 +722,12 @@ class NativeRuntime(object):
             # Need to push control process related task.
             ubf_iter_name = task.results.get("_foreach_var")
             ubf_iter = task.results.get(ubf_iter_name)
+            # UBF control task has no split index, hence "None" as place holder.
+
+            if task.results.get("_control_task_is_mapper_zero", False):
+                index = self._translate_index(task, next_step, "split", 0)
+            else:
+                index = self._translate_index(task, next_step, "split", None)
             self._queue_push(
                 next_step,
                 {
@@ -605,6 +735,7 @@ class NativeRuntime(object):
                     "ubf_context": UBF_CONTROL,
                     "ubf_iter": ubf_iter,
                 },
+                index,
             )
         else:
             num_splits = task.results["_foreach_num_splits"]
@@ -624,8 +755,11 @@ class NativeRuntime(object):
 
             # schedule all splits
             for i in range(num_splits):
+                index = self._translate_index(task, next_step, "split", i)
                 self._queue_push(
-                    next_step,
+                    next_step,
+                    {"split_index": str(i), "input_paths": [task.path]},
+                    index,
                 )
 
     def _queue_tasks(self, finished_tasks):
|
@@ -673,7 +807,8 @@ class NativeRuntime(object):
|
|
673
807
|
else:
|
674
808
|
# Next steps are normal linear steps
|
675
809
|
for step in next_steps:
|
676
|
-
self.
|
810
|
+
index = self._translate_index(task, step, "linear")
|
811
|
+
self._queue_push(step, {"input_paths": [task.path]}, index)
|
677
812
|
|
678
813
|
def _poll_workers(self):
|
679
814
|
if self._workers:
|
@@ -794,6 +929,7 @@ class Task(object):
         join_type=None,
         task_id=None,
         resume_identifier=None,
+        pathspec_index=None,
     ):
         self.step = step
         self.flow = flow
@@ -836,10 +972,9 @@ class Task(object):
         self._is_resume_leader = None
         self._resume_done = None
         self._resume_identifier = resume_identifier
-
         origin = None
         if clone_run_id and may_clone:
-            origin = self._find_origin_task(clone_run_id, join_type)
+            origin = self._find_origin_task(clone_run_id, join_type, pathspec_index)
         if origin and origin["_task_ok"]:
             # At this point, we know we are going to clone
             self._is_cloned = True
@@ -960,10 +1095,11 @@ class Task(object):
             )
 
         if self._is_resume_leader:
+            if reentrant:
+                self.log(
+                    "Selected as the reentrant clone leader.",
+                    system_msg=True,
+                )
             # Clone in place without relying on run_queue.
             self.new_attempt()
             self._ds.clone(origin)
@@ -1108,63 +1244,34 @@ class Task(object):
 
     def _get_task_id(self, task_id):
         already_existed = True
+        tags = []
         if self.ubf_context == UBF_CONTROL:
-            # We associate the control task-id to be 1:1 with the split node
-            # where the unbounded-foreach was defined.
-            # We prefer encoding the corresponding split into the task_id of
-            # the control node; so it has access to this information quite
-            # easily. There is anyway a corresponding int id stored in the
-            # metadata backend - so this should be fine.
-            task_id = "control-%s-%s-%s" % (run, input_step, input_task)
-        # Register only regular Metaflow (non control) tasks.
+            tags = [CONTROL_TASK_TAG]
+        # Register Metaflow tasks.
         if task_id is None:
-            task_id = str(
+            task_id = str(
+                self.metadata.new_task_id(self.run_id, self.step, sys_tags=tags)
+            )
             already_existed = False
         else:
-            # task_id is preset only by persist_constants()
-                attempt_id,
-                sys_tags=tags,
-            )
-            # A Task's tags are now those of its ancestral Run, so we are not able
-            # to rely on a task's tags to indicate the presence of a control task
-            # so, on top of adding the tags above, we also add a task metadata
-            # entry indicating that this is a "control task".
-            #
-            # Here we will also add a task metadata entry to indicate "control task".
-            # Within the metaflow repo, the only dependency of such a "control task"
-            # indicator is in the integration test suite (see Step.control_tasks() in
-            # client API).
-            task_metadata_list = [
-                MetaDatum(
-                    field="internal_task_type",
-                    value=CONTROL_TASK_TAG,
-                    type="internal_task_type",
-                    tags=["attempt_id:{0}".format(attempt_id)],
-                )
-            ]
-            self.metadata.register_metadata(
-                self.run_id, self.step, task_id, task_metadata_list
-            )
-        else:
-            already_existed = not self.metadata.register_task_id(
-                self.run_id, self.step, task_id, 0
-            )
+            # task_id is preset only by persist_constants().
+            already_existed = not self.metadata.register_task_id(
+                self.run_id,
+                self.step,
+                task_id,
+                0,
+                sys_tags=tags,
+            )
 
         self.task_id = task_id
         self._path = "%s/%s/%s" % (self.run_id, self.step, self.task_id)
         return already_existed
 
-    def _find_origin_task(self, clone_run_id, join_type):
-        if self.step == "_parameters":
+    def _find_origin_task(self, clone_run_id, join_type, pathspec_index=None):
+        if pathspec_index:
+            origin = self.origin_ds_set.get_with_pathspec_index(pathspec_index)
+            return origin
+        elif self.step == "_parameters":
             pathspec = "%s/_parameters[]" % clone_run_id
             origin = self.origin_ds_set.get_with_pathspec_index(pathspec)
 
@@ -1214,6 +1321,11 @@ class Task(object):
         )
         return self._results_ds
 
+    @property
+    def task_index(self):
+        _, task_index = self.results.pathspec_index.split("/")
+        return task_index
+
     @property
     def finished_id(self):
         # note: id is not available before the task has finished.
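The task_index property strips the run id from pathspec_index, yielding exactly the {step_name}[{foreach_index}] form that _translate_index parses. For illustration, with hypothetical values (format inferred from the "%s/_parameters[]" pathspec built in _find_origin_task above):

    pathspec_index = "221/train[0,2]"  # "{run_id}/{step_name}[{foreach_index}]"
    _, task_index = pathspec_index.split("/")
    assert task_index == "train[0,2]"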
metaflow/version.py
CHANGED
@@ -1 +1 @@
-metaflow_version = "2.12.7"
+metaflow_version = "2.12.9"
{metaflow-2.12.7.dist-info → metaflow-2.12.9.dist-info}/METADATA
CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: metaflow
-Version: 2.12.7
+Version: 2.12.9
 Summary: Metaflow: More Data Science, Less Engineering
 Author: Metaflow Developers
 Author-email: help@metaflow.org
@@ -26,7 +26,7 @@ License-File: LICENSE
 Requires-Dist: requests
 Requires-Dist: boto3
 Provides-Extra: stubs
-Requires-Dist: metaflow-stubs ==2.12.7 ; extra == 'stubs'
+Requires-Dist: metaflow-stubs ==2.12.9 ; extra == 'stubs'
 
 
 