metaflow 2.12.8__py2.py3-none-any.whl → 2.12.9__py2.py3-none-any.whl

This diff shows the changes between two publicly released versions of the package, as published to one of the supported registries. It is provided for informational purposes only.
Files changed (38)
  1. metaflow/__init__.py +2 -0
  2. metaflow/cli.py +12 -4
  3. metaflow/extension_support/plugins.py +1 -0
  4. metaflow/flowspec.py +8 -1
  5. metaflow/lint.py +13 -0
  6. metaflow/metaflow_current.py +0 -8
  7. metaflow/plugins/__init__.py +12 -0
  8. metaflow/plugins/argo/argo_workflows.py +462 -42
  9. metaflow/plugins/argo/argo_workflows_cli.py +60 -3
  10. metaflow/plugins/argo/argo_workflows_decorator.py +38 -7
  11. metaflow/plugins/argo/argo_workflows_deployer.py +290 -0
  12. metaflow/plugins/argo/jobset_input_paths.py +16 -0
  13. metaflow/plugins/aws/batch/batch_decorator.py +16 -13
  14. metaflow/plugins/aws/step_functions/step_functions_cli.py +45 -3
  15. metaflow/plugins/aws/step_functions/step_functions_deployer.py +251 -0
  16. metaflow/plugins/cards/card_cli.py +1 -1
  17. metaflow/plugins/kubernetes/kubernetes.py +279 -52
  18. metaflow/plugins/kubernetes/kubernetes_cli.py +26 -8
  19. metaflow/plugins/kubernetes/kubernetes_client.py +0 -1
  20. metaflow/plugins/kubernetes/kubernetes_decorator.py +56 -44
  21. metaflow/plugins/kubernetes/kubernetes_job.py +6 -6
  22. metaflow/plugins/kubernetes/kubernetes_jobsets.py +510 -272
  23. metaflow/plugins/parallel_decorator.py +108 -8
  24. metaflow/plugins/secrets/secrets_decorator.py +12 -3
  25. metaflow/plugins/test_unbounded_foreach_decorator.py +39 -4
  26. metaflow/runner/deployer.py +386 -0
  27. metaflow/runner/metaflow_runner.py +1 -20
  28. metaflow/runner/nbdeploy.py +130 -0
  29. metaflow/runner/nbrun.py +4 -28
  30. metaflow/runner/utils.py +49 -0
  31. metaflow/runtime.py +246 -134
  32. metaflow/version.py +1 -1
  33. {metaflow-2.12.8.dist-info → metaflow-2.12.9.dist-info}/METADATA +2 -2
  34. {metaflow-2.12.8.dist-info → metaflow-2.12.9.dist-info}/RECORD +38 -32
  35. {metaflow-2.12.8.dist-info → metaflow-2.12.9.dist-info}/WHEEL +1 -1
  36. {metaflow-2.12.8.dist-info → metaflow-2.12.9.dist-info}/LICENSE +0 -0
  37. {metaflow-2.12.8.dist-info → metaflow-2.12.9.dist-info}/entry_points.txt +0 -0
  38. {metaflow-2.12.8.dist-info → metaflow-2.12.9.dist-info}/top_level.txt +0 -0
metaflow/runtime.py CHANGED
@@ -16,6 +16,7 @@ from functools import partial
 from concurrent import futures
 
 from metaflow.datastore.exceptions import DataException
+from contextlib import contextmanager
 
 from . import get_namespace
 from .metadata import MetaDatum
@@ -109,6 +110,8 @@ class NativeRuntime(object):
         self._clone_run_id = clone_run_id
         self._clone_only = clone_only
         self._clone_steps = {} if clone_steps is None else clone_steps
+        self._cloned_tasks = []
+        self._cloned_task_index = set()
         self._reentrant = reentrant
         self._run_url = None
 
@@ -203,6 +206,22 @@ class NativeRuntime(object):
 
         self._is_cloned[self._params_task.path] = self._params_task.is_cloned
 
+    def should_skip_clone_only_execution(self):
+        (
+            should_skip_clone_only_execution,
+            skip_reason,
+        ) = self._should_skip_clone_only_execution()
+        if should_skip_clone_only_execution:
+            self._logger(skip_reason, system_msg=True)
+            return True
+        return False
+
+    @contextmanager
+    def run_heartbeat(self):
+        self._metadata.start_run_heartbeat(self._flow.name, self._run_id)
+        yield
+        self._metadata.stop_heartbeat()
+
     def print_workflow_info(self):
         self._run_url = (
             "%s/%s/%s" % (UI_URL.rstrip("/"), self._flow.name, self._run_id)
@@ -235,41 +254,58 @@
             )
             return False, None
 
-    def clone_task(self, step_name, task_id):
-        self._logger(
-            "Cloning task from {}/{}/{}/{} to {}/{}/{}/{}".format(
+    def clone_task(
+        self,
+        step_name,
+        task_id,
+        pathspec_index,
+        ubf_context,
+        generate_task_obj,
+        verbose=False,
+    ):
+        try:
+            new_task_id = task_id
+            if generate_task_obj:
+                task = self._new_task(step_name, pathspec_index=pathspec_index)
+                if ubf_context:
+                    task.ubf_context = ubf_context
+                new_task_id = task.task_id
+                self._cloned_tasks.append(task)
+                self._cloned_task_index.add(task.task_index)
+
+            if verbose:
+                self._logger(
+                    "Cloning task from {}/{}/{}/{} to {}/{}/{}/{}".format(
+                        self._flow.name,
+                        self._clone_run_id,
+                        step_name,
+                        task_id,
+                        self._flow.name,
+                        self._run_id,
+                        step_name,
+                        new_task_id,
+                    ),
+                    system_msg=True,
+                )
+            clone_task_helper(
                 self._flow.name,
                 self._clone_run_id,
-                step_name,
-                task_id,
-                self._flow.name,
                 self._run_id,
                 step_name,
-                task_id,
-            ),
-            system_msg=True,
-        )
-        clone_task_helper(
-            self._flow.name,
-            self._clone_run_id,
-            self._run_id,
-            step_name,
-            task_id,  # origin_task_id
-            task_id,
-            self._flow_datastore,
-            self._metadata,
-            origin_ds_set=self._origin_ds_set,
-        )
+                task_id,  # origin_task_id
+                new_task_id,
+                self._flow_datastore,
+                self._metadata,
+                origin_ds_set=self._origin_ds_set,
+            )
+        except Exception as e:
+            self._logger(
+                "Cloning task from {}/{}/{} failed with error: {}".format(
+                    self._clone_run_id, step_name, task_id, str(e)
+                )
+            )
 
-    def clone_original_run(self):
-        (
-            should_skip_clone_only_execution,
-            skip_reason,
-        ) = self._should_skip_clone_only_execution()
-        if should_skip_clone_only_execution:
-            self._logger(skip_reason, system_msg=True)
-            return
-        self._metadata.start_run_heartbeat(self._flow.name, self._run_id)
+    def clone_original_run(self, generate_task_obj=False, verbose=True):
         self._logger(
             "Start cloning original run: {}/{}".format(
                 self._flow.name, self._clone_run_id
@@ -279,43 +315,106 @@
 
         inputs = []
 
+        ubf_mapper_tasks_to_clone = []
+        # We only clone UBF mapper tasks if the control task is complete.
+        # Here we need to check which control tasks are complete, and then get
+        # the corresponding mapper tasks.
         for task_ds in self._origin_ds_set:
             _, step_name, task_id = task_ds.pathspec.split("/")
+            pathspec_index = task_ds.pathspec_index
+            if task_ds["_task_ok"] and step_name != "_parameters":
+                # Only a control task can have _control_mapper_tasks. We then
+                # store the corresponding mapper task pathspecs.
+                control_mapper_tasks = (
+                    []
+                    if "_control_mapper_tasks" not in task_ds
+                    else task_ds["_control_mapper_tasks"]
+                )
+                ubf_mapper_tasks_to_clone.extend(control_mapper_tasks)
+
+        for task_ds in self._origin_ds_set:
+            _, step_name, task_id = task_ds.pathspec.split("/")
+            pathspec_index = task_ds.pathspec_index
+
             if task_ds["_task_ok"] and step_name != "_parameters":
-                inputs.append((step_name, task_id))
+                # "_unbounded_foreach" is a special flag to indicate that the
+                # transition is an unbounded foreach. Both the parent and the
+                # split children tasks will have this flag set. The split
+                # control/mapper tasks have no "foreach_param" because UBF is
+                # always followed by a join step.
+                is_ubf_task = (
+                    "_unbounded_foreach" in task_ds and task_ds["_unbounded_foreach"]
+                ) and (self._graph[step_name].foreach_param is None)
+
+                # Only the control task has the "_control_mapper_tasks" artifact.
+                is_ubf_control_task = (
+                    is_ubf_task
+                    and ("_control_mapper_tasks" in task_ds)
+                    and task_ds["_control_mapper_tasks"]
+                )
+                is_ubf_mapper_tasks = is_ubf_task and (not is_ubf_control_task)
+                if is_ubf_mapper_tasks and (
+                    task_ds.pathspec not in ubf_mapper_tasks_to_clone
+                ):
+                    # Skip cloning UBF mapper tasks if the control task is incomplete.
+                    continue
+
+                ubf_context = None
+                if is_ubf_task:
+                    ubf_context = "ubf_test" if is_ubf_mapper_tasks else "ubf_control"
+                inputs.append(
+                    (
+                        step_name,
+                        task_id,
+                        pathspec_index,
+                        is_ubf_mapper_tasks,
+                        ubf_context,
+                    )
+                )
 
         with futures.ThreadPoolExecutor(max_workers=self._max_workers) as executor:
             all_tasks = [
-                executor.submit(self.clone_task, step_name, task_id)
-                for (step_name, task_id) in inputs
+                executor.submit(
+                    self.clone_task,
+                    step_name,
+                    task_id,
+                    pathspec_index,
+                    ubf_context=ubf_context,
+                    generate_task_obj=generate_task_obj and (not is_ubf_mapper_tasks),
+                    verbose=verbose,
+                )
+                for (
+                    step_name,
+                    task_id,
+                    pathspec_index,
+                    is_ubf_mapper_tasks,
+                    ubf_context,
+                ) in inputs
             ]
             _, _ = futures.wait(all_tasks)
         self._logger("Cloning original run is done", system_msg=True)
         self._params_task.mark_resume_done()
-        self._metadata.stop_heartbeat()
 
     def execute(self):
-        (
-            should_skip_clone_only_execution,
-            skip_reason,
-        ) = self._should_skip_clone_only_execution()
-        if should_skip_clone_only_execution:
-            self._logger(skip_reason, system_msg=True)
-            return
-        self._metadata.start_run_heartbeat(self._flow.name, self._run_id)
-
-        if self._params_task:
-            self._queue_push("start", {"input_paths": [self._params_task.path]})
+        if len(self._cloned_tasks) > 0:
+            # mutable list storing the cloned tasks.
+            self._run_queue = []
+            self._active_tasks[0] = 0
         else:
-            self._queue_push("start", {})
-
+            if self._params_task:
+                self._queue_push("start", {"input_paths": [self._params_task.path]})
+            else:
+                self._queue_push("start", {})
         progress_tstamp = time.time()
         try:
             # main scheduling loop
             exception = None
-            while self._run_queue or self._active_tasks[0] > 0:
+            while self._run_queue or self._active_tasks[0] > 0 or self._cloned_tasks:
                 # 1. are any of the current workers finished?
-                finished_tasks = list(self._poll_workers())
+                if self._cloned_tasks:
+                    finished_tasks = self._cloned_tasks
+                    # reset the list of cloned tasks and let poll_workers handle
+                    # the remaining transition
+                    self._cloned_tasks = []
+                else:
+                    finished_tasks = list(self._poll_workers())
                 # 2. push new tasks triggered by the finished tasks to the queue
                 self._queue_tasks(finished_tasks)
                 # 3. if there are available worker slots, pop and start tasks
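
Note: the comments in the hunk above define the UBF task taxonomy that drives cloning. A condensed, standalone restatement of that classification (illustrative only; `task_ds` here is a plain dict standing in for the origin datastore view of one task):

# Illustrative restatement of the UBF classification in clone_original_run.
def classify(task_ds, foreach_param):
    # UBF tasks carry "_unbounded_foreach", and their step has no foreach_param
    # (an unbounded foreach is always followed by a join).
    is_ubf = bool(task_ds.get("_unbounded_foreach")) and foreach_param is None
    if not is_ubf:
        return "regular"
    # Only the control task carries a non-empty "_control_mapper_tasks" artifact.
    if task_ds.get("_control_mapper_tasks"):
        return "ubf_control"
    # Mapper tasks are cloned only if listed by a completed control task.
    return "ubf_mapper"

assert classify({}, None) == "regular"
assert classify({"_unbounded_foreach": True}, None) == "ubf_mapper"
assert (
    classify({"_unbounded_foreach": True, "_control_mapper_tasks": ["t1"]}, None)
    == "ubf_control"
)
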
@@ -381,8 +480,6 @@ class NativeRuntime(object):
            for deco in step.decorators:
                deco.runtime_finished(exception)
 
-        self._metadata.stop_heartbeat()
-
        # assert that end was executed and it was successful
        if ("end", ()) in self._finished:
            if self._run_url:
@@ -432,9 +529,41 @@
         for _ in range(3):
             list(self._poll_workers())
 
+    # Given the current task information (task_index), the type of transition,
+    # and the split index, return the new task index.
+    def _translate_index(self, task, next_step, type, split_index=None):
+        import re
+
+        match = re.match(r"^(.+)\[(.*)\]$", task.task_index)
+        if match:
+            _, foreach_index = match.groups()
+            # Convert foreach_index to a list of indices
+            if len(foreach_index) > 0:
+                foreach_index = foreach_index.split(",")
+            else:
+                foreach_index = []
+        else:
+            raise ValueError(
+                "Index not in the format of {run_id}/{step_name}[{foreach_index}]"
+            )
+        if type == "linear":
+            return "%s[%s]" % (next_step, ",".join(foreach_index))
+        elif type == "join":
+            indices = []
+            if len(foreach_index) > 0:
+                indices = foreach_index[:-1]
+            return "%s[%s]" % (next_step, ",".join(indices))
+        elif type == "split":
+            foreach_index.append(str(split_index))
+            return "%s[%s]" % (next_step, ",".join(foreach_index))
+
     # Store the parameters needed for task creation, so that pushing on items
     # onto the run_queue is an inexpensive operation.
-    def _queue_push(self, step, task_kwargs):
+    def _queue_push(self, step, task_kwargs, index=None):
+        # If the to-be-pushed task has already been cloned, we don't need
+        # to re-run it.
+        if index and index in self._cloned_task_index:
+            return
         self._run_queue.insert(0, (step, task_kwargs))
         # For foreaches, this will happen multiple time but is ok, becomes a no-op
         self._unprocessed_steps.discard(step)
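
Note: the index strings handled by _translate_index above look like "step_name[i,j,...]", where the bracketed part is the foreach stack. A standalone sketch of the three translation rules with worked examples (an illustrative re-implementation with the runtime plumbing stripped out, not the method itself; `transition_type` mirrors its `type` argument):

import re

def translate_index(task_index, next_step, transition_type, split_index=None):
    # task_index looks like "a[0,2]"; capture the bracketed foreach stack.
    match = re.match(r"^(.+)\[(.*)\]$", task_index)
    if not match:
        raise ValueError("Bad index: %s" % task_index)
    stack = match.group(2).split(",") if match.group(2) else []
    if transition_type == "linear":  # same foreach depth
        return "%s[%s]" % (next_step, ",".join(stack))
    elif transition_type == "join":  # pop one level off the foreach stack
        return "%s[%s]" % (next_step, ",".join(stack[:-1]))
    elif transition_type == "split":  # push the new split index
        return "%s[%s]" % (next_step, ",".join(stack + [str(split_index)]))

assert translate_index("a[0,2]", "b", "linear") == "b[0,2]"
assert translate_index("a[0,2]", "join_b", "join") == "join_b[0]"
assert translate_index("start[]", "a", "split", 3) == "a[3]"
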
@@ -493,30 +622,19 @@
                 )
                 num_splits = len(mapper_tasks)
                 self._control_num_splits[task.path] = num_splits
-                if task.is_cloned:
-                    # Add mapper tasks to be cloned.
-                    for i in range(num_splits):
-                        # NOTE: For improved robustness, introduce
-                        # `clone_options` as an enum so that we can force that
-                        # clone must occur for this task.
-                        self._queue_push(
-                            task.step,
-                            {
-                                "input_paths": task.input_paths,
-                                "split_index": str(i),
-                                "ubf_context": UBF_TASK,
-                            },
-                        )
-                else:
-                    # Update _finished since these tasks were successfully
-                    # run elsewhere so that join will be unblocked.
-                    _, foreach_stack = task.finished_id
-                    top = foreach_stack[-1]
-                    bottom = list(foreach_stack[:-1])
-                    for i in range(num_splits):
-                        s = tuple(bottom + [top._replace(index=i)])
-                        self._finished[(task.step, s)] = mapper_tasks[i]
-                        self._is_cloned[mapper_tasks[i]] = False
+
+                # If the control task is cloned, all mapper tasks should have been cloned
+                # as well, so we no longer need to handle cloning of mapper tasks in the runtime.
+
+                # Update _finished since these tasks were successfully
+                # run elsewhere so that join will be unblocked.
+                _, foreach_stack = task.finished_id
+                top = foreach_stack[-1]
+                bottom = list(foreach_stack[:-1])
+                for i in range(num_splits):
+                    s = tuple(bottom + [top._replace(index=i)])
+                    self._finished[(task.step, s)] = mapper_tasks[i]
+                    self._is_cloned[mapper_tasks[i]] = False
 
         # Find and check status of control task and retrieve its pathspec
         # for retrieving unbounded foreach cardinality.
@@ -541,16 +659,18 @@
                 required_tasks.append(self._finished.get((task.step, s)))
 
             if all(required_tasks):
+                index = self._translate_index(task, next_step, "join")
                 # all tasks to be joined are ready. Schedule the next join step.
                 self._queue_push(
                     next_step,
                     {"input_paths": required_tasks, "join_type": "foreach"},
+                    index,
                 )
         else:
             # matching_split is the split-parent of the finished task
             matching_split = self._graph[self._graph[next_step].split_parents[-1]]
             _, foreach_stack = task.finished_id
-
+            index = ""
             if matching_split.type == "foreach":
                 # next step is a foreach join
@@ -565,6 +685,7 @@
                     self._finished.get((task.step, s)) for s in siblings(foreach_stack)
                 ]
                 join_type = "foreach"
+                index = self._translate_index(task, next_step, "join")
             else:
                 # next step is a split
                 # required tasks are all branches joined by the next step
@@ -573,11 +694,14 @@
                     for step in self._graph[next_step].in_funcs
                 ]
                 join_type = "linear"
+                index = self._translate_index(task, next_step, "linear")
 
             if all(required_tasks):
                 # all tasks to be joined are ready. Schedule the next join step.
                 self._queue_push(
-                    next_step, {"input_paths": required_tasks, "join_type": join_type}
+                    next_step,
+                    {"input_paths": required_tasks, "join_type": join_type},
+                    index,
                 )
 
     def _queue_task_foreach(self, task, next_steps):
  def _queue_task_foreach(self, task, next_steps):
@@ -598,6 +722,12 @@ class NativeRuntime(object):
598
722
  # Need to push control process related task.
599
723
  ubf_iter_name = task.results.get("_foreach_var")
600
724
  ubf_iter = task.results.get(ubf_iter_name)
725
+ # UBF control task has no split index, hence "None" as place holder.
726
+
727
+ if task.results.get("_control_task_is_mapper_zero", False):
728
+ index = self._translate_index(task, next_step, "split", 0)
729
+ else:
730
+ index = self._translate_index(task, next_step, "split", None)
601
731
  self._queue_push(
602
732
  next_step,
603
733
  {
@@ -605,6 +735,7 @@
                     "ubf_context": UBF_CONTROL,
                     "ubf_iter": ubf_iter,
                 },
+                index,
             )
         else:
             num_splits = task.results["_foreach_num_splits"]
@@ -624,8 +755,11 @@
 
             # schedule all splits
             for i in range(num_splits):
+                index = self._translate_index(task, next_step, "split", i)
                 self._queue_push(
-                    next_step, {"split_index": str(i), "input_paths": [task.path]}
+                    next_step,
+                    {"split_index": str(i), "input_paths": [task.path]},
+                    index,
                 )
 
     def _queue_tasks(self, finished_tasks):
@@ -673,7 +807,8 @@
         else:
             # Next steps are normal linear steps
             for step in next_steps:
-                self._queue_push(step, {"input_paths": [task.path]})
+                index = self._translate_index(task, step, "linear")
+                self._queue_push(step, {"input_paths": [task.path]}, index)
 
     def _poll_workers(self):
         if self._workers:
@@ -794,6 +929,7 @@ class Task(object):
         join_type=None,
         task_id=None,
         resume_identifier=None,
+        pathspec_index=None,
     ):
         self.step = step
         self.flow = flow
@@ -836,10 +972,9 @@
         self._is_resume_leader = None
         self._resume_done = None
         self._resume_identifier = resume_identifier
-
         origin = None
         if clone_run_id and may_clone:
-            origin = self._find_origin_task(clone_run_id, join_type)
+            origin = self._find_origin_task(clone_run_id, join_type, pathspec_index)
         if origin and origin["_task_ok"]:
             # At this point, we know we are going to clone
             self._is_cloned = True
@@ -960,10 +1095,11 @@
             )
 
             if self._is_resume_leader:
-                self.log(
-                    "Selected as the reentrant clone leader.",
-                    system_msg=True,
-                )
+                if reentrant:
+                    self.log(
+                        "Selected as the reentrant clone leader.",
+                        system_msg=True,
+                    )
                 # Clone in place without relying on run_queue.
                 self.new_attempt()
                 self._ds.clone(origin)
@@ -1108,63 +1244,34 @@
 
     def _get_task_id(self, task_id):
         already_existed = True
+        tags = []
         if self.ubf_context == UBF_CONTROL:
-            [input_path] = self.input_paths
-            run, input_step, input_task = input_path.split("/")
-            # We associate the control task-id to be 1:1 with the split node
-            # where the unbounded-foreach was defined.
-            # We prefer encoding the corresponding split into the task_id of
-            # the control node; so it has access to this information quite
-            # easily. There is anyway a corresponding int id stored in the
-            # metadata backend - so this should be fine.
-            task_id = "control-%s-%s-%s" % (run, input_step, input_task)
-        # Register only regular Metaflow (non control) tasks.
+            tags = [CONTROL_TASK_TAG]
+        # Register Metaflow tasks.
         if task_id is None:
-            task_id = str(self.metadata.new_task_id(self.run_id, self.step))
+            task_id = str(
+                self.metadata.new_task_id(self.run_id, self.step, sys_tags=tags)
+            )
             already_existed = False
         else:
-            # task_id is preset only by persist_constants() or control tasks.
-            if self.ubf_context == UBF_CONTROL:
-                tags = [CONTROL_TASK_TAG]
-                attempt_id = 0
-                already_existed = not self.metadata.register_task_id(
-                    self.run_id,
-                    self.step,
-                    task_id,
-                    attempt_id,
-                    sys_tags=tags,
-                )
-                # A Task's tags are now those of its ancestral Run, so we are not able
-                # to rely on a task's tags to indicate the presence of a control task
-                # so, on top of adding the tags above, we also add a task metadata
-                # entry indicating that this is a "control task".
-                #
-                # Here we will also add a task metadata entry to indicate "control task".
-                # Within the metaflow repo, the only dependency of such a "control task"
-                # indicator is in the integration test suite (see Step.control_tasks() in
-                # client API).
-                task_metadata_list = [
-                    MetaDatum(
-                        field="internal_task_type",
-                        value=CONTROL_TASK_TAG,
-                        type="internal_task_type",
-                        tags=["attempt_id:{0}".format(attempt_id)],
-                    )
-                ]
-                self.metadata.register_metadata(
-                    self.run_id, self.step, task_id, task_metadata_list
-                )
-            else:
-                already_existed = not self.metadata.register_task_id(
-                    self.run_id, self.step, task_id, 0
-                )
+            # task_id is preset only by persist_constants().
+            already_existed = not self.metadata.register_task_id(
+                self.run_id,
+                self.step,
+                task_id,
+                0,
+                sys_tags=tags,
+            )
 
         self.task_id = task_id
         self._path = "%s/%s/%s" % (self.run_id, self.step, self.task_id)
         return already_existed
 
-    def _find_origin_task(self, clone_run_id, join_type):
-        if self.step == "_parameters":
+    def _find_origin_task(self, clone_run_id, join_type, pathspec_index=None):
+        if pathspec_index:
+            origin = self.origin_ds_set.get_with_pathspec_index(pathspec_index)
+            return origin
+        elif self.step == "_parameters":
             pathspec = "%s/_parameters[]" % clone_run_id
             origin = self.origin_ds_set.get_with_pathspec_index(pathspec)
 
@@ -1214,6 +1321,11 @@
         )
         return self._results_ds
 
+    @property
+    def task_index(self):
+        _, task_index = self.results.pathspec_index.split("/")
+        return task_index
+
     @property
     def finished_id(self):
         # note: id is not available before the task has finished.
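
Note: the task_index property added above is the last piece of the cloned-task bookkeeping in this file. A Task's pathspec_index has the form "{run_id}/{step_name}[{foreach_indices}]"; the property keeps only the "{step_name}[...]" suffix, clone_task records it in _cloned_task_index, and _queue_push skips any transition whose translated index is already recorded. A toy, standalone illustration of that dedup rule (plain functions, not the runtime classes):

# pathspec_index has the form "{run_id}/{step_name}[{foreach_indices}]".
def task_index(pathspec_index):
    _, index = pathspec_index.split("/")
    return index

cloned_task_index = {task_index("my_run/a[0]")}  # recorded by clone_task

def should_schedule(index):
    # Mirrors the early return added to _queue_push above.
    return not (index and index in cloned_task_index)

assert should_schedule("a[1]")      # not cloned yet: schedule it
assert not should_schedule("a[0]")  # already cloned: skip re-execution
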
metaflow/version.py CHANGED
@@ -1 +1 @@
-metaflow_version = "2.12.8"
+metaflow_version = "2.12.9"
{metaflow-2.12.8.dist-info → metaflow-2.12.9.dist-info}/METADATA CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: metaflow
-Version: 2.12.8
+Version: 2.12.9
 Summary: Metaflow: More Data Science, Less Engineering
 Author: Metaflow Developers
 Author-email: help@metaflow.org
@@ -26,7 +26,7 @@ License-File: LICENSE
 Requires-Dist: requests
 Requires-Dist: boto3
 Provides-Extra: stubs
-Requires-Dist: metaflow-stubs ==2.12.8 ; extra == 'stubs'
+Requires-Dist: metaflow-stubs ==2.12.9 ; extra == 'stubs'
 
 ![Metaflow_Logo_Horizontal_FullColor_Ribbon_Dark_RGB](https://user-images.githubusercontent.com/763451/89453116-96a57e00-d713-11ea-9fa6-82b29d4d6eff.png)