ob-metaflow 2.12.30.2__py2.py3-none-any.whl → 2.13.6.1__py2.py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (96)
  1. metaflow/__init__.py +3 -0
  2. metaflow/cards.py +1 -0
  3. metaflow/cli.py +185 -717
  4. metaflow/cli_args.py +17 -0
  5. metaflow/cli_components/__init__.py +0 -0
  6. metaflow/cli_components/dump_cmd.py +96 -0
  7. metaflow/cli_components/init_cmd.py +51 -0
  8. metaflow/cli_components/run_cmds.py +362 -0
  9. metaflow/cli_components/step_cmd.py +176 -0
  10. metaflow/cli_components/utils.py +140 -0
  11. metaflow/cmd/develop/stub_generator.py +9 -2
  12. metaflow/datastore/flow_datastore.py +2 -2
  13. metaflow/decorators.py +63 -2
  14. metaflow/exception.py +8 -2
  15. metaflow/extension_support/plugins.py +42 -27
  16. metaflow/flowspec.py +176 -23
  17. metaflow/graph.py +28 -27
  18. metaflow/includefile.py +50 -22
  19. metaflow/lint.py +35 -20
  20. metaflow/metadata_provider/heartbeat.py +23 -8
  21. metaflow/metaflow_config.py +10 -1
  22. metaflow/multicore_utils.py +31 -14
  23. metaflow/package.py +17 -3
  24. metaflow/parameters.py +97 -25
  25. metaflow/plugins/__init__.py +22 -0
  26. metaflow/plugins/airflow/airflow.py +18 -17
  27. metaflow/plugins/airflow/airflow_cli.py +1 -0
  28. metaflow/plugins/argo/argo_client.py +0 -2
  29. metaflow/plugins/argo/argo_workflows.py +195 -132
  30. metaflow/plugins/argo/argo_workflows_cli.py +1 -1
  31. metaflow/plugins/argo/argo_workflows_decorator.py +2 -4
  32. metaflow/plugins/argo/argo_workflows_deployer_objects.py +51 -9
  33. metaflow/plugins/argo/jobset_input_paths.py +0 -1
  34. metaflow/plugins/aws/aws_utils.py +6 -1
  35. metaflow/plugins/aws/batch/batch_client.py +1 -3
  36. metaflow/plugins/aws/batch/batch_decorator.py +13 -13
  37. metaflow/plugins/aws/secrets_manager/aws_secrets_manager_secrets_provider.py +13 -10
  38. metaflow/plugins/aws/step_functions/dynamo_db_client.py +0 -3
  39. metaflow/plugins/aws/step_functions/production_token.py +1 -1
  40. metaflow/plugins/aws/step_functions/step_functions.py +33 -1
  41. metaflow/plugins/aws/step_functions/step_functions_cli.py +1 -1
  42. metaflow/plugins/aws/step_functions/step_functions_decorator.py +0 -1
  43. metaflow/plugins/aws/step_functions/step_functions_deployer_objects.py +7 -9
  44. metaflow/plugins/cards/card_cli.py +7 -2
  45. metaflow/plugins/cards/card_creator.py +1 -0
  46. metaflow/plugins/cards/card_decorator.py +79 -8
  47. metaflow/plugins/cards/card_modules/basic.py +56 -5
  48. metaflow/plugins/cards/card_modules/card.py +16 -1
  49. metaflow/plugins/cards/card_modules/components.py +64 -16
  50. metaflow/plugins/cards/card_modules/main.js +27 -25
  51. metaflow/plugins/cards/card_modules/test_cards.py +4 -4
  52. metaflow/plugins/cards/component_serializer.py +1 -1
  53. metaflow/plugins/datatools/s3/s3.py +12 -4
  54. metaflow/plugins/datatools/s3/s3op.py +3 -3
  55. metaflow/plugins/events_decorator.py +338 -186
  56. metaflow/plugins/kubernetes/kube_utils.py +84 -1
  57. metaflow/plugins/kubernetes/kubernetes.py +40 -92
  58. metaflow/plugins/kubernetes/kubernetes_cli.py +32 -7
  59. metaflow/plugins/kubernetes/kubernetes_decorator.py +76 -4
  60. metaflow/plugins/kubernetes/kubernetes_job.py +23 -20
  61. metaflow/plugins/kubernetes/kubernetes_jobsets.py +41 -20
  62. metaflow/plugins/kubernetes/spot_metadata_cli.py +69 -0
  63. metaflow/plugins/kubernetes/spot_monitor_sidecar.py +109 -0
  64. metaflow/plugins/parallel_decorator.py +4 -1
  65. metaflow/plugins/project_decorator.py +33 -5
  66. metaflow/plugins/pypi/bootstrap.py +249 -81
  67. metaflow/plugins/pypi/conda_decorator.py +20 -10
  68. metaflow/plugins/pypi/conda_environment.py +83 -27
  69. metaflow/plugins/pypi/micromamba.py +82 -37
  70. metaflow/plugins/pypi/pip.py +9 -6
  71. metaflow/plugins/pypi/pypi_decorator.py +11 -9
  72. metaflow/plugins/pypi/utils.py +4 -2
  73. metaflow/plugins/timeout_decorator.py +2 -2
  74. metaflow/runner/click_api.py +240 -50
  75. metaflow/runner/deployer.py +1 -1
  76. metaflow/runner/deployer_impl.py +12 -11
  77. metaflow/runner/metaflow_runner.py +68 -34
  78. metaflow/runner/nbdeploy.py +2 -0
  79. metaflow/runner/nbrun.py +1 -1
  80. metaflow/runner/subprocess_manager.py +61 -10
  81. metaflow/runner/utils.py +208 -44
  82. metaflow/runtime.py +216 -112
  83. metaflow/sidecar/sidecar_worker.py +1 -1
  84. metaflow/tracing/tracing_modules.py +4 -1
  85. metaflow/user_configs/__init__.py +0 -0
  86. metaflow/user_configs/config_decorators.py +563 -0
  87. metaflow/user_configs/config_options.py +548 -0
  88. metaflow/user_configs/config_parameters.py +436 -0
  89. metaflow/util.py +22 -0
  90. metaflow/version.py +1 -1
  91. {ob_metaflow-2.12.30.2.dist-info → ob_metaflow-2.13.6.1.dist-info}/METADATA +12 -3
  92. {ob_metaflow-2.12.30.2.dist-info → ob_metaflow-2.13.6.1.dist-info}/RECORD +96 -84
  93. {ob_metaflow-2.12.30.2.dist-info → ob_metaflow-2.13.6.1.dist-info}/WHEEL +1 -1
  94. {ob_metaflow-2.12.30.2.dist-info → ob_metaflow-2.13.6.1.dist-info}/LICENSE +0 -0
  95. {ob_metaflow-2.12.30.2.dist-info → ob_metaflow-2.13.6.1.dist-info}/entry_points.txt +0 -0
  96. {ob_metaflow-2.12.30.2.dist-info → ob_metaflow-2.13.6.1.dist-info}/top_level.txt +0 -0
metaflow/runtime.py CHANGED

@@ -6,9 +6,12 @@ using local / remote processes
 """
 
 from __future__ import print_function
+import json
 import os
 import sys
 import fcntl
+import re
+import tempfile
 import time
 import subprocess
 from datetime import datetime
@@ -31,6 +34,7 @@ from . import procpoll
 from .datastore import TaskDataStoreSet
 from .debug import debug
 from .decorators import flow_decorators
+from .flowspec import _FlowState
 from .mflog import mflog, RUNTIME_LOG_SOURCE
 from .util import to_unicode, compress_list, unicode_type
 from .clone_util import clone_task_helper
@@ -39,6 +43,10 @@ from .unbounded_foreach import (
     UBF_CONTROL,
     UBF_TASK,
 )
+
+from .user_configs.config_options import ConfigInput
+from .user_configs.config_parameters import dump_config_values
+
 import metaflow.tracing as tracing
 
 MAX_WORKERS = 16
@@ -49,7 +57,13 @@ PROGRESS_INTERVAL = 300 # s
 # The following is a list of the (data) artifacts used by the runtime while
 # executing a flow. These are prefetched during the resume operation by
 # leveraging the TaskDataStoreSet.
-PREFETCH_DATA_ARTIFACTS = ["_foreach_stack", "_task_ok", "_transition"]
+PREFETCH_DATA_ARTIFACTS = [
+    "_foreach_stack",
+    "_task_ok",
+    "_transition",
+    "_control_mapper_tasks",
+    "_control_task_is_mapper_zero",
+]
 RESUME_POLL_SECONDS = 60
 
 # Runtime must use logsource=RUNTIME_LOG_SOURCE for all loglines that it
@@ -111,7 +125,7 @@ class NativeRuntime(object):
         self._clone_run_id = clone_run_id
         self._clone_only = clone_only
         self._cloned_tasks = []
-        self._cloned_task_index = set()
+        self._ran_or_scheduled_task_index = set()
         self._reentrant = reentrant
         self._run_url = None
 
@@ -269,6 +283,8 @@ class NativeRuntime(object):
             step_name,
             task_id,
             pathspec_index,
+            cloned_task_pathspec_index,
+            finished_tuple,
             ubf_context,
             generate_task_obj,
             verbose=False,
@@ -281,8 +297,13 @@ class NativeRuntime(object):
                     task.ubf_context = ubf_context
                 new_task_id = task.task_id
                 self._cloned_tasks.append(task)
-                self._cloned_task_index.add(task.task_index)
-
+                self._ran_or_scheduled_task_index.add(cloned_task_pathspec_index)
+                task_pathspec = "{}/{}/{}".format(self._run_id, step_name, new_task_id)
+            else:
+                task_pathspec = "{}/{}/{}".format(self._run_id, step_name, new_task_id)
+                Task.clone_pathspec_mapping[task_pathspec] = "{}/{}/{}".format(
+                    self._clone_run_id, step_name, task_id
+                )
             if verbose:
                 self._logger(
                     "Cloning task from {}/{}/{}/{} to {}/{}/{}/{}".format(
@@ -308,6 +329,8 @@ class NativeRuntime(object):
                     self._metadata,
                     origin_ds_set=self._origin_ds_set,
                 )
+                self._finished[(step_name, finished_tuple)] = task_pathspec
+                self._is_cloned[task_pathspec] = True
             except Exception as e:
                 self._logger(
                     "Cloning {}/{}/{}/{} failed with error: {}".format(
@@ -323,7 +346,8 @@ class NativeRuntime(object):
 
         inputs = []
 
-        ubf_mapper_tasks_to_clone = []
+        ubf_mapper_tasks_to_clone = set()
+        ubf_control_tasks = set()
         # We only clone ubf mapper tasks if the control task is complete.
         # Here we need to check which control tasks are complete, and then get the corresponding
         # mapper tasks.
@@ -331,13 +355,25 @@ class NativeRuntime(object):
             _, step_name, task_id = task_ds.pathspec.split("/")
             pathspec_index = task_ds.pathspec_index
             if task_ds["_task_ok"] and step_name != "_parameters":
-                # Only control task can have _control_mapper_tasks. We then store the corresponding mapepr task pathspecs.
+                # Control task contains "_control_mapper_tasks" but, in the case of
+                # @parallel decorator, the control task is also a mapper task so we
+                # need to distinguish this using _control_task_is_mapper_zero
                 control_mapper_tasks = (
                     []
                     if "_control_mapper_tasks" not in task_ds
                     else task_ds["_control_mapper_tasks"]
                 )
-                ubf_mapper_tasks_to_clone.extend(control_mapper_tasks)
+                if control_mapper_tasks:
+                    if task_ds.get("_control_task_is_mapper_zero", False):
+                        # Strip out the control task of list of mapper tasks
+                        ubf_control_tasks.add(control_mapper_tasks[0])
+                        ubf_mapper_tasks_to_clone.update(control_mapper_tasks[1:])
+                    else:
+                        ubf_mapper_tasks_to_clone.update(control_mapper_tasks)
+                    # Since we only add mapper tasks here, if we are not in the list
+                    # we are a control task
+                    if task_ds.pathspec not in ubf_mapper_tasks_to_clone:
+                        ubf_control_tasks.add(task_ds.pathspec)
 
         for task_ds in self._origin_ds_set:
             _, step_name, task_id = task_ds.pathspec.split("/")
@@ -348,35 +384,58 @@ class NativeRuntime(object):
                 and step_name != "_parameters"
                 and (step_name not in self._steps_to_rerun)
             ):
-                # "_unbounded_foreach" is a special flag to indicate that the transition is an unbounded foreach.
-                # Both parent and splitted children tasks will have this flag set. The splitted control/mapper tasks
-                # have no "foreach_param" because UBF is always followed by a join step.
+                # "_unbounded_foreach" is a special flag to indicate that the transition
+                # is an unbounded foreach.
+                # Both parent and splitted children tasks will have this flag set.
+                # The splitted control/mapper tasks
+                # are not foreach types because UBF is always followed by a join step.
                 is_ubf_task = (
                     "_unbounded_foreach" in task_ds and task_ds["_unbounded_foreach"]
-                ) and (self._graph[step_name].foreach_param is None)
+                ) and (self._graph[step_name].type != "foreach")
 
-                # Only the control task has "_control_mapper_tasks" artifact.
-                is_ubf_control_task = (
-                    is_ubf_task
-                    and ("_control_mapper_tasks" in task_ds)
-                    and task_ds["_control_mapper_tasks"]
-                )
-                is_ubf_mapper_tasks = is_ubf_task and (not is_ubf_control_task)
-                if is_ubf_mapper_tasks and (
+                is_ubf_control_task = task_ds.pathspec in ubf_control_tasks
+
+                is_ubf_mapper_task = is_ubf_task and (not is_ubf_control_task)
+
+                if is_ubf_mapper_task and (
                     task_ds.pathspec not in ubf_mapper_tasks_to_clone
                 ):
-                    # Skip copying UBF mapper tasks if control tasks is incomplete.
+                    # Skip copying UBF mapper tasks if control task is incomplete.
                     continue
 
                 ubf_context = None
                 if is_ubf_task:
-                    ubf_context = "ubf_test" if is_ubf_mapper_tasks else "ubf_control"
+                    ubf_context = "ubf_test" if is_ubf_mapper_task else "ubf_control"
+
+                finished_tuple = tuple(
+                    [s._replace(value=0) for s in task_ds.get("_foreach_stack", ())]
+                )
+                cloned_task_pathspec_index = pathspec_index.split("/")[1]
+                if task_ds.get("_control_task_is_mapper_zero", False):
+                    # Replace None with index 0 for control task as it is part of the
+                    # UBF (as a mapper as well)
+                    finished_tuple = finished_tuple[:-1] + (
+                        finished_tuple[-1]._replace(index=0),
+                    )
+                    # We need this reverse override though because when we check
+                    # if a task has been cloned in _queue_push, the index will be None
+                    # because the _control_task_is_mapper_zero is set in the control
+                    # task *itself* and *not* in the one that is launching the UBF nest.
+                    # This means that _translate_index will use None.
+                    cloned_task_pathspec_index = re.sub(
+                        r"(\[(?:\d+, ?)*)0\]",
+                        lambda m: (m.group(1) or "[") + "None]",
+                        cloned_task_pathspec_index,
+                    )
+
                 inputs.append(
                     (
                         step_name,
                         task_id,
                         pathspec_index,
-                        is_ubf_mapper_tasks,
+                        cloned_task_pathspec_index,
+                        finished_tuple,
+                        is_ubf_mapper_task,
                         ubf_context,
                     )
                 )
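Note on the re.sub call in the hunk above: it rewrites only a trailing foreach index of 0 back to None, since _translate_index produces None for the step launching the UBF nest. The following standalone sketch (not part of the diff; the step names are made up) illustrates its behavior:

import re

pattern = r"(\[(?:\d+, ?)*)0\]"

def rewrite(m):
    # group(1) holds the opening bracket plus any leading indices, e.g. "[3, "
    return (m.group(1) or "[") + "None]"

print(re.sub(pattern, rewrite, "ubf_step[0]"))     # ubf_step[None]
print(re.sub(pattern, rewrite, "ubf_step[3, 0]"))  # ubf_step[3, None]
print(re.sub(pattern, rewrite, "ubf_step[0, 2]"))  # unchanged: only a trailing 0 matches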
@@ -388,15 +447,19 @@ class NativeRuntime(object):
                 step_name,
                 task_id,
                 pathspec_index,
+                cloned_task_pathspec_index,
+                finished_tuple,
                 ubf_context=ubf_context,
-                generate_task_obj=generate_task_obj and (not is_ubf_mapper_tasks),
+                generate_task_obj=generate_task_obj and (not is_ubf_mapper_task),
                 verbose=verbose,
             )
             for (
                 step_name,
                 task_id,
                 pathspec_index,
-                is_ubf_mapper_tasks,
+                cloned_task_pathspec_index,
+                finished_tuple,
+                is_ubf_mapper_task,
                 ubf_context,
             ) in inputs
         ]
@@ -417,82 +480,95 @@ class NativeRuntime(object):
         else:
             self._queue_push("start", {})
         progress_tstamp = time.time()
-        try:
-            # main scheduling loop
-            exception = None
-            while self._run_queue or self._active_tasks[0] > 0 or self._cloned_tasks:
-                # 1. are any of the current workers finished?
-                if self._cloned_tasks:
-                    finished_tasks = self._cloned_tasks
-                    # reset the list of cloned tasks and let poll_workers handle
-                    # the remaining transition
-                    self._cloned_tasks = []
-                else:
-                    finished_tasks = list(self._poll_workers())
-                # 2. push new tasks triggered by the finished tasks to the queue
-                self._queue_tasks(finished_tasks)
-                # 3. if there are available worker slots, pop and start tasks
-                # from the queue.
-                self._launch_workers()
-
-                if time.time() - progress_tstamp > PROGRESS_INTERVAL:
-                    progress_tstamp = time.time()
-                    tasks_print = ", ".join(
-                        [
-                            "%s (%d running; %d done)" % (k, v[0], v[1])
-                            for k, v in self._active_tasks.items()
-                            if k != 0 and v[0] > 0
-                        ]
-                    )
-                    if self._active_tasks[0] == 0:
-                        msg = "No tasks are running."
-                    else:
-                        if self._active_tasks[0] == 1:
-                            msg = "1 task is running: "
-                        else:
-                            msg = "%d tasks are running: " % self._active_tasks[0]
-                        msg += "%s." % tasks_print
-
-                    self._logger(msg, system_msg=True)
-
-                    if len(self._run_queue) == 0:
-                        msg = "No tasks are waiting in the queue."
-                    else:
-                        if len(self._run_queue) == 1:
-                            msg = "1 task is waiting in the queue: "
-                        else:
-                            msg = "%d tasks are waiting in the queue." % len(
-                                self._run_queue
-                            )
-
-                        self._logger(msg, system_msg=True)
-                    if len(self._unprocessed_steps) > 0:
-                        if len(self._unprocessed_steps) == 1:
-                            msg = "%s step has not started" % (
-                                next(iter(self._unprocessed_steps)),
-                            )
-                        else:
-                            msg = "%d steps have not started: " % len(
-                                self._unprocessed_steps
-                            )
-                            msg += "%s." % ", ".join(self._unprocessed_steps)
-                        self._logger(msg, system_msg=True)
-
-        except KeyboardInterrupt as ex:
-            self._logger("Workflow interrupted.", system_msg=True, bad=True)
-            self._killall()
-            exception = ex
-            raise
-        except Exception as ex:
-            self._logger("Workflow failed.", system_msg=True, bad=True)
-            self._killall()
-            exception = ex
-            raise
-        finally:
-            # on finish clean tasks
-            for step in self._flow:
-                for deco in step.decorators:
-                    deco.runtime_finished(exception)
+        with tempfile.NamedTemporaryFile(mode="w", encoding="utf-8") as config_file:
+            # Configurations are passed through a file to avoid overloading the
+            # command-line. We only need to create this file once and it can be reused
+            # for any task launch
+            config_value = dump_config_values(self._flow)
+            if config_value:
+                json.dump(config_value, config_file)
+                config_file.flush()
+                self._config_file_name = config_file.name
+            else:
+                self._config_file_name = None
+            try:
+                # main scheduling loop
+                exception = None
+                while (
+                    self._run_queue or self._active_tasks[0] > 0 or self._cloned_tasks
+                ):
+                    # 1. are any of the current workers finished?
+                    if self._cloned_tasks:
+                        finished_tasks = self._cloned_tasks
+                        # reset the list of cloned tasks and let poll_workers handle
+                        # the remaining transition
+                        self._cloned_tasks = []
+                    else:
+                        finished_tasks = list(self._poll_workers())
+                    # 2. push new tasks triggered by the finished tasks to the queue
+                    self._queue_tasks(finished_tasks)
+                    # 3. if there are available worker slots, pop and start tasks
+                    # from the queue.
+                    self._launch_workers()
+
+                    if time.time() - progress_tstamp > PROGRESS_INTERVAL:
+                        progress_tstamp = time.time()
+                        tasks_print = ", ".join(
+                            [
+                                "%s (%d running; %d done)" % (k, v[0], v[1])
+                                for k, v in self._active_tasks.items()
+                                if k != 0 and v[0] > 0
+                            ]
+                        )
+                        if self._active_tasks[0] == 0:
+                            msg = "No tasks are running."
+                        else:
+                            if self._active_tasks[0] == 1:
+                                msg = "1 task is running: "
+                            else:
+                                msg = "%d tasks are running: " % self._active_tasks[0]
+                            msg += "%s." % tasks_print
+
+                        self._logger(msg, system_msg=True)
+
+                        if len(self._run_queue) == 0:
+                            msg = "No tasks are waiting in the queue."
+                        else:
+                            if len(self._run_queue) == 1:
+                                msg = "1 task is waiting in the queue: "
+                            else:
+                                msg = "%d tasks are waiting in the queue." % len(
+                                    self._run_queue
+                                )
+
+                            self._logger(msg, system_msg=True)
+                        if len(self._unprocessed_steps) > 0:
+                            if len(self._unprocessed_steps) == 1:
+                                msg = "%s step has not started" % (
+                                    next(iter(self._unprocessed_steps)),
+                                )
+                            else:
+                                msg = "%d steps have not started: " % len(
+                                    self._unprocessed_steps
+                                )
+                                msg += "%s." % ", ".join(self._unprocessed_steps)
+                            self._logger(msg, system_msg=True)
+
+            except KeyboardInterrupt as ex:
+                self._logger("Workflow interrupted.", system_msg=True, bad=True)
+                self._killall()
+                exception = ex
+                raise
+            except Exception as ex:
+                self._logger("Workflow failed.", system_msg=True, bad=True)
+                self._killall()
+                exception = ex
+                raise
+            finally:
+                # on finish clean tasks
+                for step in self._flow:
+                    for deco in step.decorators:
+                        deco.runtime_finished(exception)
 
         # assert that end was executed and it was successful
         if ("end", ()) in self._finished:
@@ -546,7 +622,6 @@ class NativeRuntime(object):
     # Given the current task information (task_index), the type of transition,
     # and the split index, return the new task index.
     def _translate_index(self, task, next_step, type, split_index=None):
-        import re
 
         match = re.match(r"^(.+)\[(.*)\]$", task.task_index)
         if match:
@@ -574,10 +649,18 @@ class NativeRuntime(object):
     # Store the parameters needed for task creation, so that pushing on items
     # onto the run_queue is an inexpensive operation.
     def _queue_push(self, step, task_kwargs, index=None):
-        # If the to-be-pushed task is already cloned before, we don't need
-        # to re-run it.
-        if index and index in self._cloned_task_index:
-            return
+        # In the case of cloning, we set all the cloned tasks as the
+        # finished tasks when pushing tasks using _queue_tasks. This means that we
+        # could potentially try to push the same task multiple times (for example
+        # if multiple parents of a join are cloned). We therefore keep track of what
+        # has executed (been cloned) or what has been scheduled and avoid scheduling
+        # it again.
+        if index:
+            if index in self._ran_or_scheduled_task_index:
+                # It has already run or been scheduled
+                return
+            # Note that we are scheduling this to run
+            self._ran_or_scheduled_task_index.add(index)
         self._run_queue.insert(0, (step, task_kwargs))
         # For foreaches, this will happen multiple time but is ok, becomes a no-op
         self._unprocessed_steps.discard(step)
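The reworked guard above changes _queue_push from "skip if already cloned" to "skip if already ran or scheduled". A minimal standalone sketch of that dedup-on-push pattern (names and indices are illustrative, not from a real run):

run_queue = []
ran_or_scheduled = set()

def queue_push(step, task_kwargs, index=None):
    if index:
        if index in ran_or_scheduled:
            return  # already cloned, run, or scheduled
        ran_or_scheduled.add(index)
    run_queue.insert(0, (step, task_kwargs))

queue_push("join", {}, index="join[2]")
queue_push("join", {}, index="join[2]")  # second push from another cloned parent: no-op
assert len(run_queue) == 1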
@@ -640,15 +723,18 @@ class NativeRuntime(object):
             # If the control task is cloned, all mapper tasks should have been cloned
             # as well, so we no longer need to handle cloning of mapper tasks in runtime.
 
-            # Update _finished since these tasks were successfully
-            # run elsewhere so that join will be unblocked.
-            _, foreach_stack = task.finished_id
-            top = foreach_stack[-1]
-            bottom = list(foreach_stack[:-1])
-            for i in range(num_splits):
-                s = tuple(bottom + [top._replace(index=i)])
-                self._finished[(task.step, s)] = mapper_tasks[i]
-                self._is_cloned[mapper_tasks[i]] = False
+            # Update _finished if we are not cloned. If we were cloned, we already
+            # updated _finished with the new tasks. Note that the *value* of mapper
+            # tasks is incorrect and contains the pathspec of the *cloned* run
+            # but we don't use it for anything. We could look to clean it up though
+            if not task.is_cloned:
+                _, foreach_stack = task.finished_id
+                top = foreach_stack[-1]
+                bottom = list(foreach_stack[:-1])
+                for i in range(num_splits):
+                    s = tuple(bottom + [top._replace(index=i)])
+                    self._finished[(task.step, s)] = mapper_tasks[i]
+                    self._is_cloned[mapper_tasks[i]] = False
 
             # Find and check status of control task and retrieve its pathspec
             # for retrieving unbounded foreach cardinality.
@@ -901,7 +987,7 @@ class NativeRuntime(object):
             )
             return
 
-        worker = Worker(task, self._max_log_size)
+        worker = Worker(task, self._max_log_size, self._config_file_name)
         for fd in worker.fds():
             self._workers[fd] = worker
             self._poll.add(fd)
@@ -1080,7 +1166,7 @@ class Task(object):
         # To avoid the edge case where the resume leader is selected but has not
         # yet written the _resume_leader metadata, we will wait for a few seconds.
         # We will wait for resume leader for at most 3 times.
-        for resume_leader_wait_retry in range(3):
+        for _ in range(3):
             if ds.has_metadata("_resume_leader", add_attempt=False):
                 resume_leader = ds.load_metadata(
                     ["_resume_leader"], add_attempt=False
@@ -1181,7 +1267,6 @@ class Task(object):
         # Open the output datastore only if the task is not being cloned.
         if not self._is_cloned:
             self.new_attempt()
-
         for deco in decos:
             deco.runtime_task_created(
                 self._ds,
@@ -1448,6 +1533,15 @@ class CLIArgs(object):
         for deco in flow_decorators(self.task.flow):
             self.top_level_options.update(deco.get_top_level_options())
 
+        # We also pass configuration options using the kv.<name> syntax which will cause
+        # the configuration options to be loaded from the CONFIG file (or local-config-file
+        # in the case of the local runtime)
+        configs = self.task.flow._flow_state.get(_FlowState.CONFIGS)
+        if configs:
+            self.top_level_options["config-value"] = [
+                (k, ConfigInput.make_key_name(k)) for k in configs
+            ]
+
         self.commands = ["step"]
         self.command_args = [self.task.step]
         self.command_options = {
@@ -1481,12 +1575,15 @@ class CLIArgs(object):
             for value in v:
                 yield "--%s" % k
                 if not isinstance(value, bool):
-                    yield to_unicode(value)
+                    value = value if isinstance(value, tuple) else (value,)
+                    for vv in value:
+                        yield to_unicode(vv)
 
         args = list(self.entrypoint)
         args.extend(_options(self.top_level_options))
         args.extend(self.commands)
         args.extend(self.command_args)
+
         args.extend(_options(self.command_options))
         return args
 
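The _options change above lets a single option carry multiple CLI tokens: tuple values are flattened, so a config-value entry like (name, kv-key) expands to --config-value <name> <kv-key>. A self-contained sketch of the expansion (reimplemented here for illustration, not the actual CLIArgs code):

def _options(mapping):
    for k, v in mapping.items():
        # values are lists; each element may itself be a tuple of tokens
        values = v if isinstance(v, list) else [v]
        for value in values:
            yield "--%s" % k
            if not isinstance(value, bool):
                value = value if isinstance(value, tuple) else (value,)
                for vv in value:
                    yield str(vv)

print(list(_options({"config-value": [("cfg", "kv.cfg")]})))
# ['--config-value', 'cfg', 'kv.cfg']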
@@ -1498,8 +1595,9 @@ class CLIArgs(object):
 
 
 class Worker(object):
-    def __init__(self, task, max_logs_size):
+    def __init__(self, task, max_logs_size, config_file_name):
         self.task = task
+        self._config_file_name = config_file_name
         self._proc = self._launch()
 
         if task.retries > task.user_code_retries:
1551
1649
  self.task.user_code_retries,
1552
1650
  self.task.ubf_context,
1553
1651
  )
1652
+
1653
+ # Add user configurations using a file to avoid using up too much space on the
1654
+ # command line
1655
+ if self._config_file_name:
1656
+ args.top_level_options["local-config-file"] = self._config_file_name
1657
+ # Pass configuration options
1554
1658
  env.update(args.get_env())
1555
1659
  env["PYTHONUNBUFFERED"] = "x"
1556
1660
  tracing.inject_tracing_vars(env)
metaflow/sidecar/sidecar_worker.py CHANGED

@@ -48,8 +48,8 @@ def process_messages(worker_type, worker):
         pass
 
 
-@tracing.cli_entrypoint("sidecar")
 @click.command(help="Initialize workers")
+@tracing.cli_entrypoint("sidecar")
 @click.argument("worker-type")
 def main(worker_type):
     sidecar_type = SIDECARS.get(worker_type)
metaflow/tracing/tracing_modules.py CHANGED

@@ -18,8 +18,11 @@ tracer_provider = None
 
 def init_tracing():
     global tracer_provider
+    # Disable logging from opentelemetry
+    import logging
+    logging.getLogger("opentelemetry").setLevel(logging.FATAL)
     if tracer_provider is not None:
-        print("Tracing already initialized", file=sys.stderr)
+        # print("Tracing already initialized", file=sys.stderr)
         return
 
     from .propagator import EnvPropagator