metaflow 2.12.36__py2.py3-none-any.whl → 2.12.37__py2.py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (45):
  1. metaflow/__init__.py +3 -0
  2. metaflow/cli.py +84 -697
  3. metaflow/cli_args.py +17 -0
  4. metaflow/cli_components/__init__.py +0 -0
  5. metaflow/cli_components/dump_cmd.py +96 -0
  6. metaflow/cli_components/init_cmd.py +51 -0
  7. metaflow/cli_components/run_cmds.py +358 -0
  8. metaflow/cli_components/step_cmd.py +189 -0
  9. metaflow/cli_components/utils.py +140 -0
  10. metaflow/cmd/develop/stub_generator.py +9 -2
  11. metaflow/decorators.py +54 -2
  12. metaflow/extension_support/plugins.py +41 -27
  13. metaflow/flowspec.py +156 -16
  14. metaflow/includefile.py +50 -22
  15. metaflow/metaflow_config.py +1 -1
  16. metaflow/package.py +17 -3
  17. metaflow/parameters.py +80 -23
  18. metaflow/plugins/__init__.py +4 -0
  19. metaflow/plugins/airflow/airflow_cli.py +1 -0
  20. metaflow/plugins/argo/argo_workflows.py +41 -1
  21. metaflow/plugins/argo/argo_workflows_cli.py +1 -0
  22. metaflow/plugins/aws/batch/batch_decorator.py +2 -2
  23. metaflow/plugins/aws/step_functions/step_functions.py +32 -0
  24. metaflow/plugins/aws/step_functions/step_functions_cli.py +1 -0
  25. metaflow/plugins/datatools/s3/s3op.py +3 -3
  26. metaflow/plugins/kubernetes/kubernetes_cli.py +1 -1
  27. metaflow/plugins/kubernetes/kubernetes_decorator.py +2 -2
  28. metaflow/plugins/pypi/conda_decorator.py +22 -0
  29. metaflow/plugins/pypi/pypi_decorator.py +1 -0
  30. metaflow/plugins/timeout_decorator.py +2 -2
  31. metaflow/runner/click_api.py +73 -19
  32. metaflow/runtime.py +111 -73
  33. metaflow/sidecar/sidecar_worker.py +1 -1
  34. metaflow/user_configs/__init__.py +0 -0
  35. metaflow/user_configs/config_decorators.py +563 -0
  36. metaflow/user_configs/config_options.py +495 -0
  37. metaflow/user_configs/config_parameters.py +386 -0
  38. metaflow/util.py +17 -0
  39. metaflow/version.py +1 -1
  40. {metaflow-2.12.36.dist-info → metaflow-2.12.37.dist-info}/METADATA +3 -2
  41. {metaflow-2.12.36.dist-info → metaflow-2.12.37.dist-info}/RECORD +45 -35
  42. {metaflow-2.12.36.dist-info → metaflow-2.12.37.dist-info}/LICENSE +0 -0
  43. {metaflow-2.12.36.dist-info → metaflow-2.12.37.dist-info}/WHEEL +0 -0
  44. {metaflow-2.12.36.dist-info → metaflow-2.12.37.dist-info}/entry_points.txt +0 -0
  45. {metaflow-2.12.36.dist-info → metaflow-2.12.37.dist-info}/top_level.txt +0 -0
metaflow/runtime.py CHANGED
@@ -6,10 +6,12 @@ using local / remote processes
6
6
  """
7
7
 
8
8
  from __future__ import print_function
9
+ import json
9
10
  import os
10
11
  import sys
11
12
  import fcntl
12
13
  import re
14
+ import tempfile
13
15
  import time
14
16
  import subprocess
15
17
  from datetime import datetime
@@ -32,6 +34,7 @@ from . import procpoll
32
34
  from .datastore import TaskDataStoreSet
33
35
  from .debug import debug
34
36
  from .decorators import flow_decorators
37
+ from .flowspec import _FlowState
35
38
  from .mflog import mflog, RUNTIME_LOG_SOURCE
36
39
  from .util import to_unicode, compress_list, unicode_type
37
40
  from .clone_util import clone_task_helper
@@ -40,6 +43,10 @@ from .unbounded_foreach import (
40
43
  UBF_CONTROL,
41
44
  UBF_TASK,
42
45
  )
46
+
47
+ from .user_configs.config_options import ConfigInput
48
+ from .user_configs.config_parameters import dump_config_values
49
+
43
50
  import metaflow.tracing as tracing
44
51
 
45
52
  MAX_WORKERS = 16
@@ -471,82 +478,95 @@ class NativeRuntime(object):
471
478
  else:
472
479
  self._queue_push("start", {})
473
480
  progress_tstamp = time.time()
474
- try:
475
- # main scheduling loop
476
- exception = None
477
- while self._run_queue or self._active_tasks[0] > 0 or self._cloned_tasks:
478
- # 1. are any of the current workers finished?
479
- if self._cloned_tasks:
480
- finished_tasks = self._cloned_tasks
481
- # reset the list of cloned tasks and let poll_workers handle
482
- # the remaining transition
483
- self._cloned_tasks = []
484
- else:
485
- finished_tasks = list(self._poll_workers())
486
- # 2. push new tasks triggered by the finished tasks to the queue
487
- self._queue_tasks(finished_tasks)
488
- # 3. if there are available worker slots, pop and start tasks
489
- # from the queue.
490
- self._launch_workers()
491
-
492
- if time.time() - progress_tstamp > PROGRESS_INTERVAL:
493
- progress_tstamp = time.time()
494
- tasks_print = ", ".join(
495
- [
496
- "%s (%d running; %d done)" % (k, v[0], v[1])
497
- for k, v in self._active_tasks.items()
498
- if k != 0 and v[0] > 0
499
- ]
500
- )
501
- if self._active_tasks[0] == 0:
502
- msg = "No tasks are running."
481
+ with tempfile.NamedTemporaryFile(mode="w", encoding="utf-8") as config_file:
482
+ # Configurations are passed through a file to avoid overloading the
483
+ # command-line. We only need to create this file once and it can be reused
484
+ # for any task launch
485
+ config_value = dump_config_values(self._flow)
486
+ if config_value:
487
+ json.dump(config_value, config_file)
488
+ config_file.flush()
489
+ self._config_file_name = config_file.name
490
+ else:
491
+ self._config_file_name = None
492
+ try:
493
+ # main scheduling loop
494
+ exception = None
495
+ while (
496
+ self._run_queue or self._active_tasks[0] > 0 or self._cloned_tasks
497
+ ):
498
+ # 1. are any of the current workers finished?
499
+ if self._cloned_tasks:
500
+ finished_tasks = self._cloned_tasks
501
+ # reset the list of cloned tasks and let poll_workers handle
502
+ # the remaining transition
503
+ self._cloned_tasks = []
503
504
  else:
504
- if self._active_tasks[0] == 1:
505
- msg = "1 task is running: "
505
+ finished_tasks = list(self._poll_workers())
506
+ # 2. push new tasks triggered by the finished tasks to the queue
507
+ self._queue_tasks(finished_tasks)
508
+ # 3. if there are available worker slots, pop and start tasks
509
+ # from the queue.
510
+ self._launch_workers()
511
+
512
+ if time.time() - progress_tstamp > PROGRESS_INTERVAL:
513
+ progress_tstamp = time.time()
514
+ tasks_print = ", ".join(
515
+ [
516
+ "%s (%d running; %d done)" % (k, v[0], v[1])
517
+ for k, v in self._active_tasks.items()
518
+ if k != 0 and v[0] > 0
519
+ ]
520
+ )
521
+ if self._active_tasks[0] == 0:
522
+ msg = "No tasks are running."
506
523
  else:
507
- msg = "%d tasks are running: " % self._active_tasks[0]
508
- msg += "%s." % tasks_print
524
+ if self._active_tasks[0] == 1:
525
+ msg = "1 task is running: "
526
+ else:
527
+ msg = "%d tasks are running: " % self._active_tasks[0]
528
+ msg += "%s." % tasks_print
509
529
 
510
- self._logger(msg, system_msg=True)
530
+ self._logger(msg, system_msg=True)
511
531
 
512
- if len(self._run_queue) == 0:
513
- msg = "No tasks are waiting in the queue."
514
- else:
515
- if len(self._run_queue) == 1:
516
- msg = "1 task is waiting in the queue: "
532
+ if len(self._run_queue) == 0:
533
+ msg = "No tasks are waiting in the queue."
517
534
  else:
518
- msg = "%d tasks are waiting in the queue." % len(
519
- self._run_queue
520
- )
535
+ if len(self._run_queue) == 1:
536
+ msg = "1 task is waiting in the queue: "
537
+ else:
538
+ msg = "%d tasks are waiting in the queue." % len(
539
+ self._run_queue
540
+ )
521
541
 
522
- self._logger(msg, system_msg=True)
523
- if len(self._unprocessed_steps) > 0:
524
- if len(self._unprocessed_steps) == 1:
525
- msg = "%s step has not started" % (
526
- next(iter(self._unprocessed_steps)),
527
- )
528
- else:
529
- msg = "%d steps have not started: " % len(
530
- self._unprocessed_steps
531
- )
532
- msg += "%s." % ", ".join(self._unprocessed_steps)
533
542
  self._logger(msg, system_msg=True)
534
-
535
- except KeyboardInterrupt as ex:
536
- self._logger("Workflow interrupted.", system_msg=True, bad=True)
537
- self._killall()
538
- exception = ex
539
- raise
540
- except Exception as ex:
541
- self._logger("Workflow failed.", system_msg=True, bad=True)
542
- self._killall()
543
- exception = ex
544
- raise
545
- finally:
546
- # on finish clean tasks
547
- for step in self._flow:
548
- for deco in step.decorators:
549
- deco.runtime_finished(exception)
543
+ if len(self._unprocessed_steps) > 0:
544
+ if len(self._unprocessed_steps) == 1:
545
+ msg = "%s step has not started" % (
546
+ next(iter(self._unprocessed_steps)),
547
+ )
548
+ else:
549
+ msg = "%d steps have not started: " % len(
550
+ self._unprocessed_steps
551
+ )
552
+ msg += "%s." % ", ".join(self._unprocessed_steps)
553
+ self._logger(msg, system_msg=True)
554
+
555
+ except KeyboardInterrupt as ex:
556
+ self._logger("Workflow interrupted.", system_msg=True, bad=True)
557
+ self._killall()
558
+ exception = ex
559
+ raise
560
+ except Exception as ex:
561
+ self._logger("Workflow failed.", system_msg=True, bad=True)
562
+ self._killall()
563
+ exception = ex
564
+ raise
565
+ finally:
566
+ # on finish clean tasks
567
+ for step in self._flow:
568
+ for deco in step.decorators:
569
+ deco.runtime_finished(exception)
550
570
 
551
571
  # assert that end was executed and it was successful
552
572
  if ("end", ()) in self._finished:
@@ -957,7 +977,7 @@ class NativeRuntime(object):
957
977
  )
958
978
  return
959
979
 
960
- worker = Worker(task, self._max_log_size)
980
+ worker = Worker(task, self._max_log_size, self._config_file_name)
961
981
  for fd in worker.fds():
962
982
  self._workers[fd] = worker
963
983
  self._poll.add(fd)
@@ -1237,7 +1257,6 @@ class Task(object):
1237
1257
  # Open the output datastore only if the task is not being cloned.
1238
1258
  if not self._is_cloned:
1239
1259
  self.new_attempt()
1240
-
1241
1260
  for deco in decos:
1242
1261
  deco.runtime_task_created(
1243
1262
  self._ds,
@@ -1504,6 +1523,15 @@ class CLIArgs(object):
1504
1523
  for deco in flow_decorators(self.task.flow):
1505
1524
  self.top_level_options.update(deco.get_top_level_options())
1506
1525
 
1526
+ # We also pass configuration options using the kv.<name> syntax which will cause
1527
+ # the configuration options to be loaded from the CONFIG file (or local-config-file
1528
+ # in the case of the local runtime)
1529
+ configs = self.task.flow._flow_state.get(_FlowState.CONFIGS)
1530
+ if configs:
1531
+ self.top_level_options["config-value"] = [
1532
+ (k, ConfigInput.make_key_name(k)) for k in configs
1533
+ ]
1534
+
1507
1535
  self.commands = ["step"]
1508
1536
  self.command_args = [self.task.step]
1509
1537
  self.command_options = {
@@ -1537,12 +1565,15 @@ class CLIArgs(object):
1537
1565
  for value in v:
1538
1566
  yield "--%s" % k
1539
1567
  if not isinstance(value, bool):
1540
- yield to_unicode(value)
1568
+ value = value if isinstance(value, tuple) else (value,)
1569
+ for vv in value:
1570
+ yield to_unicode(vv)
1541
1571
 
1542
1572
  args = list(self.entrypoint)
1543
1573
  args.extend(_options(self.top_level_options))
1544
1574
  args.extend(self.commands)
1545
1575
  args.extend(self.command_args)
1576
+
1546
1577
  args.extend(_options(self.command_options))
1547
1578
  return args
1548
1579
 
@@ -1554,8 +1585,9 @@ class CLIArgs(object):
1554
1585
 
1555
1586
 
1556
1587
  class Worker(object):
1557
- def __init__(self, task, max_logs_size):
1588
+ def __init__(self, task, max_logs_size, config_file_name):
1558
1589
  self.task = task
1590
+ self._config_file_name = config_file_name
1559
1591
  self._proc = self._launch()
1560
1592
 
1561
1593
  if task.retries > task.user_code_retries:
@@ -1607,6 +1639,12 @@ class Worker(object):
1607
1639
  self.task.user_code_retries,
1608
1640
  self.task.ubf_context,
1609
1641
  )
1642
+
1643
+ # Add user configurations using a file to avoid using up too much space on the
1644
+ # command line
1645
+ if self._config_file_name:
1646
+ args.top_level_options["local-config-file"] = self._config_file_name
1647
+ # Pass configuration options
1610
1648
  env.update(args.get_env())
1611
1649
  env["PYTHONUNBUFFERED"] = "x"
1612
1650
  tracing.inject_tracing_vars(env)
@@ -48,8 +48,8 @@ def process_messages(worker_type, worker):
48
48
  pass
49
49
 
50
50
 
51
- @tracing.cli_entrypoint("sidecar")
52
51
  @click.command(help="Initialize workers")
52
+ @tracing.cli_entrypoint("sidecar")
53
53
  @click.argument("worker-type")
54
54
  def main(worker_type):
55
55
  sidecar_type = SIDECARS.get(worker_type)
(Remaining files in the diff are without changes.)