metaflow 2.12.36__py2.py3-none-any.whl → 2.12.38__py2.py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- metaflow/__init__.py +3 -0
- metaflow/cli.py +84 -697
- metaflow/cli_args.py +17 -0
- metaflow/cli_components/__init__.py +0 -0
- metaflow/cli_components/dump_cmd.py +96 -0
- metaflow/cli_components/init_cmd.py +51 -0
- metaflow/cli_components/run_cmds.py +358 -0
- metaflow/cli_components/step_cmd.py +189 -0
- metaflow/cli_components/utils.py +140 -0
- metaflow/cmd/develop/stub_generator.py +9 -2
- metaflow/decorators.py +63 -2
- metaflow/extension_support/plugins.py +41 -27
- metaflow/flowspec.py +156 -16
- metaflow/includefile.py +50 -22
- metaflow/metaflow_config.py +1 -1
- metaflow/package.py +17 -3
- metaflow/parameters.py +80 -23
- metaflow/plugins/__init__.py +4 -0
- metaflow/plugins/airflow/airflow_cli.py +1 -0
- metaflow/plugins/argo/argo_workflows.py +41 -1
- metaflow/plugins/argo/argo_workflows_cli.py +1 -0
- metaflow/plugins/aws/batch/batch_decorator.py +2 -2
- metaflow/plugins/aws/step_functions/step_functions.py +32 -0
- metaflow/plugins/aws/step_functions/step_functions_cli.py +1 -0
- metaflow/plugins/datatools/s3/s3op.py +3 -3
- metaflow/plugins/kubernetes/kubernetes_cli.py +1 -1
- metaflow/plugins/kubernetes/kubernetes_decorator.py +2 -2
- metaflow/plugins/pypi/conda_decorator.py +22 -0
- metaflow/plugins/pypi/pypi_decorator.py +1 -0
- metaflow/plugins/timeout_decorator.py +2 -2
- metaflow/runner/click_api.py +73 -19
- metaflow/runtime.py +111 -73
- metaflow/sidecar/sidecar_worker.py +1 -1
- metaflow/user_configs/__init__.py +0 -0
- metaflow/user_configs/config_decorators.py +563 -0
- metaflow/user_configs/config_options.py +495 -0
- metaflow/user_configs/config_parameters.py +386 -0
- metaflow/util.py +17 -0
- metaflow/version.py +1 -1
- {metaflow-2.12.36.dist-info → metaflow-2.12.38.dist-info}/METADATA +3 -2
- {metaflow-2.12.36.dist-info → metaflow-2.12.38.dist-info}/RECORD +45 -35
- {metaflow-2.12.36.dist-info → metaflow-2.12.38.dist-info}/LICENSE +0 -0
- {metaflow-2.12.36.dist-info → metaflow-2.12.38.dist-info}/WHEEL +0 -0
- {metaflow-2.12.36.dist-info → metaflow-2.12.38.dist-info}/entry_points.txt +0 -0
- {metaflow-2.12.36.dist-info → metaflow-2.12.38.dist-info}/top_level.txt +0 -0
metaflow/runtime.py
CHANGED
@@ -6,10 +6,12 @@ using local / remote processes
|
|
6
6
|
"""
|
7
7
|
|
8
8
|
from __future__ import print_function
|
9
|
+
import json
|
9
10
|
import os
|
10
11
|
import sys
|
11
12
|
import fcntl
|
12
13
|
import re
|
14
|
+
import tempfile
|
13
15
|
import time
|
14
16
|
import subprocess
|
15
17
|
from datetime import datetime
|
@@ -32,6 +34,7 @@ from . import procpoll
|
|
32
34
|
from .datastore import TaskDataStoreSet
|
33
35
|
from .debug import debug
|
34
36
|
from .decorators import flow_decorators
|
37
|
+
from .flowspec import _FlowState
|
35
38
|
from .mflog import mflog, RUNTIME_LOG_SOURCE
|
36
39
|
from .util import to_unicode, compress_list, unicode_type
|
37
40
|
from .clone_util import clone_task_helper
|
@@ -40,6 +43,10 @@ from .unbounded_foreach import (
|
|
40
43
|
UBF_CONTROL,
|
41
44
|
UBF_TASK,
|
42
45
|
)
|
46
|
+
|
47
|
+
from .user_configs.config_options import ConfigInput
|
48
|
+
from .user_configs.config_parameters import dump_config_values
|
49
|
+
|
43
50
|
import metaflow.tracing as tracing
|
44
51
|
|
45
52
|
MAX_WORKERS = 16
|
@@ -471,82 +478,95 @@ class NativeRuntime(object):
|
|
471
478
|
else:
|
472
479
|
self._queue_push("start", {})
|
473
480
|
progress_tstamp = time.time()
|
474
|
-
|
475
|
-
#
|
476
|
-
|
477
|
-
|
478
|
-
|
479
|
-
|
480
|
-
|
481
|
-
|
482
|
-
|
483
|
-
|
484
|
-
|
485
|
-
|
486
|
-
#
|
487
|
-
|
488
|
-
|
489
|
-
|
490
|
-
|
491
|
-
|
492
|
-
|
493
|
-
|
494
|
-
|
495
|
-
|
496
|
-
|
497
|
-
for k, v in self._active_tasks.items()
|
498
|
-
if k != 0 and v[0] > 0
|
499
|
-
]
|
500
|
-
)
|
501
|
-
if self._active_tasks[0] == 0:
|
502
|
-
msg = "No tasks are running."
|
481
|
+
with tempfile.NamedTemporaryFile(mode="w", encoding="utf-8") as config_file:
|
482
|
+
# Configurations are passed through a file to avoid overloading the
|
483
|
+
# command-line. We only need to create this file once and it can be reused
|
484
|
+
# for any task launch
|
485
|
+
config_value = dump_config_values(self._flow)
|
486
|
+
if config_value:
|
487
|
+
json.dump(config_value, config_file)
|
488
|
+
config_file.flush()
|
489
|
+
self._config_file_name = config_file.name
|
490
|
+
else:
|
491
|
+
self._config_file_name = None
|
492
|
+
try:
|
493
|
+
# main scheduling loop
|
494
|
+
exception = None
|
495
|
+
while (
|
496
|
+
self._run_queue or self._active_tasks[0] > 0 or self._cloned_tasks
|
497
|
+
):
|
498
|
+
# 1. are any of the current workers finished?
|
499
|
+
if self._cloned_tasks:
|
500
|
+
finished_tasks = self._cloned_tasks
|
501
|
+
# reset the list of cloned tasks and let poll_workers handle
|
502
|
+
# the remaining transition
|
503
|
+
self._cloned_tasks = []
|
503
504
|
else:
|
504
|
-
|
505
|
-
|
505
|
+
finished_tasks = list(self._poll_workers())
|
506
|
+
# 2. push new tasks triggered by the finished tasks to the queue
|
507
|
+
self._queue_tasks(finished_tasks)
|
508
|
+
# 3. if there are available worker slots, pop and start tasks
|
509
|
+
# from the queue.
|
510
|
+
self._launch_workers()
|
511
|
+
|
512
|
+
if time.time() - progress_tstamp > PROGRESS_INTERVAL:
|
513
|
+
progress_tstamp = time.time()
|
514
|
+
tasks_print = ", ".join(
|
515
|
+
[
|
516
|
+
"%s (%d running; %d done)" % (k, v[0], v[1])
|
517
|
+
for k, v in self._active_tasks.items()
|
518
|
+
if k != 0 and v[0] > 0
|
519
|
+
]
|
520
|
+
)
|
521
|
+
if self._active_tasks[0] == 0:
|
522
|
+
msg = "No tasks are running."
|
506
523
|
else:
|
507
|
-
|
508
|
-
|
524
|
+
if self._active_tasks[0] == 1:
|
525
|
+
msg = "1 task is running: "
|
526
|
+
else:
|
527
|
+
msg = "%d tasks are running: " % self._active_tasks[0]
|
528
|
+
msg += "%s." % tasks_print
|
509
529
|
|
510
|
-
|
530
|
+
self._logger(msg, system_msg=True)
|
511
531
|
|
512
|
-
|
513
|
-
|
514
|
-
else:
|
515
|
-
if len(self._run_queue) == 1:
|
516
|
-
msg = "1 task is waiting in the queue: "
|
532
|
+
if len(self._run_queue) == 0:
|
533
|
+
msg = "No tasks are waiting in the queue."
|
517
534
|
else:
|
518
|
-
|
519
|
-
|
520
|
-
|
535
|
+
if len(self._run_queue) == 1:
|
536
|
+
msg = "1 task is waiting in the queue: "
|
537
|
+
else:
|
538
|
+
msg = "%d tasks are waiting in the queue." % len(
|
539
|
+
self._run_queue
|
540
|
+
)
|
521
541
|
|
522
|
-
self._logger(msg, system_msg=True)
|
523
|
-
if len(self._unprocessed_steps) > 0:
|
524
|
-
if len(self._unprocessed_steps) == 1:
|
525
|
-
msg = "%s step has not started" % (
|
526
|
-
next(iter(self._unprocessed_steps)),
|
527
|
-
)
|
528
|
-
else:
|
529
|
-
msg = "%d steps have not started: " % len(
|
530
|
-
self._unprocessed_steps
|
531
|
-
)
|
532
|
-
msg += "%s." % ", ".join(self._unprocessed_steps)
|
533
542
|
self._logger(msg, system_msg=True)
|
534
|
-
|
535
|
-
|
536
|
-
|
537
|
-
|
538
|
-
|
539
|
-
|
540
|
-
|
541
|
-
|
542
|
-
|
543
|
-
|
544
|
-
|
545
|
-
|
546
|
-
|
547
|
-
|
548
|
-
|
549
|
-
|
543
|
+
if len(self._unprocessed_steps) > 0:
|
544
|
+
if len(self._unprocessed_steps) == 1:
|
545
|
+
msg = "%s step has not started" % (
|
546
|
+
next(iter(self._unprocessed_steps)),
|
547
|
+
)
|
548
|
+
else:
|
549
|
+
msg = "%d steps have not started: " % len(
|
550
|
+
self._unprocessed_steps
|
551
|
+
)
|
552
|
+
msg += "%s." % ", ".join(self._unprocessed_steps)
|
553
|
+
self._logger(msg, system_msg=True)
|
554
|
+
|
555
|
+
except KeyboardInterrupt as ex:
|
556
|
+
self._logger("Workflow interrupted.", system_msg=True, bad=True)
|
557
|
+
self._killall()
|
558
|
+
exception = ex
|
559
|
+
raise
|
560
|
+
except Exception as ex:
|
561
|
+
self._logger("Workflow failed.", system_msg=True, bad=True)
|
562
|
+
self._killall()
|
563
|
+
exception = ex
|
564
|
+
raise
|
565
|
+
finally:
|
566
|
+
# on finish clean tasks
|
567
|
+
for step in self._flow:
|
568
|
+
for deco in step.decorators:
|
569
|
+
deco.runtime_finished(exception)
|
550
570
|
|
551
571
|
# assert that end was executed and it was successful
|
552
572
|
if ("end", ()) in self._finished:
|
@@ -957,7 +977,7 @@ class NativeRuntime(object):
|
|
957
977
|
)
|
958
978
|
return
|
959
979
|
|
960
|
-
worker = Worker(task, self._max_log_size)
|
980
|
+
worker = Worker(task, self._max_log_size, self._config_file_name)
|
961
981
|
for fd in worker.fds():
|
962
982
|
self._workers[fd] = worker
|
963
983
|
self._poll.add(fd)
|
@@ -1237,7 +1257,6 @@ class Task(object):
|
|
1237
1257
|
# Open the output datastore only if the task is not being cloned.
|
1238
1258
|
if not self._is_cloned:
|
1239
1259
|
self.new_attempt()
|
1240
|
-
|
1241
1260
|
for deco in decos:
|
1242
1261
|
deco.runtime_task_created(
|
1243
1262
|
self._ds,
|
@@ -1504,6 +1523,15 @@ class CLIArgs(object):
|
|
1504
1523
|
for deco in flow_decorators(self.task.flow):
|
1505
1524
|
self.top_level_options.update(deco.get_top_level_options())
|
1506
1525
|
|
1526
|
+
# We also pass configuration options using the kv.<name> syntax which will cause
|
1527
|
+
# the configuration options to be loaded from the CONFIG file (or local-config-file
|
1528
|
+
# in the case of the local runtime)
|
1529
|
+
configs = self.task.flow._flow_state.get(_FlowState.CONFIGS)
|
1530
|
+
if configs:
|
1531
|
+
self.top_level_options["config-value"] = [
|
1532
|
+
(k, ConfigInput.make_key_name(k)) for k in configs
|
1533
|
+
]
|
1534
|
+
|
1507
1535
|
self.commands = ["step"]
|
1508
1536
|
self.command_args = [self.task.step]
|
1509
1537
|
self.command_options = {
|
@@ -1537,12 +1565,15 @@ class CLIArgs(object):
|
|
1537
1565
|
for value in v:
|
1538
1566
|
yield "--%s" % k
|
1539
1567
|
if not isinstance(value, bool):
|
1540
|
-
|
1568
|
+
value = value if isinstance(value, tuple) else (value,)
|
1569
|
+
for vv in value:
|
1570
|
+
yield to_unicode(vv)
|
1541
1571
|
|
1542
1572
|
args = list(self.entrypoint)
|
1543
1573
|
args.extend(_options(self.top_level_options))
|
1544
1574
|
args.extend(self.commands)
|
1545
1575
|
args.extend(self.command_args)
|
1576
|
+
|
1546
1577
|
args.extend(_options(self.command_options))
|
1547
1578
|
return args
|
1548
1579
|
|
@@ -1554,8 +1585,9 @@ class CLIArgs(object):
|
|
1554
1585
|
|
1555
1586
|
|
1556
1587
|
class Worker(object):
|
1557
|
-
def __init__(self, task, max_logs_size):
|
1588
|
+
def __init__(self, task, max_logs_size, config_file_name):
|
1558
1589
|
self.task = task
|
1590
|
+
self._config_file_name = config_file_name
|
1559
1591
|
self._proc = self._launch()
|
1560
1592
|
|
1561
1593
|
if task.retries > task.user_code_retries:
|
@@ -1607,6 +1639,12 @@ class Worker(object):
|
|
1607
1639
|
self.task.user_code_retries,
|
1608
1640
|
self.task.ubf_context,
|
1609
1641
|
)
|
1642
|
+
|
1643
|
+
# Add user configurations using a file to avoid using up too much space on the
|
1644
|
+
# command line
|
1645
|
+
if self._config_file_name:
|
1646
|
+
args.top_level_options["local-config-file"] = self._config_file_name
|
1647
|
+
# Pass configuration options
|
1610
1648
|
env.update(args.get_env())
|
1611
1649
|
env["PYTHONUNBUFFERED"] = "x"
|
1612
1650
|
tracing.inject_tracing_vars(env)
|
@@ -48,8 +48,8 @@ def process_messages(worker_type, worker):
|
|
48
48
|
pass
|
49
49
|
|
50
50
|
|
51
|
-
@tracing.cli_entrypoint("sidecar")
|
52
51
|
@click.command(help="Initialize workers")
|
52
|
+
@tracing.cli_entrypoint("sidecar")
|
53
53
|
@click.argument("worker-type")
|
54
54
|
def main(worker_type):
|
55
55
|
sidecar_type = SIDECARS.get(worker_type)
|
File without changes
|