metaflow 2.18.13__py2.py3-none-any.whl → 2.19.0__py2.py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- metaflow/__init__.py +1 -0
- metaflow/cli.py +78 -13
- metaflow/cli_components/run_cmds.py +182 -39
- metaflow/cli_components/step_cmd.py +160 -4
- metaflow/client/__init__.py +1 -0
- metaflow/client/core.py +162 -99
- metaflow/client/filecache.py +59 -32
- metaflow/cmd/code/__init__.py +2 -1
- metaflow/datastore/__init__.py +1 -0
- metaflow/datastore/content_addressed_store.py +40 -9
- metaflow/datastore/datastore_set.py +10 -1
- metaflow/datastore/flow_datastore.py +123 -4
- metaflow/datastore/spin_datastore.py +91 -0
- metaflow/datastore/task_datastore.py +86 -2
- metaflow/decorators.py +75 -6
- metaflow/extension_support/__init__.py +372 -305
- metaflow/flowspec.py +3 -2
- metaflow/metaflow_config.py +41 -0
- metaflow/metaflow_profile.py +18 -0
- metaflow/packaging_sys/utils.py +2 -39
- metaflow/packaging_sys/v1.py +63 -16
- metaflow/plugins/__init__.py +2 -0
- metaflow/plugins/cards/card_datastore.py +9 -3
- metaflow/plugins/cards/card_decorator.py +1 -0
- metaflow/plugins/cards/card_modules/basic.py +9 -3
- metaflow/plugins/datastores/local_storage.py +12 -6
- metaflow/plugins/datastores/spin_storage.py +12 -0
- metaflow/plugins/datatools/s3/s3.py +29 -10
- metaflow/plugins/datatools/s3/s3op.py +90 -62
- metaflow/plugins/metadata_providers/local.py +76 -82
- metaflow/plugins/metadata_providers/spin.py +16 -0
- metaflow/runner/metaflow_runner.py +210 -19
- metaflow/runtime.py +348 -21
- metaflow/task.py +61 -12
- metaflow/user_configs/config_parameters.py +2 -4
- metaflow/user_decorators/mutable_flow.py +1 -1
- metaflow/user_decorators/user_step_decorator.py +10 -1
- metaflow/util.py +191 -1
- metaflow/version.py +1 -1
- {metaflow-2.18.13.data → metaflow-2.19.0.data}/data/share/metaflow/devtools/Makefile +10 -0
- {metaflow-2.18.13.dist-info → metaflow-2.19.0.dist-info}/METADATA +2 -4
- {metaflow-2.18.13.dist-info → metaflow-2.19.0.dist-info}/RECORD +48 -45
- {metaflow-2.18.13.data → metaflow-2.19.0.data}/data/share/metaflow/devtools/Tiltfile +0 -0
- {metaflow-2.18.13.data → metaflow-2.19.0.data}/data/share/metaflow/devtools/pick_services.sh +0 -0
- {metaflow-2.18.13.dist-info → metaflow-2.19.0.dist-info}/WHEEL +0 -0
- {metaflow-2.18.13.dist-info → metaflow-2.19.0.dist-info}/entry_points.txt +0 -0
- {metaflow-2.18.13.dist-info → metaflow-2.19.0.dist-info}/licenses/LICENSE +0 -0
- {metaflow-2.18.13.dist-info → metaflow-2.19.0.dist-info}/top_level.txt +0 -0
metaflow/__init__.py
CHANGED
metaflow/cli.py
CHANGED
|
@@ -1,3 +1,4 @@
|
|
|
1
|
+
import os
|
|
1
2
|
import functools
|
|
2
3
|
import inspect
|
|
3
4
|
import os
|
|
@@ -7,7 +8,6 @@ from datetime import datetime
|
|
|
7
8
|
|
|
8
9
|
import metaflow.tracing as tracing
|
|
9
10
|
from metaflow._vendor import click
|
|
10
|
-
from metaflow.system import _system_logger, _system_monitor
|
|
11
11
|
|
|
12
12
|
from . import decorators, lint, metaflow_version, parameters, plugins
|
|
13
13
|
from .cli_args import cli_args
|
|
@@ -27,6 +27,8 @@ from .metaflow_config import (
|
|
|
27
27
|
DEFAULT_PACKAGE_SUFFIXES,
|
|
28
28
|
)
|
|
29
29
|
from .metaflow_current import current
|
|
30
|
+
from .metaflow_profile import from_start
|
|
31
|
+
from metaflow.system import _system_monitor, _system_logger
|
|
30
32
|
from .metaflow_environment import MetaflowEnvironment
|
|
31
33
|
from .packaging_sys import MetaflowCodeContent
|
|
32
34
|
from .plugins import (
|
|
@@ -38,9 +40,9 @@ from .plugins import (
|
|
|
38
40
|
)
|
|
39
41
|
from .pylint_wrapper import PyLint
|
|
40
42
|
from .R import metaflow_r_version, use_r
|
|
43
|
+
from .util import get_latest_run_id, resolve_identity, decompress_list
|
|
41
44
|
from .user_configs.config_options import LocalFileInput, config_options
|
|
42
45
|
from .user_configs.config_parameters import ConfigValue
|
|
43
|
-
from .util import get_latest_run_id, resolve_identity
|
|
44
46
|
|
|
45
47
|
ERASE_TO_EOL = "\033[K"
|
|
46
48
|
HIGHLIGHT = "red"
|
|
@@ -125,6 +127,8 @@ def logger(body="", system_msg=False, head="", bad=False, timestamp=True, nl=Tru
|
|
|
125
127
|
"step": "metaflow.cli_components.step_cmd.step",
|
|
126
128
|
"run": "metaflow.cli_components.run_cmds.run",
|
|
127
129
|
"resume": "metaflow.cli_components.run_cmds.resume",
|
|
130
|
+
"spin": "metaflow.cli_components.run_cmds.spin",
|
|
131
|
+
"spin-step": "metaflow.cli_components.step_cmd.spin_step",
|
|
128
132
|
},
|
|
129
133
|
)
|
|
130
134
|
def cli(ctx):
|
|
@@ -318,6 +322,13 @@ def version(obj):
|
|
|
318
322
|
hidden=True,
|
|
319
323
|
is_eager=True,
|
|
320
324
|
)
|
|
325
|
+
@click.option(
|
|
326
|
+
"--mode",
|
|
327
|
+
type=click.Choice(["spin"]),
|
|
328
|
+
default=None,
|
|
329
|
+
help="Execution mode for metaflow CLI commands. Use 'spin' to enable "
|
|
330
|
+
"spin metadata and spin datastore for executions",
|
|
331
|
+
)
|
|
321
332
|
@click.pass_context
|
|
322
333
|
def start(
|
|
323
334
|
ctx,
|
|
@@ -335,6 +346,7 @@ def start(
|
|
|
335
346
|
local_config_file=None,
|
|
336
347
|
config=None,
|
|
337
348
|
config_value=None,
|
|
349
|
+
mode=None,
|
|
338
350
|
**deco_options
|
|
339
351
|
):
|
|
340
352
|
if quiet:
|
|
@@ -347,6 +359,7 @@ def start(
|
|
|
347
359
|
if use_r():
|
|
348
360
|
version = metaflow_r_version()
|
|
349
361
|
|
|
362
|
+
from_start("MetaflowCLI: Starting")
|
|
350
363
|
echo("Metaflow %s" % version, fg="magenta", bold=True, nl=False)
|
|
351
364
|
echo(" executing *%s*" % ctx.obj.flow.name, fg="magenta", nl=False)
|
|
352
365
|
echo(" for *%s*" % resolve_identity(), fg="magenta")
|
|
@@ -366,6 +379,7 @@ def start(
|
|
|
366
379
|
ctx.obj.check = functools.partial(_check, echo)
|
|
367
380
|
ctx.obj.top_cli = cli
|
|
368
381
|
ctx.obj.package_suffixes = package_suffixes.split(",")
|
|
382
|
+
ctx.obj.spin_mode = mode == "spin"
|
|
369
383
|
|
|
370
384
|
ctx.obj.datastore_impl = [d for d in DATASTORES if d.TYPE == datastore][0]
|
|
371
385
|
|
|
@@ -472,19 +486,12 @@ def start(
|
|
|
472
486
|
# set force rebuild flag for environments that support it.
|
|
473
487
|
ctx.obj.environment._force_rebuild = force_rebuild_environments
|
|
474
488
|
ctx.obj.environment.validate_environment(ctx.obj.logger, datastore)
|
|
475
|
-
|
|
476
489
|
ctx.obj.event_logger = LOGGING_SIDECARS[event_logger](
|
|
477
490
|
flow=ctx.obj.flow, env=ctx.obj.environment
|
|
478
491
|
)
|
|
479
|
-
ctx.obj.event_logger.start()
|
|
480
|
-
_system_logger.init_system_logger(ctx.obj.flow.name, ctx.obj.event_logger)
|
|
481
|
-
|
|
482
492
|
ctx.obj.monitor = MONITOR_SIDECARS[monitor](
|
|
483
493
|
flow=ctx.obj.flow, env=ctx.obj.environment
|
|
484
494
|
)
|
|
485
|
-
ctx.obj.monitor.start()
|
|
486
|
-
_system_monitor.init_system_monitor(ctx.obj.flow.name, ctx.obj.monitor)
|
|
487
|
-
|
|
488
495
|
ctx.obj.metadata = [m for m in METADATA_PROVIDERS if m.TYPE == metadata][0](
|
|
489
496
|
ctx.obj.environment, ctx.obj.flow, ctx.obj.event_logger, ctx.obj.monitor
|
|
490
497
|
)
|
|
@@ -498,6 +505,57 @@ def start(
|
|
|
498
505
|
)
|
|
499
506
|
|
|
500
507
|
ctx.obj.config_options = config_options
|
|
508
|
+
ctx.obj.is_spin = False
|
|
509
|
+
ctx.obj.skip_decorators = False
|
|
510
|
+
|
|
511
|
+
# Override values for spin steps, or if we are in spin mode
|
|
512
|
+
if (
|
|
513
|
+
hasattr(ctx, "saved_args")
|
|
514
|
+
and ctx.saved_args
|
|
515
|
+
and "spin" in ctx.saved_args[0]
|
|
516
|
+
or ctx.obj.spin_mode
|
|
517
|
+
):
|
|
518
|
+
# To minimize side effects for spin, we will only use the following:
|
|
519
|
+
# - local metadata provider,
|
|
520
|
+
# - local datastore,
|
|
521
|
+
# - local environment,
|
|
522
|
+
# - null event logger,
|
|
523
|
+
# - null monitor
|
|
524
|
+
ctx.obj.is_spin = True
|
|
525
|
+
if "--skip-decorators" in ctx.saved_args:
|
|
526
|
+
ctx.obj.skip_decorators = True
|
|
527
|
+
|
|
528
|
+
ctx.obj.event_logger = LOGGING_SIDECARS["nullSidecarLogger"](
|
|
529
|
+
flow=ctx.obj.flow, env=ctx.obj.environment
|
|
530
|
+
)
|
|
531
|
+
ctx.obj.monitor = MONITOR_SIDECARS["nullSidecarMonitor"](
|
|
532
|
+
flow=ctx.obj.flow, env=ctx.obj.environment
|
|
533
|
+
)
|
|
534
|
+
# Use spin metadata, spin datastore, and spin datastore root
|
|
535
|
+
ctx.obj.metadata = [m for m in METADATA_PROVIDERS if m.TYPE == "spin"][0](
|
|
536
|
+
ctx.obj.environment, ctx.obj.flow, ctx.obj.event_logger, ctx.obj.monitor
|
|
537
|
+
)
|
|
538
|
+
ctx.obj.datastore_impl = [d for d in DATASTORES if d.TYPE == "spin"][0]
|
|
539
|
+
datastore_root = ctx.obj.datastore_impl.get_datastore_root_from_config(
|
|
540
|
+
ctx.obj.echo, create_on_absent=True
|
|
541
|
+
)
|
|
542
|
+
ctx.obj.datastore_impl.datastore_root = datastore_root
|
|
543
|
+
|
|
544
|
+
ctx.obj.flow_datastore = FlowDataStore(
|
|
545
|
+
ctx.obj.flow.name,
|
|
546
|
+
ctx.obj.environment, # Same environment as run/resume
|
|
547
|
+
ctx.obj.metadata, # local metadata
|
|
548
|
+
ctx.obj.event_logger, # null event logger
|
|
549
|
+
ctx.obj.monitor, # null monitor
|
|
550
|
+
storage_impl=ctx.obj.datastore_impl,
|
|
551
|
+
)
|
|
552
|
+
|
|
553
|
+
# Start event logger and monitor
|
|
554
|
+
ctx.obj.event_logger.start()
|
|
555
|
+
_system_logger.init_system_logger(ctx.obj.flow.name, ctx.obj.event_logger)
|
|
556
|
+
|
|
557
|
+
ctx.obj.monitor.start()
|
|
558
|
+
_system_monitor.init_system_monitor(ctx.obj.flow.name, ctx.obj.monitor)
|
|
501
559
|
|
|
502
560
|
decorators._init(ctx.obj.flow)
|
|
503
561
|
|
|
@@ -512,9 +570,11 @@ def start(
|
|
|
512
570
|
ctx.obj.logger,
|
|
513
571
|
echo,
|
|
514
572
|
deco_options,
|
|
573
|
+
ctx.obj.is_spin,
|
|
574
|
+
ctx.obj.skip_decorators,
|
|
515
575
|
)
|
|
516
576
|
|
|
517
|
-
# In the case of run/resume, we will want to apply the TL decospecs
|
|
577
|
+
# In the case of run/resume/spin, we will want to apply the TL decospecs
|
|
518
578
|
# *after* the run decospecs so that they don't take precedence. In other
|
|
519
579
|
# words, for the same decorator, we want `myflow.py run --with foo` to
|
|
520
580
|
# take precedence over any other `foo` decospec
|
|
@@ -542,11 +602,10 @@ def start(
|
|
|
542
602
|
if (
|
|
543
603
|
hasattr(ctx, "saved_args")
|
|
544
604
|
and ctx.saved_args
|
|
545
|
-
and ctx.saved_args[0] not in ("run", "resume")
|
|
605
|
+
and ctx.saved_args[0] not in ("run", "resume", "spin")
|
|
546
606
|
):
|
|
547
|
-
# run/resume are special cases because they can add more decorators with --with,
|
|
607
|
+
# run/resume/spin are special cases because they can add more decorators with --with,
|
|
548
608
|
# so they have to take care of themselves.
|
|
549
|
-
|
|
550
609
|
all_decospecs = ctx.obj.tl_decospecs + list(
|
|
551
610
|
ctx.obj.environment.decospecs() or []
|
|
552
611
|
)
|
|
@@ -556,6 +615,9 @@ def start(
|
|
|
556
615
|
# or a scheduler setting them up in their own way.
|
|
557
616
|
if ctx.saved_args[0] not in ("step", "init"):
|
|
558
617
|
all_decospecs += DEFAULT_DECOSPECS.split()
|
|
618
|
+
elif ctx.saved_args[0] == "spin-step":
|
|
619
|
+
# If we are in spin-args, we will not attach any decorators
|
|
620
|
+
all_decospecs = []
|
|
559
621
|
if all_decospecs:
|
|
560
622
|
decorators._attach_decorators(ctx.obj.flow, all_decospecs)
|
|
561
623
|
decorators._init(ctx.obj.flow)
|
|
@@ -569,6 +631,9 @@ def start(
|
|
|
569
631
|
ctx.obj.environment,
|
|
570
632
|
ctx.obj.flow_datastore,
|
|
571
633
|
ctx.obj.logger,
|
|
634
|
+
# The last two arguments are only used for spin steps
|
|
635
|
+
ctx.obj.is_spin,
|
|
636
|
+
ctx.obj.skip_decorators,
|
|
572
637
|
)
|
|
573
638
|
|
|
574
639
|
# Check the graph again (mutators may have changed it)
|
|
@@ -8,21 +8,26 @@ from .. import decorators, namespace, parameters, tracing
|
|
|
8
8
|
from ..exception import CommandException
|
|
9
9
|
from ..graph import FlowGraph
|
|
10
10
|
from ..metaflow_current import current
|
|
11
|
-
from ..metaflow_config import
|
|
11
|
+
from ..metaflow_config import (
|
|
12
|
+
DEFAULT_DECOSPECS,
|
|
13
|
+
FEAT_ALWAYS_UPLOAD_CODE_PACKAGE,
|
|
14
|
+
SPIN_PERSIST,
|
|
15
|
+
)
|
|
16
|
+
from ..metaflow_profile import from_start
|
|
12
17
|
from ..package import MetaflowPackage
|
|
13
|
-
from ..runtime import NativeRuntime
|
|
18
|
+
from ..runtime import NativeRuntime, SpinRuntime
|
|
14
19
|
from ..system import _system_logger
|
|
15
20
|
|
|
16
21
|
# from ..client.core import Run
|
|
17
22
|
|
|
18
23
|
from ..tagging_util import validate_tags
|
|
19
|
-
from ..util import get_latest_run_id, write_latest_run_id
|
|
24
|
+
from ..util import get_latest_run_id, write_latest_run_id, parse_spin_pathspec
|
|
20
25
|
|
|
21
26
|
|
|
22
|
-
def before_run(obj, tags, decospecs):
|
|
27
|
+
def before_run(obj, tags, decospecs, skip_decorators=False):
|
|
23
28
|
validate_tags(tags)
|
|
24
29
|
|
|
25
|
-
# There's a --with option both at the top-level and for the run
|
|
30
|
+
# There's a --with option both at the top-level and for the run/resume/spin
|
|
26
31
|
# subcommand. Why?
|
|
27
32
|
#
|
|
28
33
|
# "run --with shoes" looks so much better than "--with shoes run".
|
|
@@ -36,26 +41,36 @@ def before_run(obj, tags, decospecs):
|
|
|
36
41
|
# - run level decospecs
|
|
37
42
|
# - top level decospecs
|
|
38
43
|
# - environment decospecs
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
+ obj.tl_decospecs
|
|
42
|
-
+ list(obj.environment.decospecs() or [])
|
|
43
|
-
)
|
|
44
|
-
if all_decospecs:
|
|
45
|
-
# These decospecs are the ones from run/resume PLUS the ones from the
|
|
46
|
-
# environment (for example the @conda)
|
|
47
|
-
decorators._attach_decorators(obj.flow, all_decospecs)
|
|
48
|
-
decorators._init(obj.flow)
|
|
49
|
-
# Regenerate graph if we attached more decorators
|
|
50
|
-
obj.flow.__class__._init_graph()
|
|
51
|
-
obj.graph = obj.flow._graph
|
|
52
|
-
|
|
53
|
-
obj.check(obj.graph, obj.flow, obj.environment, pylint=obj.pylint)
|
|
54
|
-
# obj.environment.init_environment(obj.logger)
|
|
55
|
-
|
|
56
|
-
decorators._init_step_decorators(
|
|
57
|
-
obj.flow, obj.graph, obj.environment, obj.flow_datastore, obj.logger
|
|
44
|
+
from_start(
|
|
45
|
+
f"Inside before_run, skip_decorators={skip_decorators}, is_spin={obj.is_spin}"
|
|
58
46
|
)
|
|
47
|
+
if not skip_decorators:
|
|
48
|
+
all_decospecs = (
|
|
49
|
+
list(decospecs or [])
|
|
50
|
+
+ obj.tl_decospecs
|
|
51
|
+
+ list(obj.environment.decospecs() or [])
|
|
52
|
+
)
|
|
53
|
+
if all_decospecs:
|
|
54
|
+
# These decospecs are the ones from run/resume/spin PLUS the ones from the
|
|
55
|
+
# environment (for example the @conda)
|
|
56
|
+
decorators._attach_decorators(obj.flow, all_decospecs)
|
|
57
|
+
decorators._init(obj.flow)
|
|
58
|
+
# Regenerate graph if we attached more decorators
|
|
59
|
+
obj.flow.__class__._init_graph()
|
|
60
|
+
obj.graph = obj.flow._graph
|
|
61
|
+
|
|
62
|
+
obj.check(obj.graph, obj.flow, obj.environment, pylint=obj.pylint)
|
|
63
|
+
# obj.environment.init_environment(obj.logger)
|
|
64
|
+
|
|
65
|
+
decorators._init_step_decorators(
|
|
66
|
+
obj.flow,
|
|
67
|
+
obj.graph,
|
|
68
|
+
obj.environment,
|
|
69
|
+
obj.flow_datastore,
|
|
70
|
+
obj.logger,
|
|
71
|
+
obj.is_spin,
|
|
72
|
+
skip_decorators,
|
|
73
|
+
)
|
|
59
74
|
# Re-read graph since it may have been modified by mutators
|
|
60
75
|
obj.graph = obj.flow._graph
|
|
61
76
|
|
|
@@ -73,6 +88,29 @@ def before_run(obj, tags, decospecs):
|
|
|
73
88
|
)
|
|
74
89
|
|
|
75
90
|
|
|
91
|
+
def common_runner_options(func):
|
|
92
|
+
@click.option(
|
|
93
|
+
"--run-id-file",
|
|
94
|
+
default=None,
|
|
95
|
+
show_default=True,
|
|
96
|
+
type=str,
|
|
97
|
+
help="Write the ID of this run to the file specified.",
|
|
98
|
+
)
|
|
99
|
+
@click.option(
|
|
100
|
+
"--runner-attribute-file",
|
|
101
|
+
default=None,
|
|
102
|
+
show_default=True,
|
|
103
|
+
type=str,
|
|
104
|
+
help="Write the metadata and pathspec of this run to the file specified. Used internally "
|
|
105
|
+
"for Metaflow's Runner API.",
|
|
106
|
+
)
|
|
107
|
+
@wraps(func)
|
|
108
|
+
def wrapper(*args, **kwargs):
|
|
109
|
+
return func(*args, **kwargs)
|
|
110
|
+
|
|
111
|
+
return wrapper
|
|
112
|
+
|
|
113
|
+
|
|
76
114
|
def write_file(file_path, content):
|
|
77
115
|
if file_path is not None:
|
|
78
116
|
with open(file_path, "w", encoding="utf-8") as f:
|
|
@@ -137,20 +175,6 @@ def common_run_options(func):
|
|
|
137
175
|
"in steps.",
|
|
138
176
|
callback=config_callback,
|
|
139
177
|
)
|
|
140
|
-
@click.option(
|
|
141
|
-
"--run-id-file",
|
|
142
|
-
default=None,
|
|
143
|
-
show_default=True,
|
|
144
|
-
type=str,
|
|
145
|
-
help="Write the ID of this run to the file specified.",
|
|
146
|
-
)
|
|
147
|
-
@click.option(
|
|
148
|
-
"--runner-attribute-file",
|
|
149
|
-
default=None,
|
|
150
|
-
show_default=True,
|
|
151
|
-
type=str,
|
|
152
|
-
help="Write the metadata and pathspec of this run to the file specified. Used internally for Metaflow's Runner API.",
|
|
153
|
-
)
|
|
154
178
|
@wraps(func)
|
|
155
179
|
def wrapper(*args, **kwargs):
|
|
156
180
|
return func(*args, **kwargs)
|
|
@@ -195,6 +219,7 @@ def common_run_options(func):
|
|
|
195
219
|
@click.command(help="Resume execution of a previous run of this flow.")
|
|
196
220
|
@tracing.cli("cli/resume")
|
|
197
221
|
@common_run_options
|
|
222
|
+
@common_runner_options
|
|
198
223
|
@click.pass_obj
|
|
199
224
|
def resume(
|
|
200
225
|
obj,
|
|
@@ -326,6 +351,7 @@ def resume(
|
|
|
326
351
|
@click.command(help="Run the workflow locally.")
|
|
327
352
|
@tracing.cli("cli/run")
|
|
328
353
|
@common_run_options
|
|
354
|
+
@common_runner_options
|
|
329
355
|
@click.option(
|
|
330
356
|
"--namespace",
|
|
331
357
|
"user_namespace",
|
|
@@ -348,7 +374,7 @@ def run(
|
|
|
348
374
|
run_id_file=None,
|
|
349
375
|
runner_attribute_file=None,
|
|
350
376
|
user_namespace=None,
|
|
351
|
-
**kwargs
|
|
377
|
+
**kwargs,
|
|
352
378
|
):
|
|
353
379
|
if user_namespace is not None:
|
|
354
380
|
namespace(user_namespace or None)
|
|
@@ -401,3 +427,120 @@ def run(
|
|
|
401
427
|
)
|
|
402
428
|
with runtime.run_heartbeat():
|
|
403
429
|
runtime.execute()
|
|
430
|
+
|
|
431
|
+
|
|
432
|
+
# @parameters.add_custom_parameters(deploy_mode=True)
|
|
433
|
+
@click.command(help="Spins up a task for a given step from a previous run locally.")
|
|
434
|
+
@tracing.cli("cli/spin")
|
|
435
|
+
@click.argument("pathspec")
|
|
436
|
+
@click.option(
|
|
437
|
+
"--skip-decorators/--no-skip-decorators",
|
|
438
|
+
is_flag=True,
|
|
439
|
+
# Default False matches the saved_args check in cli.py for spin steps - skip_decorators
|
|
440
|
+
# only becomes True when explicitly passed, otherwise decorators are applied by default
|
|
441
|
+
default=False,
|
|
442
|
+
show_default=True,
|
|
443
|
+
help="Skip decorators attached to the step or flow.",
|
|
444
|
+
)
|
|
445
|
+
@click.option(
|
|
446
|
+
"--artifacts-module",
|
|
447
|
+
default=None,
|
|
448
|
+
show_default=True,
|
|
449
|
+
help="Path to a module that contains artifacts to be used in the spun step. "
|
|
450
|
+
"The artifacts should be defined as a dictionary called ARTIFACTS with keys as "
|
|
451
|
+
"the artifact names and values as the artifact values. The artifact values will "
|
|
452
|
+
"overwrite the default values of the artifacts used in the spun step.",
|
|
453
|
+
)
|
|
454
|
+
@click.option(
|
|
455
|
+
"--persist/--no-persist",
|
|
456
|
+
"persist",
|
|
457
|
+
default=SPIN_PERSIST,
|
|
458
|
+
show_default=True,
|
|
459
|
+
help="Whether to persist the artifacts in the spun step. If set to False, "
|
|
460
|
+
"the artifacts will not be persisted and will not be available in the spun step's "
|
|
461
|
+
"datastore.",
|
|
462
|
+
)
|
|
463
|
+
@click.option(
|
|
464
|
+
"--max-log-size",
|
|
465
|
+
default=10,
|
|
466
|
+
show_default=True,
|
|
467
|
+
help="Maximum size of stdout and stderr captured in "
|
|
468
|
+
"megabytes. If a step outputs more than this to "
|
|
469
|
+
"stdout/stderr, its output will be truncated.",
|
|
470
|
+
)
|
|
471
|
+
@common_runner_options
|
|
472
|
+
@click.pass_obj
|
|
473
|
+
def spin(
|
|
474
|
+
obj,
|
|
475
|
+
pathspec,
|
|
476
|
+
persist=True,
|
|
477
|
+
artifacts_module=None,
|
|
478
|
+
skip_decorators=False,
|
|
479
|
+
max_log_size=None,
|
|
480
|
+
run_id_file=None,
|
|
481
|
+
runner_attribute_file=None,
|
|
482
|
+
**kwargs,
|
|
483
|
+
):
|
|
484
|
+
# Parse the pathspec argument to extract step name and full pathspec
|
|
485
|
+
step_name, parsed_pathspec = parse_spin_pathspec(pathspec, obj.flow.name)
|
|
486
|
+
|
|
487
|
+
before_run(obj, [], [], skip_decorators)
|
|
488
|
+
obj.echo(f"Spinning up step *{step_name}* locally for flow *{obj.flow.name}*")
|
|
489
|
+
# For spin, flow parameters come from the original run, but _set_constants
|
|
490
|
+
# requires them in kwargs. Use parameter defaults as placeholders - they'll be
|
|
491
|
+
# overwritten when the spin step loads artifacts from the original run.
|
|
492
|
+
flow_param_defaults = {}
|
|
493
|
+
for var, param in obj.flow._get_parameters():
|
|
494
|
+
if not param.IS_CONFIG_PARAMETER:
|
|
495
|
+
default_value = param.kwargs.get("default")
|
|
496
|
+
# Use None for required parameters without defaults
|
|
497
|
+
flow_param_defaults[param.name.replace("-", "_").lower()] = default_value
|
|
498
|
+
obj.flow._set_constants(obj.graph, flow_param_defaults, obj.config_options)
|
|
499
|
+
step_func = getattr(obj.flow, step_name, None)
|
|
500
|
+
if step_func is None:
|
|
501
|
+
raise CommandException(
|
|
502
|
+
f"Step '{step_name}' not found in flow '{obj.flow.name}'. "
|
|
503
|
+
"Please provide a valid step name."
|
|
504
|
+
)
|
|
505
|
+
from_start("Spin: before spin runtime init")
|
|
506
|
+
spin_runtime = SpinRuntime(
|
|
507
|
+
obj.flow,
|
|
508
|
+
obj.graph,
|
|
509
|
+
obj.flow_datastore,
|
|
510
|
+
obj.metadata,
|
|
511
|
+
obj.environment,
|
|
512
|
+
obj.package,
|
|
513
|
+
obj.logger,
|
|
514
|
+
obj.entrypoint,
|
|
515
|
+
obj.event_logger,
|
|
516
|
+
obj.monitor,
|
|
517
|
+
step_func,
|
|
518
|
+
step_name,
|
|
519
|
+
parsed_pathspec,
|
|
520
|
+
skip_decorators,
|
|
521
|
+
artifacts_module,
|
|
522
|
+
persist,
|
|
523
|
+
max_log_size * 1024 * 1024,
|
|
524
|
+
)
|
|
525
|
+
write_latest_run_id(obj, spin_runtime.run_id)
|
|
526
|
+
write_file(run_id_file, spin_runtime.run_id)
|
|
527
|
+
# We only need the root for the metadata, i.e. the portion before DATASTORE_LOCAL_DIR
|
|
528
|
+
datastore_root = spin_runtime._flow_datastore._storage_impl.datastore_root
|
|
529
|
+
orig_task_metadata_root = datastore_root.rsplit("/", 1)[0]
|
|
530
|
+
from_start("Spin: going to execute")
|
|
531
|
+
spin_runtime.execute()
|
|
532
|
+
from_start("Spin: after spin runtime execute")
|
|
533
|
+
|
|
534
|
+
if runner_attribute_file:
|
|
535
|
+
with open(runner_attribute_file, "w") as f:
|
|
536
|
+
json.dump(
|
|
537
|
+
{
|
|
538
|
+
"task_id": spin_runtime.task.task_id,
|
|
539
|
+
"step_name": step_name,
|
|
540
|
+
"run_id": spin_runtime.run_id,
|
|
541
|
+
"flow_name": obj.flow.name,
|
|
542
|
+
# Store metadata in a format that can be used by the Runner API
|
|
543
|
+
"metadata": f"{obj.metadata.__class__.TYPE}@{orig_task_metadata_root}",
|
|
544
|
+
},
|
|
545
|
+
f,
|
|
546
|
+
)
|
|
@@ -1,12 +1,17 @@
|
|
|
1
1
|
from metaflow._vendor import click
|
|
2
2
|
|
|
3
|
-
from .. import
|
|
3
|
+
from .. import namespace
|
|
4
4
|
from ..cli import echo_always, echo_dev_null
|
|
5
5
|
from ..cli_args import cli_args
|
|
6
|
+
from ..datastore.flow_datastore import FlowDataStore
|
|
6
7
|
from ..exception import CommandException
|
|
8
|
+
from ..client.filecache import FileCache, FileBlobCache, TaskMetadataCache
|
|
9
|
+
from ..metaflow_config import SPIN_ALLOWED_DECORATORS
|
|
10
|
+
from ..metaflow_profile import from_start
|
|
11
|
+
from ..plugins import DATASTORES
|
|
7
12
|
from ..task import MetaflowTask
|
|
8
13
|
from ..unbounded_foreach import UBF_CONTROL, UBF_TASK
|
|
9
|
-
from ..util import decompress_list
|
|
14
|
+
from ..util import decompress_list, read_artifacts_module
|
|
10
15
|
import metaflow.tracing as tracing
|
|
11
16
|
|
|
12
17
|
|
|
@@ -109,7 +114,6 @@ def step(
|
|
|
109
114
|
ubf_context="none",
|
|
110
115
|
num_parallel=None,
|
|
111
116
|
):
|
|
112
|
-
|
|
113
117
|
if ctx.obj.is_quiet:
|
|
114
118
|
echo = echo_dev_null
|
|
115
119
|
else:
|
|
@@ -118,7 +122,7 @@ def step(
|
|
|
118
122
|
if ubf_context == "none":
|
|
119
123
|
ubf_context = None
|
|
120
124
|
if opt_namespace is not None:
|
|
121
|
-
namespace(opt_namespace
|
|
125
|
+
namespace(opt_namespace)
|
|
122
126
|
|
|
123
127
|
func = None
|
|
124
128
|
try:
|
|
@@ -176,3 +180,155 @@ def step(
|
|
|
176
180
|
)
|
|
177
181
|
|
|
178
182
|
echo("Success", fg="green", bold=True, indent=True)
|
|
183
|
+
|
|
184
|
+
|
|
185
|
+
@click.command(help="Internal command to spin a single task.", hidden=True)
|
|
186
|
+
@click.argument("step-name")
|
|
187
|
+
@click.option(
|
|
188
|
+
"--run-id",
|
|
189
|
+
default=None,
|
|
190
|
+
required=True,
|
|
191
|
+
help="Original run ID for the step that will be spun",
|
|
192
|
+
)
|
|
193
|
+
@click.option(
|
|
194
|
+
"--task-id",
|
|
195
|
+
default=None,
|
|
196
|
+
required=True,
|
|
197
|
+
help="Original Task ID for the step that will be spun",
|
|
198
|
+
)
|
|
199
|
+
@click.option(
|
|
200
|
+
"--orig-flow-datastore",
|
|
201
|
+
show_default=True,
|
|
202
|
+
help="Original datastore for the flow from which a task is being spun",
|
|
203
|
+
)
|
|
204
|
+
@click.option(
|
|
205
|
+
"--input-paths",
|
|
206
|
+
help="A comma-separated list of pathspecs specifying inputs for this step.",
|
|
207
|
+
)
|
|
208
|
+
@click.option(
|
|
209
|
+
"--split-index",
|
|
210
|
+
type=int,
|
|
211
|
+
default=None,
|
|
212
|
+
show_default=True,
|
|
213
|
+
help="Index of this foreach split.",
|
|
214
|
+
)
|
|
215
|
+
@click.option(
|
|
216
|
+
"--retry-count",
|
|
217
|
+
default=0,
|
|
218
|
+
help="How many times we have attempted to run this task.",
|
|
219
|
+
)
|
|
220
|
+
@click.option(
|
|
221
|
+
"--max-user-code-retries",
|
|
222
|
+
default=0,
|
|
223
|
+
help="How many times we should attempt running the user code.",
|
|
224
|
+
)
|
|
225
|
+
@click.option(
|
|
226
|
+
"--namespace",
|
|
227
|
+
"opt_namespace",
|
|
228
|
+
default=None,
|
|
229
|
+
help="Change namespace from the default (your username) to the specified tag.",
|
|
230
|
+
)
|
|
231
|
+
@click.option(
|
|
232
|
+
"--skip-decorators/--no-skip-decorators",
|
|
233
|
+
is_flag=True,
|
|
234
|
+
default=False,
|
|
235
|
+
show_default=True,
|
|
236
|
+
help="Skip decorators attached to the step or flow.",
|
|
237
|
+
)
|
|
238
|
+
@click.option(
|
|
239
|
+
"--persist/--no-persist",
|
|
240
|
+
"persist",
|
|
241
|
+
default=True,
|
|
242
|
+
show_default=True,
|
|
243
|
+
help="Whether to persist the artifacts in the spun step. If set to false, the artifacts will not"
|
|
244
|
+
" be persisted and will not be available in the spun step's datastore.",
|
|
245
|
+
)
|
|
246
|
+
@click.option(
|
|
247
|
+
"--artifacts-module",
|
|
248
|
+
default=None,
|
|
249
|
+
show_default=True,
|
|
250
|
+
help="Path to a module that contains artifacts to be used in the spun step. The artifacts should "
|
|
251
|
+
"be defined as a dictionary called ARTIFACTS with keys as the artifact names and values as the "
|
|
252
|
+
"artifact values. The artifact values will overwrite the default values of the artifacts used in "
|
|
253
|
+
"the spun step.",
|
|
254
|
+
)
|
|
255
|
+
@click.pass_context
|
|
256
|
+
def spin_step(
|
|
257
|
+
ctx,
|
|
258
|
+
step_name,
|
|
259
|
+
orig_flow_datastore,
|
|
260
|
+
run_id=None,
|
|
261
|
+
task_id=None,
|
|
262
|
+
input_paths=None,
|
|
263
|
+
split_index=None,
|
|
264
|
+
retry_count=None,
|
|
265
|
+
max_user_code_retries=None,
|
|
266
|
+
opt_namespace=None,
|
|
267
|
+
skip_decorators=False,
|
|
268
|
+
artifacts_module=None,
|
|
269
|
+
persist=True,
|
|
270
|
+
):
|
|
271
|
+
import time
|
|
272
|
+
|
|
273
|
+
if ctx.obj.is_quiet:
|
|
274
|
+
echo = echo_dev_null
|
|
275
|
+
else:
|
|
276
|
+
echo = echo_always
|
|
277
|
+
|
|
278
|
+
if opt_namespace is not None:
|
|
279
|
+
namespace(opt_namespace)
|
|
280
|
+
|
|
281
|
+
input_paths = decompress_list(input_paths) if input_paths else []
|
|
282
|
+
|
|
283
|
+
skip_decorators = skip_decorators
|
|
284
|
+
whitelist_decorators = [] if skip_decorators else SPIN_ALLOWED_DECORATORS
|
|
285
|
+
from_start("SpinStep: initialized decorators")
|
|
286
|
+
spin_artifacts = read_artifacts_module(artifacts_module) if artifacts_module else {}
|
|
287
|
+
from_start("SpinStep: read artifacts module")
|
|
288
|
+
|
|
289
|
+
ds_type, ds_root = orig_flow_datastore.split("@")
|
|
290
|
+
orig_datastore_impl = [d for d in DATASTORES if d.TYPE == ds_type][0]
|
|
291
|
+
orig_datastore_impl.datastore_root = ds_root
|
|
292
|
+
orig_flow_datastore = FlowDataStore(
|
|
293
|
+
ctx.obj.flow.name,
|
|
294
|
+
environment=None,
|
|
295
|
+
storage_impl=orig_datastore_impl,
|
|
296
|
+
ds_root=ds_root,
|
|
297
|
+
)
|
|
298
|
+
|
|
299
|
+
filecache = FileCache()
|
|
300
|
+
orig_flow_datastore.set_metadata_cache(
|
|
301
|
+
TaskMetadataCache(filecache, ds_type, ds_root, ctx.obj.flow.name)
|
|
302
|
+
)
|
|
303
|
+
orig_flow_datastore.ca_store.set_blob_cache(
|
|
304
|
+
FileBlobCache(
|
|
305
|
+
filecache, FileCache.flow_ds_id(ds_type, ds_root, ctx.obj.flow.name)
|
|
306
|
+
)
|
|
307
|
+
)
|
|
308
|
+
|
|
309
|
+
task = MetaflowTask(
|
|
310
|
+
ctx.obj.flow,
|
|
311
|
+
ctx.obj.flow_datastore,
|
|
312
|
+
ctx.obj.metadata,
|
|
313
|
+
ctx.obj.environment,
|
|
314
|
+
echo,
|
|
315
|
+
ctx.obj.event_logger,
|
|
316
|
+
ctx.obj.monitor,
|
|
317
|
+
None, # no unbounded foreach context
|
|
318
|
+
orig_flow_datastore=orig_flow_datastore,
|
|
319
|
+
spin_artifacts=spin_artifacts,
|
|
320
|
+
)
|
|
321
|
+
from_start("SpinStep: initialized task")
|
|
322
|
+
task.run_step(
|
|
323
|
+
step_name,
|
|
324
|
+
run_id,
|
|
325
|
+
task_id,
|
|
326
|
+
None,
|
|
327
|
+
input_paths,
|
|
328
|
+
split_index,
|
|
329
|
+
retry_count,
|
|
330
|
+
max_user_code_retries,
|
|
331
|
+
whitelist_decorators,
|
|
332
|
+
persist,
|
|
333
|
+
)
|
|
334
|
+
from_start("SpinStep: ran step")
|