ob-metaflow 2.12.30.2__py2.py3-none-any.whl → 2.13.6.1__py2.py3-none-any.whl
This diff compares the content of two publicly available package versions released to a supported registry. It is provided for informational purposes only and reflects the changes between the versions as they appear in their public registries.
Potentially problematic release.
This version of ob-metaflow might be problematic.
- metaflow/__init__.py +3 -0
- metaflow/cards.py +1 -0
- metaflow/cli.py +185 -717
- metaflow/cli_args.py +17 -0
- metaflow/cli_components/__init__.py +0 -0
- metaflow/cli_components/dump_cmd.py +96 -0
- metaflow/cli_components/init_cmd.py +51 -0
- metaflow/cli_components/run_cmds.py +362 -0
- metaflow/cli_components/step_cmd.py +176 -0
- metaflow/cli_components/utils.py +140 -0
- metaflow/cmd/develop/stub_generator.py +9 -2
- metaflow/datastore/flow_datastore.py +2 -2
- metaflow/decorators.py +63 -2
- metaflow/exception.py +8 -2
- metaflow/extension_support/plugins.py +42 -27
- metaflow/flowspec.py +176 -23
- metaflow/graph.py +28 -27
- metaflow/includefile.py +50 -22
- metaflow/lint.py +35 -20
- metaflow/metadata_provider/heartbeat.py +23 -8
- metaflow/metaflow_config.py +10 -1
- metaflow/multicore_utils.py +31 -14
- metaflow/package.py +17 -3
- metaflow/parameters.py +97 -25
- metaflow/plugins/__init__.py +22 -0
- metaflow/plugins/airflow/airflow.py +18 -17
- metaflow/plugins/airflow/airflow_cli.py +1 -0
- metaflow/plugins/argo/argo_client.py +0 -2
- metaflow/plugins/argo/argo_workflows.py +195 -132
- metaflow/plugins/argo/argo_workflows_cli.py +1 -1
- metaflow/plugins/argo/argo_workflows_decorator.py +2 -4
- metaflow/plugins/argo/argo_workflows_deployer_objects.py +51 -9
- metaflow/plugins/argo/jobset_input_paths.py +0 -1
- metaflow/plugins/aws/aws_utils.py +6 -1
- metaflow/plugins/aws/batch/batch_client.py +1 -3
- metaflow/plugins/aws/batch/batch_decorator.py +13 -13
- metaflow/plugins/aws/secrets_manager/aws_secrets_manager_secrets_provider.py +13 -10
- metaflow/plugins/aws/step_functions/dynamo_db_client.py +0 -3
- metaflow/plugins/aws/step_functions/production_token.py +1 -1
- metaflow/plugins/aws/step_functions/step_functions.py +33 -1
- metaflow/plugins/aws/step_functions/step_functions_cli.py +1 -1
- metaflow/plugins/aws/step_functions/step_functions_decorator.py +0 -1
- metaflow/plugins/aws/step_functions/step_functions_deployer_objects.py +7 -9
- metaflow/plugins/cards/card_cli.py +7 -2
- metaflow/plugins/cards/card_creator.py +1 -0
- metaflow/plugins/cards/card_decorator.py +79 -8
- metaflow/plugins/cards/card_modules/basic.py +56 -5
- metaflow/plugins/cards/card_modules/card.py +16 -1
- metaflow/plugins/cards/card_modules/components.py +64 -16
- metaflow/plugins/cards/card_modules/main.js +27 -25
- metaflow/plugins/cards/card_modules/test_cards.py +4 -4
- metaflow/plugins/cards/component_serializer.py +1 -1
- metaflow/plugins/datatools/s3/s3.py +12 -4
- metaflow/plugins/datatools/s3/s3op.py +3 -3
- metaflow/plugins/events_decorator.py +338 -186
- metaflow/plugins/kubernetes/kube_utils.py +84 -1
- metaflow/plugins/kubernetes/kubernetes.py +40 -92
- metaflow/plugins/kubernetes/kubernetes_cli.py +32 -7
- metaflow/plugins/kubernetes/kubernetes_decorator.py +76 -4
- metaflow/plugins/kubernetes/kubernetes_job.py +23 -20
- metaflow/plugins/kubernetes/kubernetes_jobsets.py +41 -20
- metaflow/plugins/kubernetes/spot_metadata_cli.py +69 -0
- metaflow/plugins/kubernetes/spot_monitor_sidecar.py +109 -0
- metaflow/plugins/parallel_decorator.py +4 -1
- metaflow/plugins/project_decorator.py +33 -5
- metaflow/plugins/pypi/bootstrap.py +249 -81
- metaflow/plugins/pypi/conda_decorator.py +20 -10
- metaflow/plugins/pypi/conda_environment.py +83 -27
- metaflow/plugins/pypi/micromamba.py +82 -37
- metaflow/plugins/pypi/pip.py +9 -6
- metaflow/plugins/pypi/pypi_decorator.py +11 -9
- metaflow/plugins/pypi/utils.py +4 -2
- metaflow/plugins/timeout_decorator.py +2 -2
- metaflow/runner/click_api.py +240 -50
- metaflow/runner/deployer.py +1 -1
- metaflow/runner/deployer_impl.py +12 -11
- metaflow/runner/metaflow_runner.py +68 -34
- metaflow/runner/nbdeploy.py +2 -0
- metaflow/runner/nbrun.py +1 -1
- metaflow/runner/subprocess_manager.py +61 -10
- metaflow/runner/utils.py +208 -44
- metaflow/runtime.py +216 -112
- metaflow/sidecar/sidecar_worker.py +1 -1
- metaflow/tracing/tracing_modules.py +4 -1
- metaflow/user_configs/__init__.py +0 -0
- metaflow/user_configs/config_decorators.py +563 -0
- metaflow/user_configs/config_options.py +548 -0
- metaflow/user_configs/config_parameters.py +436 -0
- metaflow/util.py +22 -0
- metaflow/version.py +1 -1
- {ob_metaflow-2.12.30.2.dist-info → ob_metaflow-2.13.6.1.dist-info}/METADATA +12 -3
- {ob_metaflow-2.12.30.2.dist-info → ob_metaflow-2.13.6.1.dist-info}/RECORD +96 -84
- {ob_metaflow-2.12.30.2.dist-info → ob_metaflow-2.13.6.1.dist-info}/WHEEL +1 -1
- {ob_metaflow-2.12.30.2.dist-info → ob_metaflow-2.13.6.1.dist-info}/LICENSE +0 -0
- {ob_metaflow-2.12.30.2.dist-info → ob_metaflow-2.13.6.1.dist-info}/entry_points.txt +0 -0
- {ob_metaflow-2.12.30.2.dist-info → ob_metaflow-2.13.6.1.dist-info}/top_level.txt +0 -0
metaflow/cli.py
CHANGED
@@ -1,18 +1,19 @@
+import functools
 import inspect
-import json
 import sys
 import traceback
 from datetime import datetime
-from functools import wraps
 
 import metaflow.tracing as tracing
 from metaflow._vendor import click
-from metaflow.client.core import get_metadata
 
-from . import decorators, lint, metaflow_version,
+from . import decorators, lint, metaflow_version, parameters, plugins
 from .cli_args import cli_args
-from .
+from .cli_components.utils import LazyGroup, LazyPluginCommandCollection
+from .datastore import FlowDataStore, TaskDataStoreSet
+from .debug import debug
 from .exception import CommandException, MetaflowException
+from .flowspec import _FlowState
 from .graph import FlowGraph
 from .metaflow_config import (
     DECOSPECS,
@@ -26,8 +27,6 @@ from .metaflow_config import (
 from .metaflow_current import current
 from metaflow.system import _system_monitor, _system_logger
 from .metaflow_environment import MetaflowEnvironment
-from .mflog import LOG_SOURCES, mflog
-from .package import MetaflowPackage
 from .plugins import (
     DATASTORES,
     ENVIRONMENTS,
@@ -37,16 +36,9 @@ from .plugins import (
 )
 from .pylint_wrapper import PyLint
 from .R import metaflow_r_version, use_r
-from .
-from .
-from .
-from .unbounded_foreach import UBF_CONTROL, UBF_TASK
-from .util import (
-    decompress_list,
-    get_latest_run_id,
-    resolve_identity,
-    write_latest_run_id,
-)
+from .util import get_latest_run_id, resolve_identity
+from .user_configs.config_options import LocalFileInput, config_options
+from .user_configs.config_parameters import ConfigValue
 
 ERASE_TO_EOL = "\033[K"
 HIGHLIGHT = "red"
@@ -56,13 +48,6 @@ LOGGER_TIMESTAMP = "magenta"
 LOGGER_COLOR = "green"
 LOGGER_BAD_COLOR = "red"
 
-try:
-    # Python 2
-    import cPickle as pickle
-except ImportError:
-    # Python 3
-    import pickle
-
 
 def echo_dev_null(*args, **kwargs):
     pass
@@ -141,7 +126,16 @@ def config_merge_cb(ctx, param, value):
     return tuple(list(value) + DECOSPECS.split())
 
 
-@click.group(
+@click.group(
+    cls=LazyGroup,
+    lazy_subcommands={
+        "init": "metaflow.cli_components.init_cmd.init",
+        "dump": "metaflow.cli_components.dump_cmd.dump",
+        "step": "metaflow.cli_components.step_cmd.step",
+        "run": "metaflow.cli_components.run_cmds.run",
+        "resume": "metaflow.cli_components.run_cmds.resume",
+    },
+)
 def cli(ctx):
     pass
 
@@ -155,7 +149,13 @@ def cli(ctx):
 )
 @click.pass_obj
 def check(obj, warnings=False):
-
+    if obj.is_quiet:
+        echo = echo_dev_null
+    else:
+        echo = echo_always
+    _check(
+        echo, obj.graph, obj.flow, obj.environment, pylint=obj.pylint, warnings=warnings
+    )
     fname = inspect.getfile(obj.flow.__class__)
     echo(
         "\n*'{cmd} show'* shows a description of this flow.\n"
@@ -170,7 +170,8 @@ def check(obj, warnings=False):
 @click.pass_obj
 def show(obj):
     echo_always("\n%s" % obj.graph.doc)
-    for
+    for node_name in obj.graph.sorted_nodes:
+        node = obj.graph[node_name]
         echo_always("\nStep *%s*" % node.name, err=False)
         echo_always(node.doc if node.doc else "?", indent=True, err=False)
         if node.type != "end":
@@ -221,670 +222,32 @@ def output_dot(obj):
     echo_always(obj.graph.output_dot(), err=False)
 
 
-@cli.command(
-    help="Get data artifacts of a task or all tasks in a step. "
-    "The format for input-path is either <run_id>/<step_name> or "
-    "<run_id>/<step_name>/<task_id>."
-)
-@click.argument("input-path")
-@click.option(
-    "--private/--no-private",
-    default=False,
-    show_default=True,
-    help="Show also private attributes.",
-)
-@click.option(
-    "--max-value-size",
-    default=1000,
-    show_default=True,
-    type=int,
-    help="Show only values that are smaller than this number. "
-    "Set to 0 to see only keys.",
-)
-@click.option(
-    "--include",
-    type=str,
-    default="",
-    help="Include only artifacts in the given comma-separated list.",
-)
-@click.option(
-    "--file", type=str, default=None, help="Serialize artifacts in the given file."
-)
-@click.pass_obj
-def dump(obj, input_path, private=None, max_value_size=None, include=None, file=None):
-    output = {}
-    kwargs = {
-        "show_private": private,
-        "max_value_size": max_value_size,
-        "include": {t for t in include.split(",") if t},
-    }
-
-    # Pathspec can either be run_id/step_name or run_id/step_name/task_id.
-    parts = input_path.split("/")
-    if len(parts) == 2:
-        run_id, step_name = parts
-        task_id = None
-    elif len(parts) == 3:
-        run_id, step_name, task_id = parts
-    else:
-        raise CommandException(
-            "input_path should either be run_id/step_name or run_id/step_name/task_id"
-        )
-
-    datastore_set = TaskDataStoreSet(
-        obj.flow_datastore,
-        run_id,
-        steps=[step_name],
-        prefetch_data_artifacts=kwargs.get("include"),
-    )
-    if task_id:
-        ds_list = [datastore_set.get_with_pathspec(input_path)]
-    else:
-        ds_list = list(datastore_set)  # get all tasks
-
-    for ds in ds_list:
-        echo(
-            "Dumping output of run_id=*{run_id}* "
-            "step=*{step}* task_id=*{task_id}*".format(
-                run_id=ds.run_id, step=ds.step_name, task_id=ds.task_id
-            ),
-            fg="magenta",
-        )
-
-        if file is None:
-            echo_always(
-                ds.format(**kwargs), highlight="green", highlight_bold=False, err=False
-            )
-        else:
-            output[ds.pathspec] = ds.to_dict(**kwargs)
-
-    if file is not None:
-        with open(file, "wb") as f:
-            pickle.dump(output, f, protocol=pickle.HIGHEST_PROTOCOL)
-        echo("Artifacts written to *%s*" % file)
-
-
-# TODO - move step and init under a separate 'internal' subcommand
-
-
-@cli.command(help="Internal command to execute a single task.", hidden=True)
-@click.argument("step-name")
-@click.option(
-    "--run-id",
-    default=None,
-    required=True,
-    help="ID for one execution of all steps in the flow.",
-)
-@click.option(
-    "--task-id",
-    default=None,
-    required=True,
-    show_default=True,
-    help="ID for this instance of the step.",
-)
-@click.option(
-    "--input-paths",
-    help="A comma-separated list of pathspecs specifying inputs for this step.",
-)
-@click.option(
-    "--input-paths-filename",
-    type=click.Path(exists=True, readable=True, dir_okay=False, resolve_path=True),
-    help="A filename containing the argument typically passed to `input-paths`",
-    hidden=True,
-)
-@click.option(
-    "--split-index",
-    type=int,
-    default=None,
-    show_default=True,
-    help="Index of this foreach split.",
-)
-@click.option(
-    "--tag",
-    "opt_tag",
-    multiple=True,
-    default=None,
-    help="Annotate this run with the given tag. You can specify "
-    "this option multiple times to attach multiple tags in "
-    "the task.",
-)
-@click.option(
-    "--namespace",
-    "opt_namespace",
-    default=None,
-    help="Change namespace from the default (your username) to the specified tag.",
-)
-@click.option(
-    "--retry-count",
-    default=0,
-    help="How many times we have attempted to run this task.",
-)
-@click.option(
-    "--max-user-code-retries",
-    default=0,
-    help="How many times we should attempt running the user code.",
-)
-@click.option(
-    "--clone-only",
-    default=None,
-    help="Pathspec of the origin task for this task to clone. Do "
-    "not execute anything.",
-)
-@click.option(
-    "--clone-run-id",
-    default=None,
-    help="Run id of the origin flow, if this task is part of a flow being resumed.",
-)
-@click.option(
-    "--with",
-    "decospecs",
-    multiple=True,
-    help="Add a decorator to this task. You can specify this "
-    "option multiple times to attach multiple decorators "
-    "to this task.",
-)
-@click.option(
-    "--ubf-context",
-    default="none",
-    type=click.Choice(["none", UBF_CONTROL, UBF_TASK]),
-    help="Provides additional context if this task is of type unbounded foreach.",
-)
-@click.option(
-    "--num-parallel",
-    default=0,
-    type=int,
-    help="Number of parallel instances of a step. Ignored in local mode (see parallel decorator code).",
-)
-@click.pass_context
-def step(
-    ctx,
-    step_name,
-    opt_tag=None,
-    run_id=None,
-    task_id=None,
-    input_paths=None,
-    input_paths_filename=None,
-    split_index=None,
-    opt_namespace=None,
-    retry_count=None,
-    max_user_code_retries=None,
-    clone_only=None,
-    clone_run_id=None,
-    decospecs=None,
-    ubf_context="none",
-    num_parallel=None,
-):
-    if ubf_context == "none":
-        ubf_context = None
-    if opt_namespace is not None:
-        namespace(opt_namespace or None)
-
-    func = None
-    try:
-        func = getattr(ctx.obj.flow, step_name)
-    except:
-        raise CommandException("Step *%s* doesn't exist." % step_name)
-    if not func.is_step:
-        raise CommandException("Function *%s* is not a step." % step_name)
-    echo("Executing a step, *%s*" % step_name, fg="magenta", bold=False)
-
-    if decospecs:
-        decorators._attach_decorators_to_step(func, decospecs)
-
-    step_kwargs = ctx.params
-    # Remove argument `step_name` from `step_kwargs`.
-    step_kwargs.pop("step_name", None)
-    # Remove `opt_*` prefix from (some) option keys.
-    step_kwargs = dict(
-        [(k[4:], v) if k.startswith("opt_") else (k, v) for k, v in step_kwargs.items()]
-    )
-    cli_args._set_step_kwargs(step_kwargs)
-
-    ctx.obj.metadata.add_sticky_tags(tags=opt_tag)
-    if not input_paths and input_paths_filename:
-        with open(input_paths_filename, mode="r", encoding="utf-8") as f:
-            input_paths = f.read().strip(" \n\"'")
-
-    paths = decompress_list(input_paths) if input_paths else []
-
-    task = MetaflowTask(
-        ctx.obj.flow,
-        ctx.obj.flow_datastore,
-        ctx.obj.metadata,
-        ctx.obj.environment,
-        ctx.obj.echo,
-        ctx.obj.event_logger,
-        ctx.obj.monitor,
-        ubf_context,
-    )
-    if clone_only:
-        task.clone_only(
-            step_name,
-            run_id,
-            task_id,
-            clone_only,
-            retry_count,
-        )
-    else:
-        task.run_step(
-            step_name,
-            run_id,
-            task_id,
-            clone_run_id,
-            paths,
-            split_index,
-            retry_count,
-            max_user_code_retries,
-        )
-
-    echo("Success", fg="green", bold=True, indent=True)
-
-
-@parameters.add_custom_parameters(deploy_mode=False)
-@cli.command(help="Internal command to initialize a run.", hidden=True)
-@click.option(
-    "--run-id",
-    default=None,
-    required=True,
-    help="ID for one execution of all steps in the flow.",
-)
-@click.option(
-    "--task-id", default=None, required=True, help="ID for this instance of the step."
-)
-@click.option(
-    "--tag",
-    "tags",
-    multiple=True,
-    default=None,
-    help="Tags for this instance of the step.",
-)
-@click.pass_obj
-def init(obj, run_id=None, task_id=None, tags=None, **kwargs):
-    # init is a separate command instead of an option in 'step'
-    # since we need to capture user-specified parameters with
-    # @add_custom_parameters. Adding custom parameters to 'step'
-    # is not desirable due to the possibility of name clashes between
-    # user-specified parameters and our internal options. Note that
-    # user-specified parameters are often defined as environment
-    # variables.
-
-    obj.metadata.add_sticky_tags(tags=tags)
-
-    runtime = NativeRuntime(
-        obj.flow,
-        obj.graph,
-        obj.flow_datastore,
-        obj.metadata,
-        obj.environment,
-        obj.package,
-        obj.logger,
-        obj.entrypoint,
-        obj.event_logger,
-        obj.monitor,
-        run_id=run_id,
-    )
-    obj.flow._set_constants(obj.graph, kwargs)
-    runtime.persist_constants(task_id=task_id)
-
-
-def common_run_options(func):
-    @click.option(
-        "--tag",
-        "tags",
-        multiple=True,
-        default=None,
-        help="Annotate this run with the given tag. You can specify "
-        "this option multiple times to attach multiple tags in "
-        "the run.",
-    )
-    @click.option(
-        "--max-workers",
-        default=16,
-        show_default=True,
-        help="Maximum number of parallel processes.",
-    )
-    @click.option(
-        "--max-num-splits",
-        default=100,
-        show_default=True,
-        help="Maximum number of splits allowed in a foreach. This "
-        "is a safety check preventing bugs from triggering "
-        "thousands of steps inadvertently.",
-    )
-    @click.option(
-        "--max-log-size",
-        default=10,
-        show_default=True,
-        help="Maximum size of stdout and stderr captured in "
-        "megabytes. If a step outputs more than this to "
-        "stdout/stderr, its output will be truncated.",
-    )
-    @click.option(
-        "--with",
-        "decospecs",
-        multiple=True,
-        help="Add a decorator to all steps. You can specify this "
-        "option multiple times to attach multiple decorators "
-        "in steps.",
-    )
-    @click.option(
-        "--run-id-file",
-        default=None,
-        show_default=True,
-        type=str,
-        help="Write the ID of this run to the file specified.",
-    )
-    @click.option(
-        "--runner-attribute-file",
-        default=None,
-        show_default=True,
-        type=str,
-        help="Write the metadata and pathspec of this run to the file specified. Used internally for Metaflow's Runner API.",
-    )
-    @wraps(func)
-    def wrapper(*args, **kwargs):
-        return func(*args, **kwargs)
-
-    return wrapper
-
-
-@click.option(
-    "--origin-run-id",
-    default=None,
-    help="ID of the run that should be resumed. By default, the "
-    "last run executed locally.",
-)
-@click.option(
-    "--run-id",
-    default=None,
-    help="Run ID for the new run. By default, a new run-id will be generated",
-    hidden=True,
-)
-@click.option(
-    "--clone-only/--no-clone-only",
-    default=False,
-    show_default=True,
-    help="Only clone tasks without continuing execution",
-    hidden=True,
-)
-@click.option(
-    "--reentrant/--no-reentrant",
-    default=False,
-    show_default=True,
-    hidden=True,
-    help="If specified, allows this call to be called in parallel",
-)
-@click.option(
-    "--resume-identifier",
-    default=None,
-    show_default=True,
-    hidden=True,
-    help="If specified, it identifies the task that started this resume call. It is in the form of {step_name}-{task_id}",
-)
-@click.argument("step-to-rerun", required=False)
-@cli.command(help="Resume execution of a previous run of this flow.")
-@common_run_options
-@click.pass_obj
-def resume(
-    obj,
-    tags=None,
-    step_to_rerun=None,
-    origin_run_id=None,
-    run_id=None,
-    clone_only=False,
-    reentrant=False,
-    max_workers=None,
-    max_num_splits=None,
-    max_log_size=None,
-    decospecs=None,
-    run_id_file=None,
-    resume_identifier=None,
-    runner_attribute_file=None,
-):
-    before_run(obj, tags, decospecs)
-
-    if origin_run_id is None:
-        origin_run_id = get_latest_run_id(obj.echo, obj.flow.name)
-        if origin_run_id is None:
-            raise CommandException(
-                "A previous run id was not found. Specify --origin-run-id."
-            )
-
-    if step_to_rerun is None:
-        steps_to_rerun = set()
-    else:
-        # validate step name
-        if step_to_rerun not in obj.graph.nodes:
-            raise CommandException(
-                "invalid step name {0} specified, must be step present in "
-                "current form of execution graph. Valid step names include: {1}".format(
-                    step_to_rerun, ",".join(list(obj.graph.nodes.keys()))
-                )
-            )
-        steps_to_rerun = {step_to_rerun}
-
-    if run_id:
-        # Run-ids that are provided by the metadata service are always integers.
-        # External providers or run-ids (like external schedulers) always need to
-        # be non-integers to avoid any clashes. This condition ensures this.
-        try:
-            int(run_id)
-        except:
-            pass
-        else:
-            raise CommandException("run-id %s cannot be an integer" % run_id)
-
-    runtime = NativeRuntime(
-        obj.flow,
-        obj.graph,
-        obj.flow_datastore,
-        obj.metadata,
-        obj.environment,
-        obj.package,
-        obj.logger,
-        obj.entrypoint,
-        obj.event_logger,
-        obj.monitor,
-        run_id=run_id,
-        clone_run_id=origin_run_id,
-        clone_only=clone_only,
-        reentrant=reentrant,
-        steps_to_rerun=steps_to_rerun,
-        max_workers=max_workers,
-        max_num_splits=max_num_splits,
-        max_log_size=max_log_size * 1024 * 1024,
-        resume_identifier=resume_identifier,
-    )
-    write_file(run_id_file, runtime.run_id)
-    runtime.print_workflow_info()
-
-    runtime.persist_constants()
-
-    if runner_attribute_file:
-        with open(runner_attribute_file, "w", encoding="utf-8") as f:
-            json.dump(
-                {
-                    "run_id": runtime.run_id,
-                    "flow_name": obj.flow.name,
-                    "metadata": obj.metadata.metadata_str(),
-                },
-                f,
-            )
-
-    # We may skip clone-only resume if this is not a resume leader,
-    # and clone is already complete.
-    if runtime.should_skip_clone_only_execution():
-        return
-
-    current._update_env(
-        {
-            "run_id": runtime.run_id,
-        }
-    )
-    _system_logger.log_event(
-        level="info",
-        module="metaflow.resume",
-        name="start",
-        payload={
-            "msg": "Resuming run",
-        },
-    )
-
-    with runtime.run_heartbeat():
-        if clone_only:
-            runtime.clone_original_run()
-        else:
-            runtime.clone_original_run(generate_task_obj=True, verbose=False)
-            runtime.execute()
-
-
-@tracing.cli_entrypoint("cli/run")
-@parameters.add_custom_parameters(deploy_mode=True)
-@cli.command(help="Run the workflow locally.")
-@common_run_options
-@click.option(
-    "--namespace",
-    "user_namespace",
-    default=None,
-    help="Change namespace from the default (your username) to "
-    "the specified tag. Note that this option does not alter "
-    "tags assigned to the objects produced by this run, just "
-    "what existing objects are visible in the client API. You "
-    "can enable the global namespace with an empty string."
-    "--namespace=",
-)
-@click.pass_obj
-def run(
-    obj,
-    tags=None,
-    max_workers=None,
-    max_num_splits=None,
-    max_log_size=None,
-    decospecs=None,
-    run_id_file=None,
-    runner_attribute_file=None,
-    user_namespace=None,
-    **kwargs
-):
-    if user_namespace is not None:
-        namespace(user_namespace or None)
-    before_run(obj, tags, decospecs)
-
-    runtime = NativeRuntime(
-        obj.flow,
-        obj.graph,
-        obj.flow_datastore,
-        obj.metadata,
-        obj.environment,
-        obj.package,
-        obj.logger,
-        obj.entrypoint,
-        obj.event_logger,
-        obj.monitor,
-        max_workers=max_workers,
-        max_num_splits=max_num_splits,
-        max_log_size=max_log_size * 1024 * 1024,
-    )
-    write_latest_run_id(obj, runtime.run_id)
-    write_file(run_id_file, runtime.run_id)
-
-    obj.flow._set_constants(obj.graph, kwargs)
-    current._update_env(
-        {
-            "run_id": runtime.run_id,
-        }
-    )
-    _system_logger.log_event(
-        level="info",
-        module="metaflow.run",
-        name="start",
-        payload={
-            "msg": "Starting run",
-        },
-    )
-    runtime.print_workflow_info()
-    runtime.persist_constants()
-
-    if runner_attribute_file:
-        with open(runner_attribute_file, "w", encoding="utf-8") as f:
-            json.dump(
-                {
-                    "run_id": runtime.run_id,
-                    "flow_name": obj.flow.name,
-                    "metadata": obj.metadata.metadata_str(),
-                },
-                f,
-            )
-    runtime.execute()
-
-
-def write_file(file_path, content):
-    if file_path is not None:
-        with open(file_path, "w") as f:
-            f.write(str(content))
-
-
-def before_run(obj, tags, decospecs):
-    validate_tags(tags)
-
-    # There's a --with option both at the top-level and for the run
-    # subcommand. Why?
-    #
-    # "run --with shoes" looks so much better than "--with shoes run".
-    # This is a very common use case of --with.
-    #
-    # A downside is that we need to have the following decorators handling
-    # in two places in this module and make sure _init_step_decorators
-    # doesn't get called twice.
-
-    # We want the order to be the following:
-    # - run level decospecs
-    # - top level decospecs
-    # - environment decospecs
-    all_decospecs = (
-        list(decospecs or [])
-        + obj.tl_decospecs
-        + list(obj.environment.decospecs() or [])
-    )
-    if all_decospecs:
-        decorators._attach_decorators(obj.flow, all_decospecs)
-        obj.graph = FlowGraph(obj.flow.__class__)
-
-    obj.check(obj.graph, obj.flow, obj.environment, pylint=obj.pylint)
-    # obj.environment.init_environment(obj.logger)
-
-    decorators._init_step_decorators(
-        obj.flow, obj.graph, obj.environment, obj.flow_datastore, obj.logger
-    )
-
-    obj.metadata.add_sticky_tags(tags=tags)
-
-    # Package working directory only once per run.
-    # We explicitly avoid doing this in `start` since it is invoked for every
-    # step in the run.
-    obj.package = MetaflowPackage(
-        obj.flow, obj.environment, obj.echo, obj.package_suffixes
-    )
-
-
 @cli.command(help="Print the Metaflow version")
 @click.pass_obj
 def version(obj):
     echo_always(obj.version)
 
 
-
+# NOTE: add_decorator_options should be TL because it checks to make sure
+# that no option conflict with the ones below
 @decorators.add_decorator_options
+@config_options
 @click.command(
-    cls=
-    sources=[cli]
+    cls=LazyPluginCommandCollection,
+    sources=[cli],
+    lazy_sources=plugins.get_plugin_cli_path(),
     invoke_without_command=True,
 )
+@tracing.cli_entrypoint("cli/start")
+# Quiet is eager to make sure it is available when processing --config options since
+# we need it to construct a context to pass to any DeployTimeField for the default
+# value.
 @click.option(
     "--quiet/--not-quiet",
     show_default=True,
    default=False,
     help="Suppress unnecessary messages",
+    is_eager=True,
 )
 @click.option(
     "--metadata",
@@ -900,12 +263,14 @@ def version(obj):
     type=click.Choice(["local"] + [m.TYPE for m in ENVIRONMENTS]),
     help="Execution environment type",
 )
+# See comment for --quiet
 @click.option(
     "--datastore",
     default=DEFAULT_DATASTORE,
     show_default=True,
     type=click.Choice([d.TYPE for d in DATASTORES]),
     help="Data backend type",
+    is_eager=True,
 )
 @click.option("--datastore-root", help="Root path for datastore")
 @click.option(
@@ -942,6 +307,15 @@
     type=click.Choice(MONITOR_SIDECARS),
     help="Monitoring backend type",
 )
+@click.option(
+    "--local-config-file",
+    type=LocalFileInput(exists=True, readable=True, dir_okay=False, resolve_path=True),
+    required=False,
+    default=None,
+    help="A filename containing the dumped configuration values. Internal use only.",
+    hidden=True,
+    is_eager=True,
+)
 @click.pass_context
 def start(
     ctx,
@@ -955,9 +329,11 @@ def start(
     pylint=None,
     event_logger=None,
     monitor=None,
+    local_config_file=None,
+    config_file=None,
+    config_value=None,
     **deco_options
 ):
-    global echo
     if quiet:
         echo = echo_dev_null
     else:
@@ -972,17 +348,111 @@ def start(
         echo(" executing *%s*" % ctx.obj.flow.name, fg="magenta", nl=False)
         echo(" for *%s*" % resolve_identity(), fg="magenta")
 
+    # Setup the context
     cli_args._set_top_kwargs(ctx.params)
     ctx.obj.echo = echo
     ctx.obj.echo_always = echo_always
     ctx.obj.is_quiet = quiet
-    ctx.obj.graph = FlowGraph(ctx.obj.flow.__class__)
     ctx.obj.logger = logger
-    ctx.obj.check = _check
     ctx.obj.pylint = pylint
+    ctx.obj.check = functools.partial(_check, echo)
     ctx.obj.top_cli = cli
     ctx.obj.package_suffixes = package_suffixes.split(",")
-
+
+    ctx.obj.datastore_impl = [d for d in DATASTORES if d.TYPE == datastore][0]
+
+    if datastore_root is None:
+        datastore_root = ctx.obj.datastore_impl.get_datastore_root_from_config(
+            ctx.obj.echo
+        )
+    if datastore_root is None:
+        raise CommandException(
+            "Could not find the location of the datastore -- did you correctly set the "
+            "METAFLOW_DATASTORE_SYSROOT_%s environment variable?" % datastore.upper()
+        )
+
+    ctx.obj.datastore_impl.datastore_root = datastore_root
+
+    FlowDataStore.default_storage_impl = ctx.obj.datastore_impl
+
+    # At this point, we are able to resolve the user-configuration options so we can
+    # process all those decorators that the user added that will modify the flow based
+    # on those configurations. It is important to do this as early as possible since it
+    # actually modifies the flow itself
+
+    # When we process the options, the first one processed will return None and the
+    # second one processed will return the actual options. The order of processing
+    # depends on what (and in what order) the user specifies on the command line.
+    config_options = config_file or config_value
+
+    if (
+        hasattr(ctx, "saved_args")
+        and ctx.saved_args
+        and ctx.saved_args[0] == "resume"
+        and getattr(ctx.obj, "has_config_options", False)
+    ):
+        # In the case of resume, we actually need to load the configurations
+        # from the resumed run to process them. This can be slightly onerous so check
+        # if we need to in the first place
+        if getattr(ctx.obj, "has_cl_config_options", False):
+            raise click.UsageError(
+                "Cannot specify --config-file or --config-value with 'resume'"
+            )
+        # We now load the config artifacts from the original run id
+        run_id = None
+        try:
+            idx = ctx.saved_args.index("--origin-run-id")
+        except ValueError:
+            idx = -1
+        if idx >= 0:
+            run_id = ctx.saved_args[idx + 1]
+        else:
+            run_id = get_latest_run_id(ctx.obj.echo, ctx.obj.flow.name)
+        if run_id is None:
+            raise CommandException(
+                "A previous run id was not found. Specify --origin-run-id."
+            )
+        # We get the name of the parameters we need to load from the datastore -- these
+        # are accessed using the *variable* name and not necessarily the *parameter* name
+        config_var_names = []
+        config_param_names = []
+        for name, param in ctx.obj.flow._get_parameters():
+            if not param.IS_CONFIG_PARAMETER:
+                continue
+            config_var_names.append(name)
+            config_param_names.append(param.name)
+
+        # We just need a task datastore that will be thrown away -- we do this so
+        # we don't have to create the logger, monitor, etc.
+        debug.userconf_exec("Loading config parameters from run %s" % run_id)
+        for d in TaskDataStoreSet(
+            FlowDataStore(ctx.obj.flow.name),
+            run_id,
+            steps=["_parameters"],
+            prefetch_data_artifacts=config_var_names,
+        ):
+            param_ds = d
+
+        # We can now set the the CONFIGS value in the flow properly. This will overwrite
+        # anything that may have been passed in by default and we will use exactly what
+        # the original flow had. Note that these are accessed through the parameter name
+        ctx.obj.flow._flow_state[_FlowState.CONFIGS].clear()
+        d = ctx.obj.flow._flow_state[_FlowState.CONFIGS]
+        for param_name, var_name in zip(config_param_names, config_var_names):
+            val = param_ds[var_name]
+            debug.userconf_exec("Loaded config %s as: %s" % (param_name, val))
+            d[param_name] = val
+
+    elif getattr(ctx.obj, "delayed_config_exception", None):
+        # If we are not doing a resume, any exception we had parsing configs needs to
+        # be raised. For resume, since we ignore those options, we ignore the error.
+        raise ctx.obj.delayed_config_exception
+
+    new_cls = ctx.obj.flow._process_config_decorators(config_options)
+    if new_cls:
+        ctx.obj.flow = new_cls(use_cli=False)
+
+    ctx.obj.graph = ctx.obj.flow._graph
 
     ctx.obj.environment = [
         e for e in ENVIRONMENTS + [MetaflowEnvironment] if e.TYPE == environment
@@ -1005,21 +475,6 @@ def start(
         ctx.obj.environment, ctx.obj.flow, ctx.obj.event_logger, ctx.obj.monitor
     )
 
-    ctx.obj.datastore_impl = [d for d in DATASTORES if d.TYPE == datastore][0]
-
-    if datastore_root is None:
-        datastore_root = ctx.obj.datastore_impl.get_datastore_root_from_config(
-            ctx.obj.echo
-        )
-    if datastore_root is None:
-        raise CommandException(
-            "Could not find the location of the datastore -- did you correctly set the "
-            "METAFLOW_DATASTORE_SYSROOT_%s environment variable?" % datastore.upper()
-        )
-
-    ctx.obj.datastore_impl.datastore_root = datastore_root
-
-    FlowDataStore.default_storage_impl = ctx.obj.datastore_impl
     ctx.obj.flow_datastore = FlowDataStore(
         ctx.obj.flow.name,
         ctx.obj.environment,
@@ -1028,6 +483,10 @@ def start(
         ctx.obj.monitor,
     )
 
+    ctx.obj.config_options = config_options
+
+    decorators._init(ctx.obj.flow)
+
     # It is important to initialize flow decorators early as some of the
     # things they provide may be used by some of the objects initialized after.
     decorators._init_flow_decorators(
@@ -1045,15 +504,32 @@ def start(
     # *after* the run decospecs so that they don't take precedence. In other
     # words, for the same decorator, we want `myflow.py run --with foo` to
     # take precedence over any other `foo` decospec
+
+    # Note that top-level decospecs are used primarily with non run/resume
+    # options as well as with the airflow/argo/sfn integrations which pass
+    # all the decospecs (the ones from top-level but also the ones from the
+    # run/resume level) through the tl decospecs.
     ctx.obj.tl_decospecs = list(decospecs or [])
 
     # initialize current and parameter context for deploy-time parameters
     current._set_env(flow=ctx.obj.flow, is_running=False)
     parameters.set_parameter_context(
-        ctx.obj.flow.name,
+        ctx.obj.flow.name,
+        ctx.obj.echo,
+        ctx.obj.flow_datastore,
+        {
+            k: ConfigValue(v)
+            for k, v in ctx.obj.flow.__class__._flow_state.get(
+                _FlowState.CONFIGS, {}
+            ).items()
+        },
     )
 
-    if
+    if (
+        hasattr(ctx, "saved_args")
+        and ctx.saved_args
+        and ctx.saved_args[0] not in ("run", "resume")
+    ):
         # run/resume are special cases because they can add more decorators with --with,
         # so they have to take care of themselves.
         all_decospecs = ctx.obj.tl_decospecs + list(
@@ -1061,8 +537,10 @@ def start(
         )
         if all_decospecs:
             decorators._attach_decorators(ctx.obj.flow, all_decospecs)
+            decorators._init(ctx.obj.flow)
             # Regenerate graph if we attached more decorators
-            ctx.obj.
+            ctx.obj.flow.__class__._init_attrs()
+            ctx.obj.graph = ctx.obj.flow._graph
 
     decorators._init_step_decorators(
         ctx.obj.flow,
@@ -1074,25 +552,12 @@ def start(
 
     # TODO (savin): Enable lazy instantiation of package
     ctx.obj.package = None
+
    if ctx.invoked_subcommand is None:
         ctx.invoke(check)
 
 
-def
-    for k, v in params.items():
-        if v:
-            if k == "decospecs":
-                k = "with"
-            k = k.replace("_", "-")
-            if not isinstance(v, tuple):
-                v = [v]
-            for value in v:
-                yield "--%s" % k
-                if not isinstance(value, bool):
-                    yield str(value)
-
-
-def _check(graph, flow, environment, pylint=True, warnings=False, **kwargs):
+def _check(echo, graph, flow, environment, pylint=True, warnings=False, **kwargs):
     echo("Validating your flow...", fg="magenta", bold=False)
     linter = lint.linter
     # TODO set linter settings
@@ -1131,10 +596,13 @@ def _check(graph, flow, environment, pylint=True, warnings=False, **kwargs):
 
 def print_metaflow_exception(ex):
     echo_always(ex.headline, indent=True, nl=False, bold=True)
-
-
-
-
+    location = ""
+    if ex.source_file is not None:
+        location += " in file %s" % ex.source_file
+    if ex.line_no is not None:
+        location += " on line %d" % ex.line_no
+    location += ":"
+    echo_always(location, bold=True)
     echo_always(ex.message, indent=True, bold=False, padding_bottom=True)
 
 
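
The headline change in metaflow/cli.py is that the run, resume, step, init, and dump commands move into the new metaflow/cli_components/ package and are registered lazily through LazyGroup, so a subcommand's module is only imported when that subcommand is actually listed or invoked. For orientation, here is a minimal sketch of the lazy-subcommand pattern; it follows click's documented approach, and the actual LazyGroup in metaflow/cli_components/utils.py may differ in its details:

import importlib

import click


class LazyGroup(click.Group):
    def __init__(self, *args, lazy_subcommands=None, **kwargs):
        super().__init__(*args, **kwargs)
        # Maps a command name to "pkg.module.attribute"; nothing is imported yet.
        self.lazy_subcommands = lazy_subcommands or {}

    def list_commands(self, ctx):
        return super().list_commands(ctx) + sorted(self.lazy_subcommands)

    def get_command(self, ctx, cmd_name):
        if cmd_name in self.lazy_subcommands:
            module_name, attr_name = self.lazy_subcommands[cmd_name].rsplit(".", 1)
            # The import cost is paid only when the subcommand is used, which
            # keeps invocations like `python flow.py --help` fast.
            return getattr(importlib.import_module(module_name), attr_name)
        return super().get_command(ctx, cmd_name)

Relatedly, the top-level --quiet and --datastore options (and the new hidden --local-config-file) gain is_eager=True so that their values are available before the new --config options are processed. In click, eager parameters are evaluated before all non-eager ones regardless of where they appear on the command line, as this small self-contained illustration (with hypothetical option names) shows:

import click


def trace(ctx, param, value):
    # Parameter callbacks fire in processing order: eager parameters first.
    click.echo("processed: %s" % param.name)
    return value


@click.command()
@click.option("--alpha", default="a", callback=trace)
@click.option("--beta", default="b", callback=trace, is_eager=True)
def main(alpha, beta):
    pass


# Running `main --alpha x --beta y` prints "processed: beta" before
# "processed: alpha", even though --alpha comes first on the command line.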