ob-metaflow 2.12.36.2__py2.py3-none-any.whl → 2.12.36.3__py2.py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

This version of ob-metaflow has been flagged as potentially problematic.

Files changed (54)
  1. metaflow/__init__.py +0 -3
  2. metaflow/cli.py +697 -84
  3. metaflow/cli_args.py +0 -17
  4. metaflow/cmd/develop/stub_generator.py +2 -9
  5. metaflow/decorators.py +2 -63
  6. metaflow/extension_support/plugins.py +27 -41
  7. metaflow/flowspec.py +16 -156
  8. metaflow/includefile.py +22 -50
  9. metaflow/metaflow_config.py +1 -1
  10. metaflow/package.py +3 -17
  11. metaflow/parameters.py +23 -80
  12. metaflow/plugins/__init__.py +0 -4
  13. metaflow/plugins/airflow/airflow_cli.py +0 -1
  14. metaflow/plugins/argo/argo_workflows.py +1 -41
  15. metaflow/plugins/argo/argo_workflows_cli.py +0 -1
  16. metaflow/plugins/argo/argo_workflows_deployer_objects.py +1 -5
  17. metaflow/plugins/aws/batch/batch_decorator.py +2 -2
  18. metaflow/plugins/aws/step_functions/step_functions.py +0 -32
  19. metaflow/plugins/aws/step_functions/step_functions_cli.py +0 -1
  20. metaflow/plugins/aws/step_functions/step_functions_deployer_objects.py +0 -3
  21. metaflow/plugins/datatools/s3/s3op.py +3 -3
  22. metaflow/plugins/kubernetes/kubernetes_cli.py +1 -1
  23. metaflow/plugins/kubernetes/kubernetes_decorator.py +2 -2
  24. metaflow/plugins/pypi/conda_decorator.py +10 -20
  25. metaflow/plugins/pypi/pypi_decorator.py +9 -11
  26. metaflow/plugins/timeout_decorator.py +2 -2
  27. metaflow/runner/click_api.py +19 -73
  28. metaflow/runner/deployer.py +1 -1
  29. metaflow/runner/deployer_impl.py +2 -2
  30. metaflow/runner/metaflow_runner.py +1 -4
  31. metaflow/runner/nbdeploy.py +0 -2
  32. metaflow/runner/nbrun.py +1 -1
  33. metaflow/runner/subprocess_manager.py +1 -3
  34. metaflow/runner/utils.py +20 -37
  35. metaflow/runtime.py +73 -111
  36. metaflow/sidecar/sidecar_worker.py +1 -1
  37. metaflow/util.py +0 -17
  38. metaflow/version.py +1 -1
  39. {ob_metaflow-2.12.36.2.dist-info → ob_metaflow-2.12.36.3.dist-info}/METADATA +2 -3
  40. {ob_metaflow-2.12.36.2.dist-info → ob_metaflow-2.12.36.3.dist-info}/RECORD +44 -54
  41. metaflow/cli_components/__init__.py +0 -0
  42. metaflow/cli_components/dump_cmd.py +0 -96
  43. metaflow/cli_components/init_cmd.py +0 -51
  44. metaflow/cli_components/run_cmds.py +0 -358
  45. metaflow/cli_components/step_cmd.py +0 -189
  46. metaflow/cli_components/utils.py +0 -140
  47. metaflow/user_configs/__init__.py +0 -0
  48. metaflow/user_configs/config_decorators.py +0 -563
  49. metaflow/user_configs/config_options.py +0 -495
  50. metaflow/user_configs/config_parameters.py +0 -386
  51. {ob_metaflow-2.12.36.2.dist-info → ob_metaflow-2.12.36.3.dist-info}/LICENSE +0 -0
  52. {ob_metaflow-2.12.36.2.dist-info → ob_metaflow-2.12.36.3.dist-info}/WHEEL +0 -0
  53. {ob_metaflow-2.12.36.2.dist-info → ob_metaflow-2.12.36.3.dist-info}/entry_points.txt +0 -0
  54. {ob_metaflow-2.12.36.2.dist-info → ob_metaflow-2.12.36.3.dist-info}/top_level.txt +0 -0
metaflow/cli.py CHANGED
@@ -1,18 +1,18 @@
-import functools
 import inspect
+import json
 import sys
 import traceback
 from datetime import datetime
+from functools import wraps
 
 import metaflow.tracing as tracing
 from metaflow._vendor import click
+from metaflow.client.core import get_metadata
 
-from . import decorators, lint, metaflow_version, parameters, plugins
+from . import decorators, lint, metaflow_version, namespace, parameters, plugins
 from .cli_args import cli_args
-from .cli_components.utils import LazyGroup, LazyPluginCommandCollection
-from .datastore import FlowDataStore
+from .datastore import FlowDataStore, TaskDataStore, TaskDataStoreSet
 from .exception import CommandException, MetaflowException
-from .flowspec import _FlowState
 from .graph import FlowGraph
 from .metaflow_config import (
     DECOSPECS,
@@ -26,6 +26,8 @@ from .metaflow_config import (
 from .metaflow_current import current
 from metaflow.system import _system_monitor, _system_logger
 from .metaflow_environment import MetaflowEnvironment
+from .mflog import LOG_SOURCES, mflog
+from .package import MetaflowPackage
 from .plugins import (
     DATASTORES,
     ENVIRONMENTS,
@@ -35,9 +37,16 @@ from .plugins import (
 )
 from .pylint_wrapper import PyLint
 from .R import metaflow_r_version, use_r
-from .util import resolve_identity
-from .user_configs.config_options import LocalFileInput, config_options
-from .user_configs.config_parameters import ConfigValue
+from .runtime import NativeRuntime
+from .tagging_util import validate_tags
+from .task import MetaflowTask
+from .unbounded_foreach import UBF_CONTROL, UBF_TASK
+from .util import (
+    decompress_list,
+    get_latest_run_id,
+    resolve_identity,
+    write_latest_run_id,
+)
 
 ERASE_TO_EOL = "\033[K"
 HIGHLIGHT = "red"
@@ -47,6 +56,13 @@ LOGGER_TIMESTAMP = "magenta"
 LOGGER_COLOR = "green"
 LOGGER_BAD_COLOR = "red"
 
+try:
+    # Python 2
+    import cPickle as pickle
+except ImportError:
+    # Python 3
+    import pickle
+
 
 def echo_dev_null(*args, **kwargs):
     pass
@@ -125,16 +141,7 @@ def config_merge_cb(ctx, param, value):
     return tuple(list(value) + DECOSPECS.split())
 
 
-@click.group(
-    cls=LazyGroup,
-    lazy_subcommands={
-        "init": "metaflow.cli_components.init_cmd.init",
-        "dump": "metaflow.cli_components.dump_cmd.dump",
-        "step": "metaflow.cli_components.step_cmd.step",
-        "run": "metaflow.cli_components.run_cmds.run",
-        "resume": "metaflow.cli_components.run_cmds.resume",
-    },
-)
+@click.group()
 def cli(ctx):
     pass
 
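Note: the LazyGroup removed in this hunk defers importing a subcommand's module until that subcommand is actually invoked, which keeps CLI startup light; the plain @click.group() restored in 2.12.36.3 defines every subcommand in cli.py and imports them eagerly. A minimal sketch of the lazy-loading pattern (a simplified illustration in the spirit of click's documented recipe, not ob-metaflow's exact cli_components/utils.py implementation):

    import importlib

    from metaflow._vendor import click

    class LazyGroup(click.Group):
        # Subcommands are given as {command name: "module.path.attribute"}
        # and imported only when first requested.
        def __init__(self, *args, lazy_subcommands=None, **kwargs):
            super().__init__(*args, **kwargs)
            self.lazy_subcommands = lazy_subcommands or {}

        def list_commands(self, ctx):
            return sorted(super().list_commands(ctx) + list(self.lazy_subcommands))

        def get_command(self, ctx, cmd_name):
            if cmd_name in self.lazy_subcommands:
                module_name, attr = self.lazy_subcommands[cmd_name].rsplit(".", 1)
                return getattr(importlib.import_module(module_name), attr)
            return super().get_command(ctx, cmd_name)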
@@ -148,13 +155,7 @@ def cli(ctx):
 )
 @click.pass_obj
 def check(obj, warnings=False):
-    if obj.is_quiet:
-        echo = echo_dev_null
-    else:
-        echo = echo_always
-    _check(
-        echo, obj.graph, obj.flow, obj.environment, pylint=obj.pylint, warnings=warnings
-    )
+    _check(obj.graph, obj.flow, obj.environment, pylint=obj.pylint, warnings=warnings)
     fname = inspect.getfile(obj.flow.__class__)
     echo(
         "\n*'{cmd} show'* shows a description of this flow.\n"
@@ -220,32 +221,671 @@ def output_dot(obj):
     echo_always(obj.graph.output_dot(), err=False)
 
 
+@cli.command(
+    help="Get data artifacts of a task or all tasks in a step. "
+    "The format for input-path is either <run_id>/<step_name> or "
+    "<run_id>/<step_name>/<task_id>."
+)
+@click.argument("input-path")
+@click.option(
+    "--private/--no-private",
+    default=False,
+    show_default=True,
+    help="Show also private attributes.",
+)
+@click.option(
+    "--max-value-size",
+    default=1000,
+    show_default=True,
+    type=int,
+    help="Show only values that are smaller than this number. "
+    "Set to 0 to see only keys.",
+)
+@click.option(
+    "--include",
+    type=str,
+    default="",
+    help="Include only artifacts in the given comma-separated list.",
+)
+@click.option(
+    "--file", type=str, default=None, help="Serialize artifacts in the given file."
+)
+@click.pass_obj
+def dump(obj, input_path, private=None, max_value_size=None, include=None, file=None):
+    output = {}
+    kwargs = {
+        "show_private": private,
+        "max_value_size": max_value_size,
+        "include": {t for t in include.split(",") if t},
+    }
+
+    # Pathspec can either be run_id/step_name or run_id/step_name/task_id.
+    parts = input_path.split("/")
+    if len(parts) == 2:
+        run_id, step_name = parts
+        task_id = None
+    elif len(parts) == 3:
+        run_id, step_name, task_id = parts
+    else:
+        raise CommandException(
+            "input_path should either be run_id/step_name or run_id/step_name/task_id"
+        )
+
+    datastore_set = TaskDataStoreSet(
+        obj.flow_datastore,
+        run_id,
+        steps=[step_name],
+        prefetch_data_artifacts=kwargs.get("include"),
+    )
+    if task_id:
+        ds_list = [datastore_set.get_with_pathspec(input_path)]
+    else:
+        ds_list = list(datastore_set)  # get all tasks
+
+    for ds in ds_list:
+        echo(
+            "Dumping output of run_id=*{run_id}* "
+            "step=*{step}* task_id=*{task_id}*".format(
+                run_id=ds.run_id, step=ds.step_name, task_id=ds.task_id
+            ),
+            fg="magenta",
+        )
+
+        if file is None:
+            echo_always(
+                ds.format(**kwargs), highlight="green", highlight_bold=False, err=False
+            )
+        else:
+            output[ds.pathspec] = ds.to_dict(**kwargs)
+
+    if file is not None:
+        with open(file, "wb") as f:
+            pickle.dump(output, f, protocol=pickle.HIGHEST_PROTOCOL)
+        echo("Artifacts written to *%s*" % file)
+
+
+# TODO - move step and init under a separate 'internal' subcommand
+
+
+@cli.command(help="Internal command to execute a single task.", hidden=True)
+@click.argument("step-name")
+@click.option(
+    "--run-id",
+    default=None,
+    required=True,
+    help="ID for one execution of all steps in the flow.",
+)
+@click.option(
+    "--task-id",
+    default=None,
+    required=True,
+    show_default=True,
+    help="ID for this instance of the step.",
+)
+@click.option(
+    "--input-paths",
+    help="A comma-separated list of pathspecs specifying inputs for this step.",
+)
+@click.option(
+    "--input-paths-filename",
+    type=click.Path(exists=True, readable=True, dir_okay=False, resolve_path=True),
+    help="A filename containing the argument typically passed to `input-paths`",
+    hidden=True,
+)
+@click.option(
+    "--split-index",
+    type=int,
+    default=None,
+    show_default=True,
+    help="Index of this foreach split.",
+)
+@click.option(
+    "--tag",
+    "opt_tag",
+    multiple=True,
+    default=None,
+    help="Annotate this run with the given tag. You can specify "
+    "this option multiple times to attach multiple tags in "
+    "the task.",
+)
+@click.option(
+    "--namespace",
+    "opt_namespace",
+    default=None,
+    help="Change namespace from the default (your username) to the specified tag.",
+)
+@click.option(
+    "--retry-count",
+    default=0,
+    help="How many times we have attempted to run this task.",
+)
+@click.option(
+    "--max-user-code-retries",
+    default=0,
+    help="How many times we should attempt running the user code.",
+)
+@click.option(
+    "--clone-only",
+    default=None,
+    help="Pathspec of the origin task for this task to clone. Do "
+    "not execute anything.",
+)
+@click.option(
+    "--clone-run-id",
+    default=None,
+    help="Run id of the origin flow, if this task is part of a flow being resumed.",
+)
+@click.option(
+    "--with",
+    "decospecs",
+    multiple=True,
+    help="Add a decorator to this task. You can specify this "
+    "option multiple times to attach multiple decorators "
+    "to this task.",
+)
+@click.option(
+    "--ubf-context",
+    default="none",
+    type=click.Choice(["none", UBF_CONTROL, UBF_TASK]),
+    help="Provides additional context if this task is of type unbounded foreach.",
+)
+@click.option(
+    "--num-parallel",
+    default=0,
+    type=int,
+    help="Number of parallel instances of a step. Ignored in local mode (see parallel decorator code).",
+)
+@click.pass_context
+def step(
+    ctx,
+    step_name,
+    opt_tag=None,
+    run_id=None,
+    task_id=None,
+    input_paths=None,
+    input_paths_filename=None,
+    split_index=None,
+    opt_namespace=None,
+    retry_count=None,
+    max_user_code_retries=None,
+    clone_only=None,
+    clone_run_id=None,
+    decospecs=None,
+    ubf_context="none",
+    num_parallel=None,
+):
+    if ubf_context == "none":
+        ubf_context = None
+    if opt_namespace is not None:
+        namespace(opt_namespace or None)
+
+    func = None
+    try:
+        func = getattr(ctx.obj.flow, step_name)
+    except:
+        raise CommandException("Step *%s* doesn't exist." % step_name)
+    if not func.is_step:
+        raise CommandException("Function *%s* is not a step." % step_name)
+    echo("Executing a step, *%s*" % step_name, fg="magenta", bold=False)
+
+    if decospecs:
+        decorators._attach_decorators_to_step(func, decospecs)
+
+    step_kwargs = ctx.params
+    # Remove argument `step_name` from `step_kwargs`.
+    step_kwargs.pop("step_name", None)
+    # Remove `opt_*` prefix from (some) option keys.
+    step_kwargs = dict(
+        [(k[4:], v) if k.startswith("opt_") else (k, v) for k, v in step_kwargs.items()]
+    )
+    cli_args._set_step_kwargs(step_kwargs)
+
+    ctx.obj.metadata.add_sticky_tags(tags=opt_tag)
+    if not input_paths and input_paths_filename:
+        with open(input_paths_filename, mode="r", encoding="utf-8") as f:
+            input_paths = f.read().strip(" \n\"'")
+
+    paths = decompress_list(input_paths) if input_paths else []
+
+    task = MetaflowTask(
+        ctx.obj.flow,
+        ctx.obj.flow_datastore,
+        ctx.obj.metadata,
+        ctx.obj.environment,
+        ctx.obj.echo,
+        ctx.obj.event_logger,
+        ctx.obj.monitor,
+        ubf_context,
+    )
+    if clone_only:
+        task.clone_only(
+            step_name,
+            run_id,
+            task_id,
+            clone_only,
+            retry_count,
+        )
+    else:
+        task.run_step(
+            step_name,
+            run_id,
+            task_id,
+            clone_run_id,
+            paths,
+            split_index,
+            retry_count,
+            max_user_code_retries,
+        )
+
+    echo("Success", fg="green", bold=True, indent=True)
+
+
+@parameters.add_custom_parameters(deploy_mode=False)
+@cli.command(help="Internal command to initialize a run.", hidden=True)
+@click.option(
+    "--run-id",
+    default=None,
+    required=True,
+    help="ID for one execution of all steps in the flow.",
+)
+@click.option(
+    "--task-id", default=None, required=True, help="ID for this instance of the step."
+)
+@click.option(
+    "--tag",
+    "tags",
+    multiple=True,
+    default=None,
+    help="Tags for this instance of the step.",
+)
+@click.pass_obj
+def init(obj, run_id=None, task_id=None, tags=None, **kwargs):
+    # init is a separate command instead of an option in 'step'
+    # since we need to capture user-specified parameters with
+    # @add_custom_parameters. Adding custom parameters to 'step'
+    # is not desirable due to the possibility of name clashes between
+    # user-specified parameters and our internal options. Note that
+    # user-specified parameters are often defined as environment
+    # variables.
+
+    obj.metadata.add_sticky_tags(tags=tags)
+
+    runtime = NativeRuntime(
+        obj.flow,
+        obj.graph,
+        obj.flow_datastore,
+        obj.metadata,
+        obj.environment,
+        obj.package,
+        obj.logger,
+        obj.entrypoint,
+        obj.event_logger,
+        obj.monitor,
+        run_id=run_id,
+    )
+    obj.flow._set_constants(obj.graph, kwargs)
+    runtime.persist_constants(task_id=task_id)
+
+
+def common_run_options(func):
+    @click.option(
+        "--tag",
+        "tags",
+        multiple=True,
+        default=None,
+        help="Annotate this run with the given tag. You can specify "
+        "this option multiple times to attach multiple tags in "
+        "the run.",
+    )
+    @click.option(
+        "--max-workers",
+        default=16,
+        show_default=True,
+        help="Maximum number of parallel processes.",
+    )
+    @click.option(
+        "--max-num-splits",
+        default=100,
+        show_default=True,
+        help="Maximum number of splits allowed in a foreach. This "
+        "is a safety check preventing bugs from triggering "
+        "thousands of steps inadvertently.",
+    )
+    @click.option(
+        "--max-log-size",
+        default=10,
+        show_default=True,
+        help="Maximum size of stdout and stderr captured in "
+        "megabytes. If a step outputs more than this to "
+        "stdout/stderr, its output will be truncated.",
+    )
+    @click.option(
+        "--with",
+        "decospecs",
+        multiple=True,
+        help="Add a decorator to all steps. You can specify this "
+        "option multiple times to attach multiple decorators "
+        "in steps.",
+    )
+    @click.option(
+        "--run-id-file",
+        default=None,
+        show_default=True,
+        type=str,
+        help="Write the ID of this run to the file specified.",
+    )
+    @click.option(
+        "--runner-attribute-file",
+        default=None,
+        show_default=True,
+        type=str,
+        help="Write the metadata and pathspec of this run to the file specified. Used internally for Metaflow's Runner API.",
+    )
+    @wraps(func)
+    def wrapper(*args, **kwargs):
+        return func(*args, **kwargs)
+
+    return wrapper
+
+
+@click.option(
+    "--origin-run-id",
+    default=None,
+    help="ID of the run that should be resumed. By default, the "
+    "last run executed locally.",
+)
+@click.option(
+    "--run-id",
+    default=None,
+    help="Run ID for the new run. By default, a new run-id will be generated",
+    hidden=True,
+)
+@click.option(
+    "--clone-only/--no-clone-only",
+    default=False,
+    show_default=True,
+    help="Only clone tasks without continuing execution",
+    hidden=True,
+)
+@click.option(
+    "--reentrant/--no-reentrant",
+    default=False,
+    show_default=True,
+    hidden=True,
+    help="If specified, allows this call to be called in parallel",
+)
+@click.option(
+    "--resume-identifier",
+    default=None,
+    show_default=True,
+    hidden=True,
+    help="If specified, it identifies the task that started this resume call. It is in the form of {step_name}-{task_id}",
+)
+@click.argument("step-to-rerun", required=False)
+@cli.command(help="Resume execution of a previous run of this flow.")
+@common_run_options
+@click.pass_obj
+def resume(
+    obj,
+    tags=None,
+    step_to_rerun=None,
+    origin_run_id=None,
+    run_id=None,
+    clone_only=False,
+    reentrant=False,
+    max_workers=None,
+    max_num_splits=None,
+    max_log_size=None,
+    decospecs=None,
+    run_id_file=None,
+    resume_identifier=None,
+    runner_attribute_file=None,
+):
+    before_run(obj, tags, decospecs)
+
+    if origin_run_id is None:
+        origin_run_id = get_latest_run_id(obj.echo, obj.flow.name)
+        if origin_run_id is None:
+            raise CommandException(
+                "A previous run id was not found. Specify --origin-run-id."
+            )
+
+    if step_to_rerun is None:
+        steps_to_rerun = set()
+    else:
+        # validate step name
+        if step_to_rerun not in obj.graph.nodes:
+            raise CommandException(
+                "invalid step name {0} specified, must be step present in "
+                "current form of execution graph. Valid step names include: {1}".format(
+                    step_to_rerun, ",".join(list(obj.graph.nodes.keys()))
+                )
+            )
+        steps_to_rerun = {step_to_rerun}
+
+    if run_id:
+        # Run-ids that are provided by the metadata service are always integers.
+        # External providers or run-ids (like external schedulers) always need to
+        # be non-integers to avoid any clashes. This condition ensures this.
+        try:
+            int(run_id)
+        except:
+            pass
+        else:
+            raise CommandException("run-id %s cannot be an integer" % run_id)
+
+    runtime = NativeRuntime(
+        obj.flow,
+        obj.graph,
+        obj.flow_datastore,
+        obj.metadata,
+        obj.environment,
+        obj.package,
+        obj.logger,
+        obj.entrypoint,
+        obj.event_logger,
+        obj.monitor,
+        run_id=run_id,
+        clone_run_id=origin_run_id,
+        clone_only=clone_only,
+        reentrant=reentrant,
+        steps_to_rerun=steps_to_rerun,
+        max_workers=max_workers,
+        max_num_splits=max_num_splits,
+        max_log_size=max_log_size * 1024 * 1024,
+        resume_identifier=resume_identifier,
+    )
+    write_file(run_id_file, runtime.run_id)
+    runtime.print_workflow_info()
+
+    runtime.persist_constants()
+
+    if runner_attribute_file:
+        with open(runner_attribute_file, "w", encoding="utf-8") as f:
+            json.dump(
+                {
+                    "run_id": runtime.run_id,
+                    "flow_name": obj.flow.name,
+                    "metadata": obj.metadata.metadata_str(),
+                },
+                f,
+            )
+
+    # We may skip clone-only resume if this is not a resume leader,
+    # and clone is already complete.
+    if runtime.should_skip_clone_only_execution():
+        return
+
+    current._update_env(
+        {
+            "run_id": runtime.run_id,
+        }
+    )
+    _system_logger.log_event(
+        level="info",
+        module="metaflow.resume",
+        name="start",
+        payload={
+            "msg": "Resuming run",
+        },
+    )
+
+    with runtime.run_heartbeat():
+        if clone_only:
+            runtime.clone_original_run()
+        else:
+            runtime.clone_original_run(generate_task_obj=True, verbose=False)
+            runtime.execute()
+
+
+@tracing.cli_entrypoint("cli/run")
+@parameters.add_custom_parameters(deploy_mode=True)
+@cli.command(help="Run the workflow locally.")
+@common_run_options
+@click.option(
+    "--namespace",
+    "user_namespace",
+    default=None,
+    help="Change namespace from the default (your username) to "
+    "the specified tag. Note that this option does not alter "
+    "tags assigned to the objects produced by this run, just "
+    "what existing objects are visible in the client API. You "
+    "can enable the global namespace with an empty string."
+    "--namespace=",
+)
+@click.pass_obj
+def run(
+    obj,
+    tags=None,
+    max_workers=None,
+    max_num_splits=None,
+    max_log_size=None,
+    decospecs=None,
+    run_id_file=None,
+    runner_attribute_file=None,
+    user_namespace=None,
+    **kwargs
+):
+    if user_namespace is not None:
+        namespace(user_namespace or None)
+    before_run(obj, tags, decospecs)
+
+    runtime = NativeRuntime(
+        obj.flow,
+        obj.graph,
+        obj.flow_datastore,
+        obj.metadata,
+        obj.environment,
+        obj.package,
+        obj.logger,
+        obj.entrypoint,
+        obj.event_logger,
+        obj.monitor,
+        max_workers=max_workers,
+        max_num_splits=max_num_splits,
+        max_log_size=max_log_size * 1024 * 1024,
+    )
+    write_latest_run_id(obj, runtime.run_id)
+    write_file(run_id_file, runtime.run_id)
+
+    obj.flow._set_constants(obj.graph, kwargs)
+    current._update_env(
+        {
+            "run_id": runtime.run_id,
+        }
+    )
+    _system_logger.log_event(
+        level="info",
+        module="metaflow.run",
+        name="start",
+        payload={
+            "msg": "Starting run",
+        },
+    )
+    with runtime.run_heartbeat():
+        runtime.print_workflow_info()
+        runtime.persist_constants()
+
+        if runner_attribute_file:
+            with open(runner_attribute_file, "w", encoding="utf-8") as f:
+                json.dump(
+                    {
+                        "run_id": runtime.run_id,
+                        "flow_name": obj.flow.name,
+                        "metadata": obj.metadata.metadata_str(),
+                    },
+                    f,
+                )
+        runtime.execute()
+
+
+def write_file(file_path, content):
+    if file_path is not None:
+        with open(file_path, "w") as f:
+            f.write(str(content))
+
+
+def before_run(obj, tags, decospecs):
+    validate_tags(tags)
+
+    # There's a --with option both at the top-level and for the run
+    # subcommand. Why?
+    #
+    # "run --with shoes" looks so much better than "--with shoes run".
+    # This is a very common use case of --with.
+    #
+    # A downside is that we need to have the following decorators handling
+    # in two places in this module and make sure _init_step_decorators
+    # doesn't get called twice.
+
+    # We want the order to be the following:
+    # - run level decospecs
+    # - top level decospecs
+    # - environment decospecs
+    all_decospecs = (
+        list(decospecs or [])
+        + obj.tl_decospecs
+        + list(obj.environment.decospecs() or [])
+    )
+    if all_decospecs:
+        decorators._attach_decorators(obj.flow, all_decospecs)
+        obj.graph = FlowGraph(obj.flow.__class__)
+
+    obj.check(obj.graph, obj.flow, obj.environment, pylint=obj.pylint)
+    # obj.environment.init_environment(obj.logger)
+
+    decorators._init_step_decorators(
+        obj.flow, obj.graph, obj.environment, obj.flow_datastore, obj.logger
+    )
+
+    obj.metadata.add_sticky_tags(tags=tags)
+
+    # Package working directory only once per run.
+    # We explicitly avoid doing this in `start` since it is invoked for every
+    # step in the run.
+    obj.package = MetaflowPackage(
+        obj.flow, obj.environment, obj.echo, obj.package_suffixes
+    )
+
+
 @cli.command(help="Print the Metaflow version")
 @click.pass_obj
 def version(obj):
     echo_always(obj.version)
 
 
-# NOTE: add_decorator_options should be TL because it checks to make sure
-# that no option conflict with the ones below
+@tracing.cli_entrypoint("cli/start")
 @decorators.add_decorator_options
-@config_options
 @click.command(
-    cls=LazyPluginCommandCollection,
-    sources=[cli],
-    lazy_sources=plugins.get_plugin_cli_path(),
+    cls=click.CommandCollection,
+    sources=[cli] + plugins.get_plugin_cli(),
     invoke_without_command=True,
 )
-@tracing.cli_entrypoint("cli/start")
-# Quiet is eager to make sure it is available when processing --config options since
-# we need it to construct a context to pass to any DeployTimeField for the default
-# value.
 @click.option(
     "--quiet/--not-quiet",
     show_default=True,
     default=False,
     help="Suppress unnecessary messages",
-    is_eager=True,
 )
 @click.option(
     "--metadata",
@@ -261,14 +901,12 @@ def version(obj):
     type=click.Choice(["local"] + [m.TYPE for m in ENVIRONMENTS]),
     help="Execution environment type",
 )
-# See comment for --quiet
 @click.option(
     "--datastore",
     default=DEFAULT_DATASTORE,
     show_default=True,
     type=click.Choice([d.TYPE for d in DATASTORES]),
     help="Data backend type",
-    is_eager=True,
 )
 @click.option("--datastore-root", help="Root path for datastore")
 @click.option(
@@ -305,15 +943,6 @@ def version(obj):
     type=click.Choice(MONITOR_SIDECARS),
     help="Monitoring backend type",
 )
-@click.option(
-    "--local-config-file",
-    type=LocalFileInput(exists=True, readable=True, dir_okay=False, resolve_path=True),
-    required=False,
-    default=None,
-    help="A filename containing the dumped configuration values. Internal use only.",
-    hidden=True,
-    is_eager=True,
-)
 @click.pass_context
 def start(
     ctx,
@@ -327,11 +956,9 @@ def start(
     pylint=None,
     event_logger=None,
     monitor=None,
-    local_config_file=None,
-    config_file_options=None,
-    config_value_options=None,
     **deco_options
 ):
+    global echo
     if quiet:
         echo = echo_dev_null
     else:
@@ -346,27 +973,17 @@ def start(
     echo(" executing *%s*" % ctx.obj.flow.name, fg="magenta", nl=False)
     echo(" for *%s*" % resolve_identity(), fg="magenta")
 
-    # At this point, we are able to resolve the user-configuration options so we can
-    # process all those decorators that the user added that will modify the flow based
-    # on those configurations. It is important to do this as early as possible since it
-    # actually modifies the flow itself
-
-    # When we process the options, the first one processed will return None and the
-    # second one processed will return the actual options. The order of processing
-    # depends on what (and in what order) the user specifies on the command line.
-    config_options = config_file_options or config_value_options
-    ctx.obj.flow = ctx.obj.flow._process_config_decorators(config_options)
-
     cli_args._set_top_kwargs(ctx.params)
     ctx.obj.echo = echo
     ctx.obj.echo_always = echo_always
     ctx.obj.is_quiet = quiet
-    ctx.obj.graph = ctx.obj.flow._graph
+    ctx.obj.graph = FlowGraph(ctx.obj.flow.__class__)
     ctx.obj.logger = logger
+    ctx.obj.check = _check
     ctx.obj.pylint = pylint
-    ctx.obj.check = functools.partial(_check, echo)
     ctx.obj.top_cli = cli
     ctx.obj.package_suffixes = package_suffixes.split(",")
+    ctx.obj.reconstruct_cli = _reconstruct_cli
 
     ctx.obj.environment = [
         e for e in ENVIRONMENTS + [MetaflowEnvironment] if e.TYPE == environment
@@ -412,10 +1029,6 @@ def start(
         ctx.obj.monitor,
     )
 
-    ctx.obj.config_options = config_options
-
-    decorators._init(ctx.obj.flow)
-
     # It is important to initialize flow decorators early as some of the
     # things they provide may be used by some of the objects initialized after.
     decorators._init_flow_decorators(
@@ -438,22 +1051,10 @@ def start(
     # initialize current and parameter context for deploy-time parameters
     current._set_env(flow=ctx.obj.flow, is_running=False)
     parameters.set_parameter_context(
-        ctx.obj.flow.name,
-        ctx.obj.echo,
-        ctx.obj.flow_datastore,
-        {
-            k: ConfigValue(v)
-            for k, v in ctx.obj.flow.__class__._flow_state.get(
-                _FlowState.CONFIGS, {}
-            ).items()
-        },
+        ctx.obj.flow.name, ctx.obj.echo, ctx.obj.flow_datastore
     )
 
-    if (
-        hasattr(ctx, "saved_args")
-        and ctx.saved_args
-        and ctx.saved_args[0] not in ("run", "resume")
-    ):
+    if ctx.invoked_subcommand not in ("run", "resume"):
         # run/resume are special cases because they can add more decorators with --with,
         # so they have to take care of themselves.
         all_decospecs = ctx.obj.tl_decospecs + list(
@@ -461,7 +1062,6 @@ def start(
         )
         if all_decospecs:
             decorators._attach_decorators(ctx.obj.flow, all_decospecs)
-            decorators._init(ctx.obj.flow)
             # Regenerate graph if we attached more decorators
             ctx.obj.graph = FlowGraph(ctx.obj.flow.__class__)
 
@@ -475,12 +1075,25 @@ def start(
 
     # TODO (savin): Enable lazy instantiation of package
     ctx.obj.package = None
-
     if ctx.invoked_subcommand is None:
         ctx.invoke(check)
 
 
-def _check(echo, graph, flow, environment, pylint=True, warnings=False, **kwargs):
+def _reconstruct_cli(params):
+    for k, v in params.items():
+        if v:
+            if k == "decospecs":
+                k = "with"
+            k = k.replace("_", "-")
+            if not isinstance(v, tuple):
+                v = [v]
+            for value in v:
+                yield "--%s" % k
+                if not isinstance(value, bool):
+                    yield str(value)
+
+
+def _check(graph, flow, environment, pylint=True, warnings=False, **kwargs):
     echo("Validating your flow...", fg="magenta", bold=False)
     linter = lint.linter
     # TODO set linter settings
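
Note: with the monolithic cli.py restored, the run, resume, and dump commands defined in the large hunk above are invoked directly against a flow file. A hypothetical session (the flow file name, step name, and run id below are illustrative only; the options match the declarations in the diff):

    python myflow.py run --max-workers 8 --tag nightly --run-id-file run_id.txt
    python myflow.py resume --origin-run-id 176 train
    python myflow.py dump 176/train --include features,model --max-value-size 0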
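Note: when dump is given --file, it serializes with pickle at HIGHEST_PROTOCOL (hence the cPickle/pickle import fallback added near the top of the file). Reading the dump back is symmetric; a minimal sketch, with a hypothetical file name:

    import pickle

    with open("artifacts.pkl", "rb") as f:
        artifacts = pickle.load(f)

    # artifacts maps each task pathspec to the dict produced by ds.to_dict(**kwargs)
    for pathspec, values in artifacts.items():
        print(pathspec, sorted(values))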
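Note: _reconstruct_cli in the final hunk turns a click params mapping back into a flag list: decospecs is renamed to --with, underscores become dashes, tuples expand into repeated flags, and boolean values emit a bare flag. A quick trace with hypothetical values:

    params = {"datastore": "s3", "decospecs": ("retry", "catch"), "quiet": True}
    list(_reconstruct_cli(params))
    # -> ['--datastore', 's3', '--with', 'retry', '--with', 'catch', '--quiet']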