ob-metaflow 2.12.30.2__py2.py3-none-any.whl → 2.13.6.1__py2.py3-none-any.whl

This diff shows the changes between two publicly released versions of this package, as they appear in their respective public registries. It is provided for informational purposes only.

Potentially problematic release.

This version of ob-metaflow might be problematic.

Files changed (96)
  1. metaflow/__init__.py +3 -0
  2. metaflow/cards.py +1 -0
  3. metaflow/cli.py +185 -717
  4. metaflow/cli_args.py +17 -0
  5. metaflow/cli_components/__init__.py +0 -0
  6. metaflow/cli_components/dump_cmd.py +96 -0
  7. metaflow/cli_components/init_cmd.py +51 -0
  8. metaflow/cli_components/run_cmds.py +362 -0
  9. metaflow/cli_components/step_cmd.py +176 -0
  10. metaflow/cli_components/utils.py +140 -0
  11. metaflow/cmd/develop/stub_generator.py +9 -2
  12. metaflow/datastore/flow_datastore.py +2 -2
  13. metaflow/decorators.py +63 -2
  14. metaflow/exception.py +8 -2
  15. metaflow/extension_support/plugins.py +42 -27
  16. metaflow/flowspec.py +176 -23
  17. metaflow/graph.py +28 -27
  18. metaflow/includefile.py +50 -22
  19. metaflow/lint.py +35 -20
  20. metaflow/metadata_provider/heartbeat.py +23 -8
  21. metaflow/metaflow_config.py +10 -1
  22. metaflow/multicore_utils.py +31 -14
  23. metaflow/package.py +17 -3
  24. metaflow/parameters.py +97 -25
  25. metaflow/plugins/__init__.py +22 -0
  26. metaflow/plugins/airflow/airflow.py +18 -17
  27. metaflow/plugins/airflow/airflow_cli.py +1 -0
  28. metaflow/plugins/argo/argo_client.py +0 -2
  29. metaflow/plugins/argo/argo_workflows.py +195 -132
  30. metaflow/plugins/argo/argo_workflows_cli.py +1 -1
  31. metaflow/plugins/argo/argo_workflows_decorator.py +2 -4
  32. metaflow/plugins/argo/argo_workflows_deployer_objects.py +51 -9
  33. metaflow/plugins/argo/jobset_input_paths.py +0 -1
  34. metaflow/plugins/aws/aws_utils.py +6 -1
  35. metaflow/plugins/aws/batch/batch_client.py +1 -3
  36. metaflow/plugins/aws/batch/batch_decorator.py +13 -13
  37. metaflow/plugins/aws/secrets_manager/aws_secrets_manager_secrets_provider.py +13 -10
  38. metaflow/plugins/aws/step_functions/dynamo_db_client.py +0 -3
  39. metaflow/plugins/aws/step_functions/production_token.py +1 -1
  40. metaflow/plugins/aws/step_functions/step_functions.py +33 -1
  41. metaflow/plugins/aws/step_functions/step_functions_cli.py +1 -1
  42. metaflow/plugins/aws/step_functions/step_functions_decorator.py +0 -1
  43. metaflow/plugins/aws/step_functions/step_functions_deployer_objects.py +7 -9
  44. metaflow/plugins/cards/card_cli.py +7 -2
  45. metaflow/plugins/cards/card_creator.py +1 -0
  46. metaflow/plugins/cards/card_decorator.py +79 -8
  47. metaflow/plugins/cards/card_modules/basic.py +56 -5
  48. metaflow/plugins/cards/card_modules/card.py +16 -1
  49. metaflow/plugins/cards/card_modules/components.py +64 -16
  50. metaflow/plugins/cards/card_modules/main.js +27 -25
  51. metaflow/plugins/cards/card_modules/test_cards.py +4 -4
  52. metaflow/plugins/cards/component_serializer.py +1 -1
  53. metaflow/plugins/datatools/s3/s3.py +12 -4
  54. metaflow/plugins/datatools/s3/s3op.py +3 -3
  55. metaflow/plugins/events_decorator.py +338 -186
  56. metaflow/plugins/kubernetes/kube_utils.py +84 -1
  57. metaflow/plugins/kubernetes/kubernetes.py +40 -92
  58. metaflow/plugins/kubernetes/kubernetes_cli.py +32 -7
  59. metaflow/plugins/kubernetes/kubernetes_decorator.py +76 -4
  60. metaflow/plugins/kubernetes/kubernetes_job.py +23 -20
  61. metaflow/plugins/kubernetes/kubernetes_jobsets.py +41 -20
  62. metaflow/plugins/kubernetes/spot_metadata_cli.py +69 -0
  63. metaflow/plugins/kubernetes/spot_monitor_sidecar.py +109 -0
  64. metaflow/plugins/parallel_decorator.py +4 -1
  65. metaflow/plugins/project_decorator.py +33 -5
  66. metaflow/plugins/pypi/bootstrap.py +249 -81
  67. metaflow/plugins/pypi/conda_decorator.py +20 -10
  68. metaflow/plugins/pypi/conda_environment.py +83 -27
  69. metaflow/plugins/pypi/micromamba.py +82 -37
  70. metaflow/plugins/pypi/pip.py +9 -6
  71. metaflow/plugins/pypi/pypi_decorator.py +11 -9
  72. metaflow/plugins/pypi/utils.py +4 -2
  73. metaflow/plugins/timeout_decorator.py +2 -2
  74. metaflow/runner/click_api.py +240 -50
  75. metaflow/runner/deployer.py +1 -1
  76. metaflow/runner/deployer_impl.py +12 -11
  77. metaflow/runner/metaflow_runner.py +68 -34
  78. metaflow/runner/nbdeploy.py +2 -0
  79. metaflow/runner/nbrun.py +1 -1
  80. metaflow/runner/subprocess_manager.py +61 -10
  81. metaflow/runner/utils.py +208 -44
  82. metaflow/runtime.py +216 -112
  83. metaflow/sidecar/sidecar_worker.py +1 -1
  84. metaflow/tracing/tracing_modules.py +4 -1
  85. metaflow/user_configs/__init__.py +0 -0
  86. metaflow/user_configs/config_decorators.py +563 -0
  87. metaflow/user_configs/config_options.py +548 -0
  88. metaflow/user_configs/config_parameters.py +436 -0
  89. metaflow/util.py +22 -0
  90. metaflow/version.py +1 -1
  91. {ob_metaflow-2.12.30.2.dist-info → ob_metaflow-2.13.6.1.dist-info}/METADATA +12 -3
  92. {ob_metaflow-2.12.30.2.dist-info → ob_metaflow-2.13.6.1.dist-info}/RECORD +96 -84
  93. {ob_metaflow-2.12.30.2.dist-info → ob_metaflow-2.13.6.1.dist-info}/WHEEL +1 -1
  94. {ob_metaflow-2.12.30.2.dist-info → ob_metaflow-2.13.6.1.dist-info}/LICENSE +0 -0
  95. {ob_metaflow-2.12.30.2.dist-info → ob_metaflow-2.13.6.1.dist-info}/entry_points.txt +0 -0
  96. {ob_metaflow-2.12.30.2.dist-info → ob_metaflow-2.13.6.1.dist-info}/top_level.txt +0 -0
metaflow/cli.py CHANGED
@@ -1,18 +1,19 @@
+ import functools
  import inspect
- import json
  import sys
  import traceback
  from datetime import datetime
- from functools import wraps

  import metaflow.tracing as tracing
  from metaflow._vendor import click
- from metaflow.client.core import get_metadata

- from . import decorators, lint, metaflow_version, namespace, parameters, plugins
+ from . import decorators, lint, metaflow_version, parameters, plugins
  from .cli_args import cli_args
- from .datastore import FlowDataStore, TaskDataStore, TaskDataStoreSet
+ from .cli_components.utils import LazyGroup, LazyPluginCommandCollection
+ from .datastore import FlowDataStore, TaskDataStoreSet
+ from .debug import debug
  from .exception import CommandException, MetaflowException
+ from .flowspec import _FlowState
  from .graph import FlowGraph
  from .metaflow_config import (
      DECOSPECS,
@@ -26,8 +27,6 @@ from .metaflow_config import (
  from .metaflow_current import current
  from metaflow.system import _system_monitor, _system_logger
  from .metaflow_environment import MetaflowEnvironment
- from .mflog import LOG_SOURCES, mflog
- from .package import MetaflowPackage
  from .plugins import (
      DATASTORES,
      ENVIRONMENTS,
@@ -37,16 +36,9 @@ from .plugins import (
  )
  from .pylint_wrapper import PyLint
  from .R import metaflow_r_version, use_r
- from .runtime import NativeRuntime
- from .tagging_util import validate_tags
- from .task import MetaflowTask
- from .unbounded_foreach import UBF_CONTROL, UBF_TASK
- from .util import (
-     decompress_list,
-     get_latest_run_id,
-     resolve_identity,
-     write_latest_run_id,
- )
+ from .util import get_latest_run_id, resolve_identity
+ from .user_configs.config_options import LocalFileInput, config_options
+ from .user_configs.config_parameters import ConfigValue

  ERASE_TO_EOL = "\033[K"
  HIGHLIGHT = "red"
@@ -56,13 +48,6 @@ LOGGER_TIMESTAMP = "magenta"
  LOGGER_COLOR = "green"
  LOGGER_BAD_COLOR = "red"

- try:
-     # Python 2
-     import cPickle as pickle
- except ImportError:
-     # Python 3
-     import pickle
-

  def echo_dev_null(*args, **kwargs):
      pass
@@ -141,7 +126,16 @@ def config_merge_cb(ctx, param, value):
      return tuple(list(value) + DECOSPECS.split())


- @click.group()
+ @click.group(
+     cls=LazyGroup,
+     lazy_subcommands={
+         "init": "metaflow.cli_components.init_cmd.init",
+         "dump": "metaflow.cli_components.dump_cmd.dump",
+         "step": "metaflow.cli_components.step_cmd.step",
+         "run": "metaflow.cli_components.run_cmds.run",
+         "resume": "metaflow.cli_components.run_cmds.resume",
+     },
+ )
  def cli(ctx):
      pass

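The new group defers importing each core subcommand (run, resume, step, init, dump) until the command is actually resolved, so listing help or invoking a plugin command no longer imports the entire CLI. The real implementation lives in the new metaflow/cli_components/utils.py, which is not included in this diff; the following is a minimal sketch of the general pattern, assuming it follows click's documented lazy-loading recipe (the class body and comments here are illustrative, not the actual code):

import importlib

import click


class LazyGroup(click.Group):
    # Illustrative sketch only; metaflow's actual LazyGroup may differ.
    def __init__(self, *args, lazy_subcommands=None, **kwargs):
        super().__init__(*args, **kwargs)
        # Maps a command name to an "importable.module.attr" string; the
        # module is imported only when the command is listed or invoked.
        self.lazy_subcommands = lazy_subcommands or {}

    def list_commands(self, ctx):
        return sorted(super().list_commands(ctx) + list(self.lazy_subcommands))

    def get_command(self, ctx, cmd_name):
        if cmd_name in self.lazy_subcommands:
            module_name, attr = self.lazy_subcommands[cmd_name].rsplit(".", 1)
            # Deferring this import is what makes subcommand resolution lazy.
            return getattr(importlib.import_module(module_name), attr)
        return super().get_command(ctx, cmd_name)
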
@@ -155,7 +149,13 @@ def cli(ctx):
  )
  @click.pass_obj
  def check(obj, warnings=False):
-     _check(obj.graph, obj.flow, obj.environment, pylint=obj.pylint, warnings=warnings)
+     if obj.is_quiet:
+         echo = echo_dev_null
+     else:
+         echo = echo_always
+     _check(
+         echo, obj.graph, obj.flow, obj.environment, pylint=obj.pylint, warnings=warnings
+     )
      fname = inspect.getfile(obj.flow.__class__)
      echo(
          "\n*'{cmd} show'* shows a description of this flow.\n"
@@ -170,7 +170,8 @@ def check(obj, warnings=False):
  @click.pass_obj
  def show(obj):
      echo_always("\n%s" % obj.graph.doc)
-     for _, node in sorted((n.func_lineno, n) for n in obj.graph):
+     for node_name in obj.graph.sorted_nodes:
+         node = obj.graph[node_name]
          echo_always("\nStep *%s*" % node.name, err=False)
          echo_always(node.doc if node.doc else "?", indent=True, err=False)
          if node.type != "end":
@@ -221,670 +222,32 @@ def output_dot(obj):
      echo_always(obj.graph.output_dot(), err=False)


- @cli.command(
-     help="Get data artifacts of a task or all tasks in a step. "
-     "The format for input-path is either <run_id>/<step_name> or "
-     "<run_id>/<step_name>/<task_id>."
- )
- @click.argument("input-path")
- @click.option(
-     "--private/--no-private",
-     default=False,
-     show_default=True,
-     help="Show also private attributes.",
- )
- @click.option(
-     "--max-value-size",
-     default=1000,
-     show_default=True,
-     type=int,
-     help="Show only values that are smaller than this number. "
-     "Set to 0 to see only keys.",
- )
- @click.option(
-     "--include",
-     type=str,
-     default="",
-     help="Include only artifacts in the given comma-separated list.",
- )
- @click.option(
-     "--file", type=str, default=None, help="Serialize artifacts in the given file."
- )
- @click.pass_obj
- def dump(obj, input_path, private=None, max_value_size=None, include=None, file=None):
-     output = {}
-     kwargs = {
-         "show_private": private,
-         "max_value_size": max_value_size,
-         "include": {t for t in include.split(",") if t},
-     }
-
-     # Pathspec can either be run_id/step_name or run_id/step_name/task_id.
-     parts = input_path.split("/")
-     if len(parts) == 2:
-         run_id, step_name = parts
-         task_id = None
-     elif len(parts) == 3:
-         run_id, step_name, task_id = parts
-     else:
-         raise CommandException(
-             "input_path should either be run_id/step_name or run_id/step_name/task_id"
-         )
-
-     datastore_set = TaskDataStoreSet(
-         obj.flow_datastore,
-         run_id,
-         steps=[step_name],
-         prefetch_data_artifacts=kwargs.get("include"),
-     )
-     if task_id:
-         ds_list = [datastore_set.get_with_pathspec(input_path)]
-     else:
-         ds_list = list(datastore_set)  # get all tasks
-
-     for ds in ds_list:
-         echo(
-             "Dumping output of run_id=*{run_id}* "
-             "step=*{step}* task_id=*{task_id}*".format(
-                 run_id=ds.run_id, step=ds.step_name, task_id=ds.task_id
-             ),
-             fg="magenta",
-         )
-
-         if file is None:
-             echo_always(
-                 ds.format(**kwargs), highlight="green", highlight_bold=False, err=False
-             )
-         else:
-             output[ds.pathspec] = ds.to_dict(**kwargs)
-
-     if file is not None:
-         with open(file, "wb") as f:
-             pickle.dump(output, f, protocol=pickle.HIGHEST_PROTOCOL)
-         echo("Artifacts written to *%s*" % file)
-
-
- # TODO - move step and init under a separate 'internal' subcommand
-
-
- @cli.command(help="Internal command to execute a single task.", hidden=True)
- @click.argument("step-name")
- @click.option(
-     "--run-id",
-     default=None,
-     required=True,
-     help="ID for one execution of all steps in the flow.",
- )
- @click.option(
-     "--task-id",
-     default=None,
-     required=True,
-     show_default=True,
-     help="ID for this instance of the step.",
- )
- @click.option(
-     "--input-paths",
-     help="A comma-separated list of pathspecs specifying inputs for this step.",
- )
- @click.option(
-     "--input-paths-filename",
-     type=click.Path(exists=True, readable=True, dir_okay=False, resolve_path=True),
-     help="A filename containing the argument typically passed to `input-paths`",
-     hidden=True,
- )
- @click.option(
-     "--split-index",
-     type=int,
-     default=None,
-     show_default=True,
-     help="Index of this foreach split.",
- )
- @click.option(
-     "--tag",
-     "opt_tag",
-     multiple=True,
-     default=None,
-     help="Annotate this run with the given tag. You can specify "
-     "this option multiple times to attach multiple tags in "
-     "the task.",
- )
- @click.option(
-     "--namespace",
-     "opt_namespace",
-     default=None,
-     help="Change namespace from the default (your username) to the specified tag.",
- )
- @click.option(
-     "--retry-count",
-     default=0,
-     help="How many times we have attempted to run this task.",
- )
- @click.option(
-     "--max-user-code-retries",
-     default=0,
-     help="How many times we should attempt running the user code.",
- )
- @click.option(
-     "--clone-only",
-     default=None,
-     help="Pathspec of the origin task for this task to clone. Do "
-     "not execute anything.",
- )
- @click.option(
-     "--clone-run-id",
-     default=None,
-     help="Run id of the origin flow, if this task is part of a flow being resumed.",
- )
- @click.option(
-     "--with",
-     "decospecs",
-     multiple=True,
-     help="Add a decorator to this task. You can specify this "
-     "option multiple times to attach multiple decorators "
-     "to this task.",
- )
- @click.option(
-     "--ubf-context",
-     default="none",
-     type=click.Choice(["none", UBF_CONTROL, UBF_TASK]),
-     help="Provides additional context if this task is of type unbounded foreach.",
- )
- @click.option(
-     "--num-parallel",
-     default=0,
-     type=int,
-     help="Number of parallel instances of a step. Ignored in local mode (see parallel decorator code).",
- )
- @click.pass_context
- def step(
-     ctx,
-     step_name,
-     opt_tag=None,
-     run_id=None,
-     task_id=None,
-     input_paths=None,
-     input_paths_filename=None,
-     split_index=None,
-     opt_namespace=None,
-     retry_count=None,
-     max_user_code_retries=None,
-     clone_only=None,
-     clone_run_id=None,
-     decospecs=None,
-     ubf_context="none",
-     num_parallel=None,
- ):
-     if ubf_context == "none":
-         ubf_context = None
-     if opt_namespace is not None:
-         namespace(opt_namespace or None)
-
-     func = None
-     try:
-         func = getattr(ctx.obj.flow, step_name)
-     except:
-         raise CommandException("Step *%s* doesn't exist." % step_name)
-     if not func.is_step:
-         raise CommandException("Function *%s* is not a step." % step_name)
-     echo("Executing a step, *%s*" % step_name, fg="magenta", bold=False)
-
-     if decospecs:
-         decorators._attach_decorators_to_step(func, decospecs)
-
-     step_kwargs = ctx.params
-     # Remove argument `step_name` from `step_kwargs`.
-     step_kwargs.pop("step_name", None)
-     # Remove `opt_*` prefix from (some) option keys.
-     step_kwargs = dict(
-         [(k[4:], v) if k.startswith("opt_") else (k, v) for k, v in step_kwargs.items()]
-     )
-     cli_args._set_step_kwargs(step_kwargs)
-
-     ctx.obj.metadata.add_sticky_tags(tags=opt_tag)
-     if not input_paths and input_paths_filename:
-         with open(input_paths_filename, mode="r", encoding="utf-8") as f:
-             input_paths = f.read().strip(" \n\"'")
-
-     paths = decompress_list(input_paths) if input_paths else []
-
-     task = MetaflowTask(
-         ctx.obj.flow,
-         ctx.obj.flow_datastore,
-         ctx.obj.metadata,
-         ctx.obj.environment,
-         ctx.obj.echo,
-         ctx.obj.event_logger,
-         ctx.obj.monitor,
-         ubf_context,
-     )
-     if clone_only:
-         task.clone_only(
-             step_name,
-             run_id,
-             task_id,
-             clone_only,
-             retry_count,
-         )
-     else:
-         task.run_step(
-             step_name,
-             run_id,
-             task_id,
-             clone_run_id,
-             paths,
-             split_index,
-             retry_count,
-             max_user_code_retries,
-         )
-
-     echo("Success", fg="green", bold=True, indent=True)
-
-
- @parameters.add_custom_parameters(deploy_mode=False)
- @cli.command(help="Internal command to initialize a run.", hidden=True)
- @click.option(
-     "--run-id",
-     default=None,
-     required=True,
-     help="ID for one execution of all steps in the flow.",
- )
- @click.option(
-     "--task-id", default=None, required=True, help="ID for this instance of the step."
- )
- @click.option(
-     "--tag",
-     "tags",
-     multiple=True,
-     default=None,
-     help="Tags for this instance of the step.",
- )
- @click.pass_obj
- def init(obj, run_id=None, task_id=None, tags=None, **kwargs):
-     # init is a separate command instead of an option in 'step'
-     # since we need to capture user-specified parameters with
-     # @add_custom_parameters. Adding custom parameters to 'step'
-     # is not desirable due to the possibility of name clashes between
-     # user-specified parameters and our internal options. Note that
-     # user-specified parameters are often defined as environment
-     # variables.
-
-     obj.metadata.add_sticky_tags(tags=tags)
-
-     runtime = NativeRuntime(
-         obj.flow,
-         obj.graph,
-         obj.flow_datastore,
-         obj.metadata,
-         obj.environment,
-         obj.package,
-         obj.logger,
-         obj.entrypoint,
-         obj.event_logger,
-         obj.monitor,
-         run_id=run_id,
-     )
-     obj.flow._set_constants(obj.graph, kwargs)
-     runtime.persist_constants(task_id=task_id)
-
-
- def common_run_options(func):
-     @click.option(
-         "--tag",
-         "tags",
-         multiple=True,
-         default=None,
-         help="Annotate this run with the given tag. You can specify "
-         "this option multiple times to attach multiple tags in "
-         "the run.",
-     )
-     @click.option(
-         "--max-workers",
-         default=16,
-         show_default=True,
-         help="Maximum number of parallel processes.",
-     )
-     @click.option(
-         "--max-num-splits",
-         default=100,
-         show_default=True,
-         help="Maximum number of splits allowed in a foreach. This "
-         "is a safety check preventing bugs from triggering "
-         "thousands of steps inadvertently.",
-     )
-     @click.option(
-         "--max-log-size",
-         default=10,
-         show_default=True,
-         help="Maximum size of stdout and stderr captured in "
-         "megabytes. If a step outputs more than this to "
-         "stdout/stderr, its output will be truncated.",
-     )
-     @click.option(
-         "--with",
-         "decospecs",
-         multiple=True,
-         help="Add a decorator to all steps. You can specify this "
-         "option multiple times to attach multiple decorators "
-         "in steps.",
-     )
-     @click.option(
-         "--run-id-file",
-         default=None,
-         show_default=True,
-         type=str,
-         help="Write the ID of this run to the file specified.",
-     )
-     @click.option(
-         "--runner-attribute-file",
-         default=None,
-         show_default=True,
-         type=str,
-         help="Write the metadata and pathspec of this run to the file specified. Used internally for Metaflow's Runner API.",
-     )
-     @wraps(func)
-     def wrapper(*args, **kwargs):
-         return func(*args, **kwargs)
-
-     return wrapper
-
-
- @click.option(
-     "--origin-run-id",
-     default=None,
-     help="ID of the run that should be resumed. By default, the "
-     "last run executed locally.",
- )
- @click.option(
-     "--run-id",
-     default=None,
-     help="Run ID for the new run. By default, a new run-id will be generated",
-     hidden=True,
- )
- @click.option(
-     "--clone-only/--no-clone-only",
-     default=False,
-     show_default=True,
-     help="Only clone tasks without continuing execution",
-     hidden=True,
- )
- @click.option(
-     "--reentrant/--no-reentrant",
-     default=False,
-     show_default=True,
-     hidden=True,
-     help="If specified, allows this call to be called in parallel",
- )
- @click.option(
-     "--resume-identifier",
-     default=None,
-     show_default=True,
-     hidden=True,
-     help="If specified, it identifies the task that started this resume call. It is in the form of {step_name}-{task_id}",
- )
- @click.argument("step-to-rerun", required=False)
- @cli.command(help="Resume execution of a previous run of this flow.")
- @common_run_options
- @click.pass_obj
- def resume(
-     obj,
-     tags=None,
-     step_to_rerun=None,
-     origin_run_id=None,
-     run_id=None,
-     clone_only=False,
-     reentrant=False,
-     max_workers=None,
-     max_num_splits=None,
-     max_log_size=None,
-     decospecs=None,
-     run_id_file=None,
-     resume_identifier=None,
-     runner_attribute_file=None,
- ):
-     before_run(obj, tags, decospecs)
-
-     if origin_run_id is None:
-         origin_run_id = get_latest_run_id(obj.echo, obj.flow.name)
-         if origin_run_id is None:
-             raise CommandException(
-                 "A previous run id was not found. Specify --origin-run-id."
-             )
-
-     if step_to_rerun is None:
-         steps_to_rerun = set()
-     else:
-         # validate step name
-         if step_to_rerun not in obj.graph.nodes:
-             raise CommandException(
-                 "invalid step name {0} specified, must be step present in "
-                 "current form of execution graph. Valid step names include: {1}".format(
-                     step_to_rerun, ",".join(list(obj.graph.nodes.keys()))
-                 )
-             )
-         steps_to_rerun = {step_to_rerun}
-
-     if run_id:
-         # Run-ids that are provided by the metadata service are always integers.
-         # External providers or run-ids (like external schedulers) always need to
-         # be non-integers to avoid any clashes. This condition ensures this.
-         try:
-             int(run_id)
-         except:
-             pass
-         else:
-             raise CommandException("run-id %s cannot be an integer" % run_id)
-
-     runtime = NativeRuntime(
-         obj.flow,
-         obj.graph,
-         obj.flow_datastore,
-         obj.metadata,
-         obj.environment,
-         obj.package,
-         obj.logger,
-         obj.entrypoint,
-         obj.event_logger,
-         obj.monitor,
-         run_id=run_id,
-         clone_run_id=origin_run_id,
-         clone_only=clone_only,
-         reentrant=reentrant,
-         steps_to_rerun=steps_to_rerun,
-         max_workers=max_workers,
-         max_num_splits=max_num_splits,
-         max_log_size=max_log_size * 1024 * 1024,
-         resume_identifier=resume_identifier,
-     )
-     write_file(run_id_file, runtime.run_id)
-     runtime.print_workflow_info()
-
-     runtime.persist_constants()
-
-     if runner_attribute_file:
-         with open(runner_attribute_file, "w", encoding="utf-8") as f:
-             json.dump(
-                 {
-                     "run_id": runtime.run_id,
-                     "flow_name": obj.flow.name,
-                     "metadata": obj.metadata.metadata_str(),
-                 },
-                 f,
-             )
-
-     # We may skip clone-only resume if this is not a resume leader,
-     # and clone is already complete.
-     if runtime.should_skip_clone_only_execution():
-         return
-
-     current._update_env(
-         {
-             "run_id": runtime.run_id,
-         }
-     )
-     _system_logger.log_event(
-         level="info",
-         module="metaflow.resume",
-         name="start",
-         payload={
-             "msg": "Resuming run",
-         },
-     )
-
-     with runtime.run_heartbeat():
-         if clone_only:
-             runtime.clone_original_run()
-         else:
-             runtime.clone_original_run(generate_task_obj=True, verbose=False)
-             runtime.execute()
-
-
- @tracing.cli_entrypoint("cli/run")
- @parameters.add_custom_parameters(deploy_mode=True)
- @cli.command(help="Run the workflow locally.")
- @common_run_options
- @click.option(
-     "--namespace",
-     "user_namespace",
-     default=None,
-     help="Change namespace from the default (your username) to "
-     "the specified tag. Note that this option does not alter "
-     "tags assigned to the objects produced by this run, just "
-     "what existing objects are visible in the client API. You "
-     "can enable the global namespace with an empty string."
-     "--namespace=",
- )
- @click.pass_obj
- def run(
-     obj,
-     tags=None,
-     max_workers=None,
-     max_num_splits=None,
-     max_log_size=None,
-     decospecs=None,
-     run_id_file=None,
-     runner_attribute_file=None,
-     user_namespace=None,
-     **kwargs
- ):
-     if user_namespace is not None:
-         namespace(user_namespace or None)
-     before_run(obj, tags, decospecs)
-
-     runtime = NativeRuntime(
-         obj.flow,
-         obj.graph,
-         obj.flow_datastore,
-         obj.metadata,
-         obj.environment,
-         obj.package,
-         obj.logger,
-         obj.entrypoint,
-         obj.event_logger,
-         obj.monitor,
-         max_workers=max_workers,
-         max_num_splits=max_num_splits,
-         max_log_size=max_log_size * 1024 * 1024,
-     )
-     write_latest_run_id(obj, runtime.run_id)
-     write_file(run_id_file, runtime.run_id)
-
-     obj.flow._set_constants(obj.graph, kwargs)
-     current._update_env(
-         {
-             "run_id": runtime.run_id,
-         }
-     )
-     _system_logger.log_event(
-         level="info",
-         module="metaflow.run",
-         name="start",
-         payload={
-             "msg": "Starting run",
-         },
-     )
-     runtime.print_workflow_info()
-     runtime.persist_constants()
-
-     if runner_attribute_file:
-         with open(runner_attribute_file, "w", encoding="utf-8") as f:
-             json.dump(
-                 {
-                     "run_id": runtime.run_id,
-                     "flow_name": obj.flow.name,
-                     "metadata": obj.metadata.metadata_str(),
-                 },
-                 f,
-             )
-     runtime.execute()
-
-
- def write_file(file_path, content):
-     if file_path is not None:
-         with open(file_path, "w") as f:
-             f.write(str(content))
-
-
- def before_run(obj, tags, decospecs):
-     validate_tags(tags)
-
-     # There's a --with option both at the top-level and for the run
-     # subcommand. Why?
-     #
-     # "run --with shoes" looks so much better than "--with shoes run".
-     # This is a very common use case of --with.
-     #
-     # A downside is that we need to have the following decorators handling
-     # in two places in this module and make sure _init_step_decorators
-     # doesn't get called twice.
-
-     # We want the order to be the following:
-     # - run level decospecs
-     # - top level decospecs
-     # - environment decospecs
-     all_decospecs = (
-         list(decospecs or [])
-         + obj.tl_decospecs
-         + list(obj.environment.decospecs() or [])
-     )
-     if all_decospecs:
-         decorators._attach_decorators(obj.flow, all_decospecs)
-         obj.graph = FlowGraph(obj.flow.__class__)
-
-     obj.check(obj.graph, obj.flow, obj.environment, pylint=obj.pylint)
-     # obj.environment.init_environment(obj.logger)
-
-     decorators._init_step_decorators(
-         obj.flow, obj.graph, obj.environment, obj.flow_datastore, obj.logger
-     )
-
-     obj.metadata.add_sticky_tags(tags=tags)
-
-     # Package working directory only once per run.
-     # We explicitly avoid doing this in `start` since it is invoked for every
-     # step in the run.
-     obj.package = MetaflowPackage(
-         obj.flow, obj.environment, obj.echo, obj.package_suffixes
-     )
-
-
  @cli.command(help="Print the Metaflow version")
  @click.pass_obj
  def version(obj):
      echo_always(obj.version)


- @tracing.cli_entrypoint("cli/start")
+ # NOTE: add_decorator_options should be TL because it checks to make sure
+ # that no option conflict with the ones below
  @decorators.add_decorator_options
+ @config_options
  @click.command(
-     cls=click.CommandCollection,
-     sources=[cli] + plugins.get_plugin_cli(),
+     cls=LazyPluginCommandCollection,
+     sources=[cli],
+     lazy_sources=plugins.get_plugin_cli_path(),
      invoke_without_command=True,
  )
+ @tracing.cli_entrypoint("cli/start")
+ # Quiet is eager to make sure it is available when processing --config options since
+ # we need it to construct a context to pass to any DeployTimeField for the default
+ # value.
  @click.option(
      "--quiet/--not-quiet",
      show_default=True,
      default=False,
      help="Suppress unnecessary messages",
+     is_eager=True,
  )
  @click.option(
      "--metadata",
@@ -900,12 +263,14 @@ def version(obj):
      type=click.Choice(["local"] + [m.TYPE for m in ENVIRONMENTS]),
      help="Execution environment type",
  )
+ # See comment for --quiet
  @click.option(
      "--datastore",
      default=DEFAULT_DATASTORE,
      show_default=True,
      type=click.Choice([d.TYPE for d in DATASTORES]),
      help="Data backend type",
+     is_eager=True,
  )
  @click.option("--datastore-root", help="Root path for datastore")
  @click.option(
@@ -942,6 +307,15 @@ def version(obj):
      type=click.Choice(MONITOR_SIDECARS),
      help="Monitoring backend type",
  )
+ @click.option(
+     "--local-config-file",
+     type=LocalFileInput(exists=True, readable=True, dir_okay=False, resolve_path=True),
+     required=False,
+     default=None,
+     help="A filename containing the dumped configuration values. Internal use only.",
+     hidden=True,
+     is_eager=True,
+ )
  @click.pass_context
  def start(
      ctx,
@@ -955,9 +329,11 @@ def start(
      pylint=None,
      event_logger=None,
      monitor=None,
+     local_config_file=None,
+     config_file=None,
+     config_value=None,
      **deco_options
  ):
-     global echo
      if quiet:
          echo = echo_dev_null
      else:
@@ -972,17 +348,111 @@ def start(
      echo(" executing *%s*" % ctx.obj.flow.name, fg="magenta", nl=False)
      echo(" for *%s*" % resolve_identity(), fg="magenta")

+     # Setup the context
      cli_args._set_top_kwargs(ctx.params)
      ctx.obj.echo = echo
      ctx.obj.echo_always = echo_always
      ctx.obj.is_quiet = quiet
-     ctx.obj.graph = FlowGraph(ctx.obj.flow.__class__)
      ctx.obj.logger = logger
-     ctx.obj.check = _check
      ctx.obj.pylint = pylint
+     ctx.obj.check = functools.partial(_check, echo)
      ctx.obj.top_cli = cli
      ctx.obj.package_suffixes = package_suffixes.split(",")
-     ctx.obj.reconstruct_cli = _reconstruct_cli
+
+     ctx.obj.datastore_impl = [d for d in DATASTORES if d.TYPE == datastore][0]
+
+     if datastore_root is None:
+         datastore_root = ctx.obj.datastore_impl.get_datastore_root_from_config(
+             ctx.obj.echo
+         )
+     if datastore_root is None:
+         raise CommandException(
+             "Could not find the location of the datastore -- did you correctly set the "
+             "METAFLOW_DATASTORE_SYSROOT_%s environment variable?" % datastore.upper()
+         )
+
+     ctx.obj.datastore_impl.datastore_root = datastore_root
+
+     FlowDataStore.default_storage_impl = ctx.obj.datastore_impl
+
+     # At this point, we are able to resolve the user-configuration options so we can
+     # process all those decorators that the user added that will modify the flow based
+     # on those configurations. It is important to do this as early as possible since it
+     # actually modifies the flow itself
+
+     # When we process the options, the first one processed will return None and the
+     # second one processed will return the actual options. The order of processing
+     # depends on what (and in what order) the user specifies on the command line.
+     config_options = config_file or config_value
+
+     if (
+         hasattr(ctx, "saved_args")
+         and ctx.saved_args
+         and ctx.saved_args[0] == "resume"
+         and getattr(ctx.obj, "has_config_options", False)
+     ):
+         # In the case of resume, we actually need to load the configurations
+         # from the resumed run to process them. This can be slightly onerous so check
+         # if we need to in the first place
+         if getattr(ctx.obj, "has_cl_config_options", False):
+             raise click.UsageError(
+                 "Cannot specify --config-file or --config-value with 'resume'"
+             )
+         # We now load the config artifacts from the original run id
+         run_id = None
+         try:
+             idx = ctx.saved_args.index("--origin-run-id")
+         except ValueError:
+             idx = -1
+         if idx >= 0:
+             run_id = ctx.saved_args[idx + 1]
+         else:
+             run_id = get_latest_run_id(ctx.obj.echo, ctx.obj.flow.name)
+         if run_id is None:
+             raise CommandException(
+                 "A previous run id was not found. Specify --origin-run-id."
+             )
+         # We get the name of the parameters we need to load from the datastore -- these
+         # are accessed using the *variable* name and not necessarily the *parameter* name
+         config_var_names = []
+         config_param_names = []
+         for name, param in ctx.obj.flow._get_parameters():
+             if not param.IS_CONFIG_PARAMETER:
+                 continue
+             config_var_names.append(name)
+             config_param_names.append(param.name)
+
+         # We just need a task datastore that will be thrown away -- we do this so
+         # we don't have to create the logger, monitor, etc.
+         debug.userconf_exec("Loading config parameters from run %s" % run_id)
+         for d in TaskDataStoreSet(
+             FlowDataStore(ctx.obj.flow.name),
+             run_id,
+             steps=["_parameters"],
+             prefetch_data_artifacts=config_var_names,
+         ):
+             param_ds = d
+
+         # We can now set the CONFIGS value in the flow properly. This will overwrite
+         # anything that may have been passed in by default and we will use exactly what
+         # the original flow had. Note that these are accessed through the parameter name
+         ctx.obj.flow._flow_state[_FlowState.CONFIGS].clear()
+         d = ctx.obj.flow._flow_state[_FlowState.CONFIGS]
+         for param_name, var_name in zip(config_param_names, config_var_names):
+             val = param_ds[var_name]
+             debug.userconf_exec("Loaded config %s as: %s" % (param_name, val))
+             d[param_name] = val
+
+     elif getattr(ctx.obj, "delayed_config_exception", None):
+         # If we are not doing a resume, any exception we had parsing configs needs to
+         # be raised. For resume, since we ignore those options, we ignore the error.
+         raise ctx.obj.delayed_config_exception
+
+     new_cls = ctx.obj.flow._process_config_decorators(config_options)
+     if new_cls:
+         ctx.obj.flow = new_cls(use_cli=False)
+
+     ctx.obj.graph = ctx.obj.flow._graph

      ctx.obj.environment = [
          e for e in ENVIRONMENTS + [MetaflowEnvironment] if e.TYPE == environment
@@ -1005,21 +475,6 @@ def start(
          ctx.obj.environment, ctx.obj.flow, ctx.obj.event_logger, ctx.obj.monitor
      )

-     ctx.obj.datastore_impl = [d for d in DATASTORES if d.TYPE == datastore][0]
-
-     if datastore_root is None:
-         datastore_root = ctx.obj.datastore_impl.get_datastore_root_from_config(
-             ctx.obj.echo
-         )
-     if datastore_root is None:
-         raise CommandException(
-             "Could not find the location of the datastore -- did you correctly set the "
-             "METAFLOW_DATASTORE_SYSROOT_%s environment variable?" % datastore.upper()
-         )
-
-     ctx.obj.datastore_impl.datastore_root = datastore_root
-
-     FlowDataStore.default_storage_impl = ctx.obj.datastore_impl
      ctx.obj.flow_datastore = FlowDataStore(
          ctx.obj.flow.name,
          ctx.obj.environment,
@@ -1028,6 +483,10 @@ def start(
          ctx.obj.monitor,
      )

+     ctx.obj.config_options = config_options
+
+     decorators._init(ctx.obj.flow)
+
      # It is important to initialize flow decorators early as some of the
      # things they provide may be used by some of the objects initialized after.
      decorators._init_flow_decorators(
@@ -1045,15 +504,32 @@ def start(
      # *after* the run decospecs so that they don't take precedence. In other
      # words, for the same decorator, we want `myflow.py run --with foo` to
      # take precedence over any other `foo` decospec
+
+     # Note that top-level decospecs are used primarily with non run/resume
+     # options as well as with the airflow/argo/sfn integrations which pass
+     # all the decospecs (the ones from top-level but also the ones from the
+     # run/resume level) through the tl decospecs.
      ctx.obj.tl_decospecs = list(decospecs or [])

      # initialize current and parameter context for deploy-time parameters
      current._set_env(flow=ctx.obj.flow, is_running=False)
      parameters.set_parameter_context(
-         ctx.obj.flow.name, ctx.obj.echo, ctx.obj.flow_datastore
+         ctx.obj.flow.name,
+         ctx.obj.echo,
+         ctx.obj.flow_datastore,
+         {
+             k: ConfigValue(v)
+             for k, v in ctx.obj.flow.__class__._flow_state.get(
+                 _FlowState.CONFIGS, {}
+             ).items()
+         },
      )

-     if ctx.invoked_subcommand not in ("run", "resume"):
+     if (
+         hasattr(ctx, "saved_args")
+         and ctx.saved_args
+         and ctx.saved_args[0] not in ("run", "resume")
+     ):
          # run/resume are special cases because they can add more decorators with --with,
          # so they have to take care of themselves.
          all_decospecs = ctx.obj.tl_decospecs + list(
@@ -1061,8 +537,10 @@ def start(
      )
      if all_decospecs:
          decorators._attach_decorators(ctx.obj.flow, all_decospecs)
+         decorators._init(ctx.obj.flow)
          # Regenerate graph if we attached more decorators
-         ctx.obj.graph = FlowGraph(ctx.obj.flow.__class__)
+         ctx.obj.flow.__class__._init_attrs()
+         ctx.obj.graph = ctx.obj.flow._graph

      decorators._init_step_decorators(
          ctx.obj.flow,
@@ -1074,25 +552,12 @@ def start(
      )

      # TODO (savin): Enable lazy instantiation of package
      ctx.obj.package = None
+
      if ctx.invoked_subcommand is None:
          ctx.invoke(check)


- def _reconstruct_cli(params):
-     for k, v in params.items():
-         if v:
-             if k == "decospecs":
-                 k = "with"
-             k = k.replace("_", "-")
-             if not isinstance(v, tuple):
-                 v = [v]
-             for value in v:
-                 yield "--%s" % k
-                 if not isinstance(value, bool):
-                     yield str(value)
-
-
- def _check(graph, flow, environment, pylint=True, warnings=False, **kwargs):
+ def _check(echo, graph, flow, environment, pylint=True, warnings=False, **kwargs):
      echo("Validating your flow...", fg="magenta", bold=False)
      linter = lint.linter
      # TODO set linter settings
@@ -1131,10 +596,13 @@ def _check(graph, flow, environment, pylint=True, warnings=False, **kwargs):

  def print_metaflow_exception(ex):
      echo_always(ex.headline, indent=True, nl=False, bold=True)
-     if ex.line_no is None:
-         echo_always(":")
-     else:
-         echo_always(" on line %d:" % ex.line_no, bold=True)
+     location = ""
+     if ex.source_file is not None:
+         location += " in file %s" % ex.source_file
+     if ex.line_no is not None:
+         location += " on line %d" % ex.line_no
+     location += ":"
+     echo_always(location, bold=True)
      echo_always(ex.message, indent=True, bold=False, padding_bottom=True)
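Much of the new surface in this release (the metaflow/user_configs package, the --config-file/--config-value handling above, and the resume-time reloading of configs from the origin run's _parameters task) supports the Config feature introduced in the Metaflow 2.13 line. As a rough sketch of how a flow consumes it (the flow name, file name, and config keys below are hypothetical; the Config class follows the documented Metaflow 2.13 API):

from metaflow import Config, FlowSpec, step


class TrainFlow(FlowSpec):
    # Resolved before the run starts, either from the default file or from
    # the --config-file / --config-value options handled in cli.py above.
    cfg = Config("cfg", default="config.json")

    @step
    def start(self):
        # Config values are read-only and support attribute-style access.
        print("learning rate:", self.cfg.lr)
        self.next(self.end)

    @step
    def end(self):
        pass


if __name__ == "__main__":
    TrainFlow()

A run would then be launched with something like: python trainflow.py run --config-value cfg '{"lr": 0.01}'. Per the check added in start(), these options are rejected for resume, which instead reloads the config artifacts recorded by the original run.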