metaflow 2.18.12__py2.py3-none-any.whl → 2.19.0__py2.py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (52)
  1. metaflow/__init__.py +1 -0
  2. metaflow/cli.py +78 -13
  3. metaflow/cli_components/run_cmds.py +182 -39
  4. metaflow/cli_components/step_cmd.py +160 -4
  5. metaflow/client/__init__.py +1 -0
  6. metaflow/client/core.py +162 -99
  7. metaflow/client/filecache.py +59 -32
  8. metaflow/cmd/code/__init__.py +2 -1
  9. metaflow/datastore/__init__.py +1 -0
  10. metaflow/datastore/content_addressed_store.py +40 -9
  11. metaflow/datastore/datastore_set.py +10 -1
  12. metaflow/datastore/flow_datastore.py +123 -4
  13. metaflow/datastore/spin_datastore.py +91 -0
  14. metaflow/datastore/task_datastore.py +86 -2
  15. metaflow/decorators.py +75 -6
  16. metaflow/extension_support/__init__.py +372 -305
  17. metaflow/flowspec.py +3 -2
  18. metaflow/graph.py +2 -2
  19. metaflow/metaflow_config.py +41 -0
  20. metaflow/metaflow_profile.py +18 -0
  21. metaflow/packaging_sys/utils.py +2 -39
  22. metaflow/packaging_sys/v1.py +63 -16
  23. metaflow/plugins/__init__.py +2 -0
  24. metaflow/plugins/argo/argo_workflows.py +20 -25
  25. metaflow/plugins/argo/param_val.py +19 -0
  26. metaflow/plugins/cards/card_datastore.py +13 -13
  27. metaflow/plugins/cards/card_decorator.py +1 -0
  28. metaflow/plugins/cards/card_modules/basic.py +9 -3
  29. metaflow/plugins/datastores/local_storage.py +12 -6
  30. metaflow/plugins/datastores/spin_storage.py +12 -0
  31. metaflow/plugins/datatools/s3/s3.py +29 -10
  32. metaflow/plugins/datatools/s3/s3op.py +90 -62
  33. metaflow/plugins/metadata_providers/local.py +76 -82
  34. metaflow/plugins/metadata_providers/spin.py +16 -0
  35. metaflow/runner/click_api.py +4 -2
  36. metaflow/runner/metaflow_runner.py +210 -19
  37. metaflow/runtime.py +348 -21
  38. metaflow/task.py +61 -12
  39. metaflow/user_configs/config_parameters.py +2 -4
  40. metaflow/user_decorators/mutable_flow.py +1 -1
  41. metaflow/user_decorators/user_step_decorator.py +10 -1
  42. metaflow/util.py +191 -1
  43. metaflow/version.py +1 -1
  44. {metaflow-2.18.12.data → metaflow-2.19.0.data}/data/share/metaflow/devtools/Makefile +10 -0
  45. {metaflow-2.18.12.dist-info → metaflow-2.19.0.dist-info}/METADATA +2 -4
  46. {metaflow-2.18.12.dist-info → metaflow-2.19.0.dist-info}/RECORD +52 -48
  47. {metaflow-2.18.12.data → metaflow-2.19.0.data}/data/share/metaflow/devtools/Tiltfile +0 -0
  48. {metaflow-2.18.12.data → metaflow-2.19.0.data}/data/share/metaflow/devtools/pick_services.sh +0 -0
  49. {metaflow-2.18.12.dist-info → metaflow-2.19.0.dist-info}/WHEEL +0 -0
  50. {metaflow-2.18.12.dist-info → metaflow-2.19.0.dist-info}/entry_points.txt +0 -0
  51. {metaflow-2.18.12.dist-info → metaflow-2.19.0.dist-info}/licenses/LICENSE +0 -0
  52. {metaflow-2.18.12.dist-info → metaflow-2.19.0.dist-info}/top_level.txt +0 -0
metaflow/__init__.py CHANGED
@@ -146,6 +146,7 @@ from .client import (
146
146
  metadata,
147
147
  get_metadata,
148
148
  default_metadata,
149
+ inspect_spin,
149
150
  Metaflow,
150
151
  Flow,
151
152
  Run,
metaflow/cli.py CHANGED
@@ -1,3 +1,4 @@
1
+ import os
1
2
  import functools
2
3
  import inspect
3
4
  import os
@@ -7,7 +8,6 @@ from datetime import datetime
7
8
 
8
9
  import metaflow.tracing as tracing
9
10
  from metaflow._vendor import click
10
- from metaflow.system import _system_logger, _system_monitor
11
11
 
12
12
  from . import decorators, lint, metaflow_version, parameters, plugins
13
13
  from .cli_args import cli_args
@@ -27,6 +27,8 @@ from .metaflow_config import (
27
27
  DEFAULT_PACKAGE_SUFFIXES,
28
28
  )
29
29
  from .metaflow_current import current
30
+ from .metaflow_profile import from_start
31
+ from metaflow.system import _system_monitor, _system_logger
30
32
  from .metaflow_environment import MetaflowEnvironment
31
33
  from .packaging_sys import MetaflowCodeContent
32
34
  from .plugins import (
@@ -38,9 +40,9 @@ from .plugins import (
38
40
  )
39
41
  from .pylint_wrapper import PyLint
40
42
  from .R import metaflow_r_version, use_r
43
+ from .util import get_latest_run_id, resolve_identity, decompress_list
41
44
  from .user_configs.config_options import LocalFileInput, config_options
42
45
  from .user_configs.config_parameters import ConfigValue
43
- from .util import get_latest_run_id, resolve_identity
44
46
 
45
47
  ERASE_TO_EOL = "\033[K"
46
48
  HIGHLIGHT = "red"
@@ -125,6 +127,8 @@ def logger(body="", system_msg=False, head="", bad=False, timestamp=True, nl=Tru
125
127
  "step": "metaflow.cli_components.step_cmd.step",
126
128
  "run": "metaflow.cli_components.run_cmds.run",
127
129
  "resume": "metaflow.cli_components.run_cmds.resume",
130
+ "spin": "metaflow.cli_components.run_cmds.spin",
131
+ "spin-step": "metaflow.cli_components.step_cmd.spin_step",
128
132
  },
129
133
  )
130
134
  def cli(ctx):
@@ -318,6 +322,13 @@ def version(obj):
318
322
  hidden=True,
319
323
  is_eager=True,
320
324
  )
325
+ @click.option(
326
+ "--mode",
327
+ type=click.Choice(["spin"]),
328
+ default=None,
329
+ help="Execution mode for metaflow CLI commands. Use 'spin' to enable "
330
+ "spin metadata and spin datastore for executions",
331
+ )
321
332
  @click.pass_context
322
333
  def start(
323
334
  ctx,
@@ -335,6 +346,7 @@ def start(
335
346
  local_config_file=None,
336
347
  config=None,
337
348
  config_value=None,
349
+ mode=None,
338
350
  **deco_options
339
351
  ):
340
352
  if quiet:
@@ -347,6 +359,7 @@ def start(
347
359
  if use_r():
348
360
  version = metaflow_r_version()
349
361
 
362
+ from_start("MetaflowCLI: Starting")
350
363
  echo("Metaflow %s" % version, fg="magenta", bold=True, nl=False)
351
364
  echo(" executing *%s*" % ctx.obj.flow.name, fg="magenta", nl=False)
352
365
  echo(" for *%s*" % resolve_identity(), fg="magenta")
@@ -366,6 +379,7 @@ def start(
366
379
  ctx.obj.check = functools.partial(_check, echo)
367
380
  ctx.obj.top_cli = cli
368
381
  ctx.obj.package_suffixes = package_suffixes.split(",")
382
+ ctx.obj.spin_mode = mode == "spin"
369
383
 
370
384
  ctx.obj.datastore_impl = [d for d in DATASTORES if d.TYPE == datastore][0]
371
385
 
@@ -472,19 +486,12 @@ def start(
472
486
  # set force rebuild flag for environments that support it.
473
487
  ctx.obj.environment._force_rebuild = force_rebuild_environments
474
488
  ctx.obj.environment.validate_environment(ctx.obj.logger, datastore)
475
-
476
489
  ctx.obj.event_logger = LOGGING_SIDECARS[event_logger](
477
490
  flow=ctx.obj.flow, env=ctx.obj.environment
478
491
  )
479
- ctx.obj.event_logger.start()
480
- _system_logger.init_system_logger(ctx.obj.flow.name, ctx.obj.event_logger)
481
-
482
492
  ctx.obj.monitor = MONITOR_SIDECARS[monitor](
483
493
  flow=ctx.obj.flow, env=ctx.obj.environment
484
494
  )
485
- ctx.obj.monitor.start()
486
- _system_monitor.init_system_monitor(ctx.obj.flow.name, ctx.obj.monitor)
487
-
488
495
  ctx.obj.metadata = [m for m in METADATA_PROVIDERS if m.TYPE == metadata][0](
489
496
  ctx.obj.environment, ctx.obj.flow, ctx.obj.event_logger, ctx.obj.monitor
490
497
  )
@@ -498,6 +505,57 @@ def start(
498
505
  )
499
506
 
500
507
  ctx.obj.config_options = config_options
508
+ ctx.obj.is_spin = False
509
+ ctx.obj.skip_decorators = False
510
+
511
+ # Override values for spin steps, or if we are in spin mode
512
+ if (
513
+ hasattr(ctx, "saved_args")
514
+ and ctx.saved_args
515
+ and "spin" in ctx.saved_args[0]
516
+ or ctx.obj.spin_mode
517
+ ):
518
+ # To minimize side effects for spin, we will only use the following:
519
+ # - local metadata provider,
520
+ # - local datastore,
521
+ # - local environment,
522
+ # - null event logger,
523
+ # - null monitor
524
+ ctx.obj.is_spin = True
525
+ if "--skip-decorators" in ctx.saved_args:
526
+ ctx.obj.skip_decorators = True
527
+
528
+ ctx.obj.event_logger = LOGGING_SIDECARS["nullSidecarLogger"](
529
+ flow=ctx.obj.flow, env=ctx.obj.environment
530
+ )
531
+ ctx.obj.monitor = MONITOR_SIDECARS["nullSidecarMonitor"](
532
+ flow=ctx.obj.flow, env=ctx.obj.environment
533
+ )
534
+ # Use spin metadata, spin datastore, and spin datastore root
535
+ ctx.obj.metadata = [m for m in METADATA_PROVIDERS if m.TYPE == "spin"][0](
536
+ ctx.obj.environment, ctx.obj.flow, ctx.obj.event_logger, ctx.obj.monitor
537
+ )
538
+ ctx.obj.datastore_impl = [d for d in DATASTORES if d.TYPE == "spin"][0]
539
+ datastore_root = ctx.obj.datastore_impl.get_datastore_root_from_config(
540
+ ctx.obj.echo, create_on_absent=True
541
+ )
542
+ ctx.obj.datastore_impl.datastore_root = datastore_root
543
+
544
+ ctx.obj.flow_datastore = FlowDataStore(
545
+ ctx.obj.flow.name,
546
+ ctx.obj.environment, # Same environment as run/resume
547
+ ctx.obj.metadata, # local metadata
548
+ ctx.obj.event_logger, # null event logger
549
+ ctx.obj.monitor, # null monitor
550
+ storage_impl=ctx.obj.datastore_impl,
551
+ )
552
+
553
+ # Start event logger and monitor
554
+ ctx.obj.event_logger.start()
555
+ _system_logger.init_system_logger(ctx.obj.flow.name, ctx.obj.event_logger)
556
+
557
+ ctx.obj.monitor.start()
558
+ _system_monitor.init_system_monitor(ctx.obj.flow.name, ctx.obj.monitor)
501
559
 
502
560
  decorators._init(ctx.obj.flow)
503
561
 
@@ -512,9 +570,11 @@ def start(
512
570
  ctx.obj.logger,
513
571
  echo,
514
572
  deco_options,
573
+ ctx.obj.is_spin,
574
+ ctx.obj.skip_decorators,
515
575
  )
516
576
 
517
- # In the case of run/resume, we will want to apply the TL decospecs
577
+ # In the case of run/resume/spin, we will want to apply the TL decospecs
518
578
  # *after* the run decospecs so that they don't take precedence. In other
519
579
  # words, for the same decorator, we want `myflow.py run --with foo` to
520
580
  # take precedence over any other `foo` decospec
@@ -542,11 +602,10 @@ def start(
542
602
  if (
543
603
  hasattr(ctx, "saved_args")
544
604
  and ctx.saved_args
545
- and ctx.saved_args[0] not in ("run", "resume")
605
+ and ctx.saved_args[0] not in ("run", "resume", "spin")
546
606
  ):
547
- # run/resume are special cases because they can add more decorators with --with,
607
+ # run/resume/spin are special cases because they can add more decorators with --with,
548
608
  # so they have to take care of themselves.
549
-
550
609
  all_decospecs = ctx.obj.tl_decospecs + list(
551
610
  ctx.obj.environment.decospecs() or []
552
611
  )
@@ -556,6 +615,9 @@ def start(
556
615
  # or a scheduler setting them up in their own way.
557
616
  if ctx.saved_args[0] not in ("step", "init"):
558
617
  all_decospecs += DEFAULT_DECOSPECS.split()
618
+ elif ctx.saved_args[0] == "spin-step":
619
+ # If we are in spin-args, we will not attach any decorators
620
+ all_decospecs = []
559
621
  if all_decospecs:
560
622
  decorators._attach_decorators(ctx.obj.flow, all_decospecs)
561
623
  decorators._init(ctx.obj.flow)
@@ -569,6 +631,9 @@ def start(
569
631
  ctx.obj.environment,
570
632
  ctx.obj.flow_datastore,
571
633
  ctx.obj.logger,
634
+ # The last two arguments are only used for spin steps
635
+ ctx.obj.is_spin,
636
+ ctx.obj.skip_decorators,
572
637
  )
573
638
 
574
639
  # Check the graph again (mutators may have changed it)
metaflow/cli_components/run_cmds.py CHANGED
@@ -8,21 +8,26 @@ from .. import decorators, namespace, parameters, tracing
8
8
  from ..exception import CommandException
9
9
  from ..graph import FlowGraph
10
10
  from ..metaflow_current import current
11
- from ..metaflow_config import DEFAULT_DECOSPECS, FEAT_ALWAYS_UPLOAD_CODE_PACKAGE
11
+ from ..metaflow_config import (
12
+ DEFAULT_DECOSPECS,
13
+ FEAT_ALWAYS_UPLOAD_CODE_PACKAGE,
14
+ SPIN_PERSIST,
15
+ )
16
+ from ..metaflow_profile import from_start
12
17
  from ..package import MetaflowPackage
13
- from ..runtime import NativeRuntime
18
+ from ..runtime import NativeRuntime, SpinRuntime
14
19
  from ..system import _system_logger
15
20
 
16
21
  # from ..client.core import Run
17
22
 
18
23
  from ..tagging_util import validate_tags
19
- from ..util import get_latest_run_id, write_latest_run_id
24
+ from ..util import get_latest_run_id, write_latest_run_id, parse_spin_pathspec
20
25
 
21
26
 
22
- def before_run(obj, tags, decospecs):
27
+ def before_run(obj, tags, decospecs, skip_decorators=False):
23
28
  validate_tags(tags)
24
29
 
25
- # There's a --with option both at the top-level and for the run
30
+ # There's a --with option both at the top-level and for the run/resume/spin
26
31
  # subcommand. Why?
27
32
  #
28
33
  # "run --with shoes" looks so much better than "--with shoes run".
@@ -36,26 +41,36 @@ def before_run(obj, tags, decospecs):
36
41
  # - run level decospecs
37
42
  # - top level decospecs
38
43
  # - environment decospecs
39
- all_decospecs = (
40
- list(decospecs or [])
41
- + obj.tl_decospecs
42
- + list(obj.environment.decospecs() or [])
43
- )
44
- if all_decospecs:
45
- # These decospecs are the ones from run/resume PLUS the ones from the
46
- # environment (for example the @conda)
47
- decorators._attach_decorators(obj.flow, all_decospecs)
48
- decorators._init(obj.flow)
49
- # Regenerate graph if we attached more decorators
50
- obj.flow.__class__._init_graph()
51
- obj.graph = obj.flow._graph
52
-
53
- obj.check(obj.graph, obj.flow, obj.environment, pylint=obj.pylint)
54
- # obj.environment.init_environment(obj.logger)
55
-
56
- decorators._init_step_decorators(
57
- obj.flow, obj.graph, obj.environment, obj.flow_datastore, obj.logger
44
+ from_start(
45
+ f"Inside before_run, skip_decorators={skip_decorators}, is_spin={obj.is_spin}"
58
46
  )
47
+ if not skip_decorators:
48
+ all_decospecs = (
49
+ list(decospecs or [])
50
+ + obj.tl_decospecs
51
+ + list(obj.environment.decospecs() or [])
52
+ )
53
+ if all_decospecs:
54
+ # These decospecs are the ones from run/resume/spin PLUS the ones from the
55
+ # environment (for example the @conda)
56
+ decorators._attach_decorators(obj.flow, all_decospecs)
57
+ decorators._init(obj.flow)
58
+ # Regenerate graph if we attached more decorators
59
+ obj.flow.__class__._init_graph()
60
+ obj.graph = obj.flow._graph
61
+
62
+ obj.check(obj.graph, obj.flow, obj.environment, pylint=obj.pylint)
63
+ # obj.environment.init_environment(obj.logger)
64
+
65
+ decorators._init_step_decorators(
66
+ obj.flow,
67
+ obj.graph,
68
+ obj.environment,
69
+ obj.flow_datastore,
70
+ obj.logger,
71
+ obj.is_spin,
72
+ skip_decorators,
73
+ )
59
74
  # Re-read graph since it may have been modified by mutators
60
75
  obj.graph = obj.flow._graph
61
76
 
@@ -73,6 +88,29 @@ def before_run(obj, tags, decospecs):
73
88
  )
74
89
 
75
90
 
91
+ def common_runner_options(func):
92
+ @click.option(
93
+ "--run-id-file",
94
+ default=None,
95
+ show_default=True,
96
+ type=str,
97
+ help="Write the ID of this run to the file specified.",
98
+ )
99
+ @click.option(
100
+ "--runner-attribute-file",
101
+ default=None,
102
+ show_default=True,
103
+ type=str,
104
+ help="Write the metadata and pathspec of this run to the file specified. Used internally "
105
+ "for Metaflow's Runner API.",
106
+ )
107
+ @wraps(func)
108
+ def wrapper(*args, **kwargs):
109
+ return func(*args, **kwargs)
110
+
111
+ return wrapper
112
+
113
+
76
114
  def write_file(file_path, content):
77
115
  if file_path is not None:
78
116
  with open(file_path, "w", encoding="utf-8") as f:
@@ -137,20 +175,6 @@ def common_run_options(func):
137
175
  "in steps.",
138
176
  callback=config_callback,
139
177
  )
140
- @click.option(
141
- "--run-id-file",
142
- default=None,
143
- show_default=True,
144
- type=str,
145
- help="Write the ID of this run to the file specified.",
146
- )
147
- @click.option(
148
- "--runner-attribute-file",
149
- default=None,
150
- show_default=True,
151
- type=str,
152
- help="Write the metadata and pathspec of this run to the file specified. Used internally for Metaflow's Runner API.",
153
- )
154
178
  @wraps(func)
155
179
  def wrapper(*args, **kwargs):
156
180
  return func(*args, **kwargs)
@@ -195,6 +219,7 @@ def common_run_options(func):
195
219
  @click.command(help="Resume execution of a previous run of this flow.")
196
220
  @tracing.cli("cli/resume")
197
221
  @common_run_options
222
+ @common_runner_options
198
223
  @click.pass_obj
199
224
  def resume(
200
225
  obj,
@@ -326,6 +351,7 @@ def resume(
326
351
  @click.command(help="Run the workflow locally.")
327
352
  @tracing.cli("cli/run")
328
353
  @common_run_options
354
+ @common_runner_options
329
355
  @click.option(
330
356
  "--namespace",
331
357
  "user_namespace",
@@ -348,7 +374,7 @@ def run(
348
374
  run_id_file=None,
349
375
  runner_attribute_file=None,
350
376
  user_namespace=None,
351
- **kwargs
377
+ **kwargs,
352
378
  ):
353
379
  if user_namespace is not None:
354
380
  namespace(user_namespace or None)
@@ -401,3 +427,120 @@ def run(
401
427
  )
402
428
  with runtime.run_heartbeat():
403
429
  runtime.execute()
430
+
431
+
432
+ # @parameters.add_custom_parameters(deploy_mode=True)
433
+ @click.command(help="Spins up a task for a given step from a previous run locally.")
434
+ @tracing.cli("cli/spin")
435
+ @click.argument("pathspec")
436
+ @click.option(
437
+ "--skip-decorators/--no-skip-decorators",
438
+ is_flag=True,
439
+ # Default False matches the saved_args check in cli.py for spin steps - skip_decorators
440
+ # only becomes True when explicitly passed, otherwise decorators are applied by default
441
+ default=False,
442
+ show_default=True,
443
+ help="Skip decorators attached to the step or flow.",
444
+ )
445
+ @click.option(
446
+ "--artifacts-module",
447
+ default=None,
448
+ show_default=True,
449
+ help="Path to a module that contains artifacts to be used in the spun step. "
450
+ "The artifacts should be defined as a dictionary called ARTIFACTS with keys as "
451
+ "the artifact names and values as the artifact values. The artifact values will "
452
+ "overwrite the default values of the artifacts used in the spun step.",
453
+ )
454
+ @click.option(
455
+ "--persist/--no-persist",
456
+ "persist",
457
+ default=SPIN_PERSIST,
458
+ show_default=True,
459
+ help="Whether to persist the artifacts in the spun step. If set to False, "
460
+ "the artifacts will not be persisted and will not be available in the spun step's "
461
+ "datastore.",
462
+ )
463
+ @click.option(
464
+ "--max-log-size",
465
+ default=10,
466
+ show_default=True,
467
+ help="Maximum size of stdout and stderr captured in "
468
+ "megabytes. If a step outputs more than this to "
469
+ "stdout/stderr, its output will be truncated.",
470
+ )
471
+ @common_runner_options
472
+ @click.pass_obj
473
+ def spin(
474
+ obj,
475
+ pathspec,
476
+ persist=True,
477
+ artifacts_module=None,
478
+ skip_decorators=False,
479
+ max_log_size=None,
480
+ run_id_file=None,
481
+ runner_attribute_file=None,
482
+ **kwargs,
483
+ ):
484
+ # Parse the pathspec argument to extract step name and full pathspec
485
+ step_name, parsed_pathspec = parse_spin_pathspec(pathspec, obj.flow.name)
486
+
487
+ before_run(obj, [], [], skip_decorators)
488
+ obj.echo(f"Spinning up step *{step_name}* locally for flow *{obj.flow.name}*")
489
+ # For spin, flow parameters come from the original run, but _set_constants
490
+ # requires them in kwargs. Use parameter defaults as placeholders - they'll be
491
+ # overwritten when the spin step loads artifacts from the original run.
492
+ flow_param_defaults = {}
493
+ for var, param in obj.flow._get_parameters():
494
+ if not param.IS_CONFIG_PARAMETER:
495
+ default_value = param.kwargs.get("default")
496
+ # Use None for required parameters without defaults
497
+ flow_param_defaults[param.name.replace("-", "_").lower()] = default_value
498
+ obj.flow._set_constants(obj.graph, flow_param_defaults, obj.config_options)
499
+ step_func = getattr(obj.flow, step_name, None)
500
+ if step_func is None:
501
+ raise CommandException(
502
+ f"Step '{step_name}' not found in flow '{obj.flow.name}'. "
503
+ "Please provide a valid step name."
504
+ )
505
+ from_start("Spin: before spin runtime init")
506
+ spin_runtime = SpinRuntime(
507
+ obj.flow,
508
+ obj.graph,
509
+ obj.flow_datastore,
510
+ obj.metadata,
511
+ obj.environment,
512
+ obj.package,
513
+ obj.logger,
514
+ obj.entrypoint,
515
+ obj.event_logger,
516
+ obj.monitor,
517
+ step_func,
518
+ step_name,
519
+ parsed_pathspec,
520
+ skip_decorators,
521
+ artifacts_module,
522
+ persist,
523
+ max_log_size * 1024 * 1024,
524
+ )
525
+ write_latest_run_id(obj, spin_runtime.run_id)
526
+ write_file(run_id_file, spin_runtime.run_id)
527
+ # We only need the root for the metadata, i.e. the portion before DATASTORE_LOCAL_DIR
528
+ datastore_root = spin_runtime._flow_datastore._storage_impl.datastore_root
529
+ orig_task_metadata_root = datastore_root.rsplit("/", 1)[0]
530
+ from_start("Spin: going to execute")
531
+ spin_runtime.execute()
532
+ from_start("Spin: after spin runtime execute")
533
+
534
+ if runner_attribute_file:
535
+ with open(runner_attribute_file, "w") as f:
536
+ json.dump(
537
+ {
538
+ "task_id": spin_runtime.task.task_id,
539
+ "step_name": step_name,
540
+ "run_id": spin_runtime.run_id,
541
+ "flow_name": obj.flow.name,
542
+ # Store metadata in a format that can be used by the Runner API
543
+ "metadata": f"{obj.metadata.__class__.TYPE}@{orig_task_metadata_root}",
544
+ },
545
+ f,
546
+ )
metaflow/cli_components/step_cmd.py CHANGED
@@ -1,12 +1,17 @@
1
1
  from metaflow._vendor import click
2
2
 
3
- from .. import decorators, namespace
3
+ from .. import namespace
4
4
  from ..cli import echo_always, echo_dev_null
5
5
  from ..cli_args import cli_args
6
+ from ..datastore.flow_datastore import FlowDataStore
6
7
  from ..exception import CommandException
8
+ from ..client.filecache import FileCache, FileBlobCache, TaskMetadataCache
9
+ from ..metaflow_config import SPIN_ALLOWED_DECORATORS
10
+ from ..metaflow_profile import from_start
11
+ from ..plugins import DATASTORES
7
12
  from ..task import MetaflowTask
8
13
  from ..unbounded_foreach import UBF_CONTROL, UBF_TASK
9
- from ..util import decompress_list
14
+ from ..util import decompress_list, read_artifacts_module
10
15
  import metaflow.tracing as tracing
11
16
 
12
17
 
@@ -109,7 +114,6 @@ def step(
109
114
  ubf_context="none",
110
115
  num_parallel=None,
111
116
  ):
112
-
113
117
  if ctx.obj.is_quiet:
114
118
  echo = echo_dev_null
115
119
  else:
@@ -118,7 +122,7 @@ def step(
118
122
  if ubf_context == "none":
119
123
  ubf_context = None
120
124
  if opt_namespace is not None:
121
- namespace(opt_namespace or None)
125
+ namespace(opt_namespace)
122
126
 
123
127
  func = None
124
128
  try:
@@ -176,3 +180,155 @@ def step(
176
180
  )
177
181
 
178
182
  echo("Success", fg="green", bold=True, indent=True)
183
+
184
+
185
+ @click.command(help="Internal command to spin a single task.", hidden=True)
186
+ @click.argument("step-name")
187
+ @click.option(
188
+ "--run-id",
189
+ default=None,
190
+ required=True,
191
+ help="Original run ID for the step that will be spun",
192
+ )
193
+ @click.option(
194
+ "--task-id",
195
+ default=None,
196
+ required=True,
197
+ help="Original Task ID for the step that will be spun",
198
+ )
199
+ @click.option(
200
+ "--orig-flow-datastore",
201
+ show_default=True,
202
+ help="Original datastore for the flow from which a task is being spun",
203
+ )
204
+ @click.option(
205
+ "--input-paths",
206
+ help="A comma-separated list of pathspecs specifying inputs for this step.",
207
+ )
208
+ @click.option(
209
+ "--split-index",
210
+ type=int,
211
+ default=None,
212
+ show_default=True,
213
+ help="Index of this foreach split.",
214
+ )
215
+ @click.option(
216
+ "--retry-count",
217
+ default=0,
218
+ help="How many times we have attempted to run this task.",
219
+ )
220
+ @click.option(
221
+ "--max-user-code-retries",
222
+ default=0,
223
+ help="How many times we should attempt running the user code.",
224
+ )
225
+ @click.option(
226
+ "--namespace",
227
+ "opt_namespace",
228
+ default=None,
229
+ help="Change namespace from the default (your username) to the specified tag.",
230
+ )
231
+ @click.option(
232
+ "--skip-decorators/--no-skip-decorators",
233
+ is_flag=True,
234
+ default=False,
235
+ show_default=True,
236
+ help="Skip decorators attached to the step or flow.",
237
+ )
238
+ @click.option(
239
+ "--persist/--no-persist",
240
+ "persist",
241
+ default=True,
242
+ show_default=True,
243
+ help="Whether to persist the artifacts in the spun step. If set to false, the artifacts will not"
244
+ " be persisted and will not be available in the spun step's datastore.",
245
+ )
246
+ @click.option(
247
+ "--artifacts-module",
248
+ default=None,
249
+ show_default=True,
250
+ help="Path to a module that contains artifacts to be used in the spun step. The artifacts should "
251
+ "be defined as a dictionary called ARTIFACTS with keys as the artifact names and values as the "
252
+ "artifact values. The artifact values will overwrite the default values of the artifacts used in "
253
+ "the spun step.",
254
+ )
255
+ @click.pass_context
256
+ def spin_step(
257
+ ctx,
258
+ step_name,
259
+ orig_flow_datastore,
260
+ run_id=None,
261
+ task_id=None,
262
+ input_paths=None,
263
+ split_index=None,
264
+ retry_count=None,
265
+ max_user_code_retries=None,
266
+ opt_namespace=None,
267
+ skip_decorators=False,
268
+ artifacts_module=None,
269
+ persist=True,
270
+ ):
271
+ import time
272
+
273
+ if ctx.obj.is_quiet:
274
+ echo = echo_dev_null
275
+ else:
276
+ echo = echo_always
277
+
278
+ if opt_namespace is not None:
279
+ namespace(opt_namespace)
280
+
281
+ input_paths = decompress_list(input_paths) if input_paths else []
282
+
283
+ skip_decorators = skip_decorators
284
+ whitelist_decorators = [] if skip_decorators else SPIN_ALLOWED_DECORATORS
285
+ from_start("SpinStep: initialized decorators")
286
+ spin_artifacts = read_artifacts_module(artifacts_module) if artifacts_module else {}
287
+ from_start("SpinStep: read artifacts module")
288
+
289
+ ds_type, ds_root = orig_flow_datastore.split("@")
290
+ orig_datastore_impl = [d for d in DATASTORES if d.TYPE == ds_type][0]
291
+ orig_datastore_impl.datastore_root = ds_root
292
+ orig_flow_datastore = FlowDataStore(
293
+ ctx.obj.flow.name,
294
+ environment=None,
295
+ storage_impl=orig_datastore_impl,
296
+ ds_root=ds_root,
297
+ )
298
+
299
+ filecache = FileCache()
300
+ orig_flow_datastore.set_metadata_cache(
301
+ TaskMetadataCache(filecache, ds_type, ds_root, ctx.obj.flow.name)
302
+ )
303
+ orig_flow_datastore.ca_store.set_blob_cache(
304
+ FileBlobCache(
305
+ filecache, FileCache.flow_ds_id(ds_type, ds_root, ctx.obj.flow.name)
306
+ )
307
+ )
308
+
309
+ task = MetaflowTask(
310
+ ctx.obj.flow,
311
+ ctx.obj.flow_datastore,
312
+ ctx.obj.metadata,
313
+ ctx.obj.environment,
314
+ echo,
315
+ ctx.obj.event_logger,
316
+ ctx.obj.monitor,
317
+ None, # no unbounded foreach context
318
+ orig_flow_datastore=orig_flow_datastore,
319
+ spin_artifacts=spin_artifacts,
320
+ )
321
+ from_start("SpinStep: initialized task")
322
+ task.run_step(
323
+ step_name,
324
+ run_id,
325
+ task_id,
326
+ None,
327
+ input_paths,
328
+ split_index,
329
+ retry_count,
330
+ max_user_code_retries,
331
+ whitelist_decorators,
332
+ persist,
333
+ )
334
+ from_start("SpinStep: ran step")
metaflow/client/__init__.py CHANGED
@@ -6,6 +6,7 @@ from .core import (
6
6
  metadata,
7
7
  get_metadata,
8
8
  default_metadata,
9
+ inspect_spin,
9
10
  Metaflow,
10
11
  Flow,
11
12
  Run,