metaflow 2.12.36__py2.py3-none-any.whl → 2.12.37__py2.py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (45) hide show
  1. metaflow/__init__.py +3 -0
  2. metaflow/cli.py +84 -697
  3. metaflow/cli_args.py +17 -0
  4. metaflow/cli_components/__init__.py +0 -0
  5. metaflow/cli_components/dump_cmd.py +96 -0
  6. metaflow/cli_components/init_cmd.py +51 -0
  7. metaflow/cli_components/run_cmds.py +358 -0
  8. metaflow/cli_components/step_cmd.py +189 -0
  9. metaflow/cli_components/utils.py +140 -0
  10. metaflow/cmd/develop/stub_generator.py +9 -2
  11. metaflow/decorators.py +54 -2
  12. metaflow/extension_support/plugins.py +41 -27
  13. metaflow/flowspec.py +156 -16
  14. metaflow/includefile.py +50 -22
  15. metaflow/metaflow_config.py +1 -1
  16. metaflow/package.py +17 -3
  17. metaflow/parameters.py +80 -23
  18. metaflow/plugins/__init__.py +4 -0
  19. metaflow/plugins/airflow/airflow_cli.py +1 -0
  20. metaflow/plugins/argo/argo_workflows.py +41 -1
  21. metaflow/plugins/argo/argo_workflows_cli.py +1 -0
  22. metaflow/plugins/aws/batch/batch_decorator.py +2 -2
  23. metaflow/plugins/aws/step_functions/step_functions.py +32 -0
  24. metaflow/plugins/aws/step_functions/step_functions_cli.py +1 -0
  25. metaflow/plugins/datatools/s3/s3op.py +3 -3
  26. metaflow/plugins/kubernetes/kubernetes_cli.py +1 -1
  27. metaflow/plugins/kubernetes/kubernetes_decorator.py +2 -2
  28. metaflow/plugins/pypi/conda_decorator.py +22 -0
  29. metaflow/plugins/pypi/pypi_decorator.py +1 -0
  30. metaflow/plugins/timeout_decorator.py +2 -2
  31. metaflow/runner/click_api.py +73 -19
  32. metaflow/runtime.py +111 -73
  33. metaflow/sidecar/sidecar_worker.py +1 -1
  34. metaflow/user_configs/__init__.py +0 -0
  35. metaflow/user_configs/config_decorators.py +563 -0
  36. metaflow/user_configs/config_options.py +495 -0
  37. metaflow/user_configs/config_parameters.py +386 -0
  38. metaflow/util.py +17 -0
  39. metaflow/version.py +1 -1
  40. {metaflow-2.12.36.dist-info → metaflow-2.12.37.dist-info}/METADATA +3 -2
  41. {metaflow-2.12.36.dist-info → metaflow-2.12.37.dist-info}/RECORD +45 -35
  42. {metaflow-2.12.36.dist-info → metaflow-2.12.37.dist-info}/LICENSE +0 -0
  43. {metaflow-2.12.36.dist-info → metaflow-2.12.37.dist-info}/WHEEL +0 -0
  44. {metaflow-2.12.36.dist-info → metaflow-2.12.37.dist-info}/entry_points.txt +0 -0
  45. {metaflow-2.12.36.dist-info → metaflow-2.12.37.dist-info}/top_level.txt +0 -0
metaflow/cli_args.py CHANGED
@@ -12,7 +12,14 @@
12
12
  # well as the converting of options in runtime.py. We should make it so that we
13
13
  # can properly shlex things and un-shlex when using. Ideally this should all be
14
14
  # done in one place.
15
+ #
16
+ # NOTE: There is an important difference between these two as well:
17
+ # - this one will include local_config_file whereas the other one WILL NOT.
18
+ # This is because this is used when constructing the parallel UBF command which
19
+ # executes locally and therefore needs the local_config_file but the other (remote)
20
+ # commands do not.
15
21
 
22
+ from .user_configs.config_options import ConfigInput
16
23
  from .util import to_unicode
17
24
 
18
25
 
@@ -65,6 +72,16 @@ class CLIArgs(object):
65
72
  # keyword in Python, so we call it 'decospecs' in click args
66
73
  if k == "decospecs":
67
74
  k = "with"
75
+ if k in ("config_file_options", "config_value_options"):
76
+ # Special handling here since we gather them all in one option but actually
77
+ # need to send them one at a time using --config-value <name> kv.<name>.
78
+ # Note it can be either config_file_options or config_value_options depending
79
+ # on click processing order.
80
+ for config_name in v.keys():
81
+ yield "--config-value"
82
+ yield to_unicode(config_name)
83
+ yield to_unicode(ConfigInput.make_key_name(config_name))
84
+ continue
68
85
  k = k.replace("_", "-")
69
86
  v = v if isinstance(v, (list, tuple, set)) else [v]
70
87
  for value in v:
File without changes
@@ -0,0 +1,96 @@
1
+ import pickle
2
+
3
+ from metaflow._vendor import click
4
+
5
+ from ..cli import echo_always, echo_dev_null
6
+ from ..datastore import TaskDataStoreSet
7
+ from ..exception import CommandException
8
+
9
+
10
@click.command(
    help="Get data artifacts of a task or all tasks in a step. "
    "The format for input-path is either <run_id>/<step_name> or "
    "<run_id>/<step_name>/<task_id>."
)
@click.argument("input-path")
@click.option(
    "--private/--no-private",
    default=False,
    show_default=True,
    help="Show also private attributes.",
)
@click.option(
    "--max-value-size",
    default=1000,
    show_default=True,
    type=int,
    help="Show only values that are smaller than this number. "
    "Set to 0 to see only keys.",
)
@click.option(
    "--include",
    type=str,
    default="",
    help="Include only artifacts in the given comma-separated list.",
)
@click.option(
    "--file", type=str, default=None, help="Serialize artifacts in the given file."
)
@click.pass_obj
def dump(obj, input_path, private=None, max_value_size=None, include=None, file=None):
    # Print (or pickle into `file`) the artifacts of one task, or of every task
    # in a step, identified by `input_path`.
    #
    # Raises CommandException when `input_path` is neither
    # <run_id>/<step_name> nor <run_id>/<step_name>/<task_id>.

    # Progress messages honor the quiet flag; the artifacts themselves are
    # always printed through echo_always further down.
    echo = echo_dev_null if obj.is_quiet else echo_always

    output = {}
    kwargs = {
        "show_private": private,
        "max_value_size": max_value_size,
        # The signature allows include=None (click itself passes ""), so guard
        # against None before splitting; empty names are dropped.
        "include": {name for name in (include or "").split(",") if name},
    }

    # Pathspec can either be run_id/step_name or run_id/step_name/task_id.
    parts = input_path.split("/")
    if len(parts) == 2:
        run_id, step_name = parts
        task_id = None
    elif len(parts) == 3:
        run_id, step_name, task_id = parts
    else:
        raise CommandException(
            "input_path should either be run_id/step_name or run_id/step_name/task_id"
        )

    datastore_set = TaskDataStoreSet(
        obj.flow_datastore,
        run_id,
        steps=[step_name],
        prefetch_data_artifacts=kwargs["include"],
    )
    if task_id:
        ds_list = [datastore_set.get_with_pathspec(input_path)]
    else:
        ds_list = list(datastore_set)  # get all tasks

    for ds in ds_list:
        echo(
            "Dumping output of run_id=*{run_id}* "
            "step=*{step}* task_id=*{task_id}*".format(
                run_id=ds.run_id, step=ds.step_name, task_id=ds.task_id
            ),
            fg="magenta",
        )

        if file is None:
            echo_always(
                ds.format(**kwargs), highlight="green", highlight_bold=False, err=False
            )
        else:
            # Collected here and written once, after all tasks were visited.
            output[ds.pathspec] = ds.to_dict(**kwargs)

    if file is not None:
        with open(file, "wb") as f:
            pickle.dump(output, f, protocol=pickle.HIGHEST_PROTOCOL)
        echo("Artifacts written to *%s*" % file)
@@ -0,0 +1,51 @@
1
+ from metaflow._vendor import click
2
+
3
+ from .. import parameters
4
+ from ..runtime import NativeRuntime
5
+
6
+
7
@parameters.add_custom_parameters(deploy_mode=False)
@click.command(help="Internal command to initialize a run.", hidden=True)
@click.option(
    "--run-id",
    default=None,
    required=True,
    help="ID for one execution of all steps in the flow.",
)
@click.option(
    "--task-id", default=None, required=True, help="ID for this instance of the step."
)
@click.option(
    "--tag",
    "tags",
    multiple=True,
    default=None,
    help="Tags for this instance of the step.",
)
@click.pass_obj
def init(obj, run_id=None, task_id=None, tags=None, **kwargs):
    # Why a dedicated command rather than a flag on 'step': user-specified
    # parameters have to be captured through @add_custom_parameters, and
    # attaching those to 'step' could make user parameter names collide with
    # our internal options. Keep in mind that user-specified parameters are
    # often supplied as environment variables.
    #
    # `kwargs` carries the values of those custom parameters.

    obj.metadata.add_sticky_tags(tags=tags)

    # Positional arguments NativeRuntime expects, in order.
    runtime_args = (
        obj.flow,
        obj.graph,
        obj.flow_datastore,
        obj.metadata,
        obj.environment,
        obj.package,
        obj.logger,
        obj.entrypoint,
        obj.event_logger,
        obj.monitor,
    )
    native_runtime = NativeRuntime(*runtime_args, run_id=run_id)

    # Constants are set on the flow before the runtime persists them.
    obj.flow._set_constants(obj.graph, kwargs, obj.config_options)
    native_runtime.persist_constants(task_id=task_id)
@@ -0,0 +1,358 @@
1
+ import json
2
+
3
+ from functools import wraps
4
+
5
+ from metaflow._vendor import click
6
+
7
+ from .. import decorators, namespace, parameters, tracing
8
+ from ..exception import CommandException
9
+ from ..graph import FlowGraph
10
+ from ..metaflow_current import current
11
+ from ..package import MetaflowPackage
12
+ from ..runtime import NativeRuntime
13
+ from ..system import _system_logger
14
+
15
+ from ..tagging_util import validate_tags
16
+ from ..util import get_latest_run_id, write_latest_run_id
17
+
18
+
19
def before_run(obj, tags, decospecs):
    """Prepare the CLI state object for a `run` or `resume` invocation.

    Validates `tags`, attaches any decorators requested through --with,
    checks the flow, initializes step decorators, registers `tags` as sticky
    tags and builds the code package, which is stored on `obj.package`.
    """
    validate_tags(tags)

    # --with exists both as a top-level option and as an option of the run
    # subcommand, because "run --with shoes" reads much better than
    # "--with shoes run" and is a very common way to use --with.
    #
    # The price is that decorator handling lives in two places in this
    # module, and we must make sure _init_step_decorators is not called twice.

    # Required ordering of decospecs:
    #   1. run level
    #   2. top level
    #   3. environment
    run_level = list(decospecs or [])
    environment_level = list(obj.environment.decospecs() or [])
    all_decospecs = run_level + obj.tl_decospecs + environment_level

    if all_decospecs:
        decorators._attach_decorators(obj.flow, all_decospecs)
        decorators._init(obj.flow, only_non_static=True)
        # Rebuild the graph now that decorators were attached.
        obj.graph = FlowGraph(obj.flow.__class__)

    obj.check(obj.graph, obj.flow, obj.environment, pylint=obj.pylint)
    # obj.environment.init_environment(obj.logger)

    decorators._init_step_decorators(
        obj.flow, obj.graph, obj.environment, obj.flow_datastore, obj.logger
    )

    obj.metadata.add_sticky_tags(tags=tags)

    # The working directory is packaged exactly once per run. This is
    # deliberately not done in `start`, which is invoked for every step
    # of the run.
    obj.package = MetaflowPackage(
        obj.flow, obj.environment, obj.echo, obj.package_suffixes
    )
61
+
62
+
63
def write_file(file_path, content):
    """Write ``str(content)`` to `file_path` as UTF-8 text.

    Does nothing when `file_path` is None. An existing file is overwritten.
    """
    if file_path is None:
        return
    with open(file_path, "w", encoding="utf-8") as handle:
        handle.write(str(content))
67
+
68
+
69
def common_run_options(func):
    """Decorate `func` with the click options shared by `run` and `resume`.

    Returns a wrapper around `func` that forwards all arguments unchanged and
    carries the options listed below.
    """

    @wraps(func)
    def wrapper(*args, **kwargs):
        return func(*args, **kwargs)

    # Listed in the order they would appear as stacked decorators (top first).
    shared_options = (
        click.option(
            "--tag",
            "tags",
            multiple=True,
            default=None,
            help="Annotate this run with the given tag. You can specify "
            "this option multiple times to attach multiple tags in "
            "the run.",
        ),
        click.option(
            "--max-workers",
            default=16,
            show_default=True,
            help="Maximum number of parallel processes.",
        ),
        click.option(
            "--max-num-splits",
            default=100,
            show_default=True,
            help="Maximum number of splits allowed in a foreach. This "
            "is a safety check preventing bugs from triggering "
            "thousands of steps inadvertently.",
        ),
        click.option(
            "--max-log-size",
            default=10,
            show_default=True,
            help="Maximum size of stdout and stderr captured in "
            "megabytes. If a step outputs more than this to "
            "stdout/stderr, its output will be truncated.",
        ),
        click.option(
            "--with",
            "decospecs",
            multiple=True,
            help="Add a decorator to all steps. You can specify this "
            "option multiple times to attach multiple decorators "
            "in steps.",
        ),
        click.option(
            "--run-id-file",
            default=None,
            show_default=True,
            type=str,
            help="Write the ID of this run to the file specified.",
        ),
        click.option(
            "--runner-attribute-file",
            default=None,
            show_default=True,
            type=str,
            help="Write the metadata and pathspec of this run to the file specified. Used internally for Metaflow's Runner API.",
        ),
    )

    # Stacked decorators are applied bottom-up, so walk the tuple in reverse
    # to register the options in the same order.
    for attach_option in reversed(shared_options):
        wrapper = attach_option(wrapper)

    return wrapper
128
+
129
+
130
@click.option(
    "--origin-run-id",
    default=None,
    help="ID of the run that should be resumed. By default, the "
    "last run executed locally.",
)
@click.option(
    "--run-id",
    default=None,
    help="Run ID for the new run. By default, a new run-id will be generated",
    hidden=True,
)
@click.option(
    "--clone-only/--no-clone-only",
    default=False,
    show_default=True,
    help="Only clone tasks without continuing execution",
    hidden=True,
)
@click.option(
    "--reentrant/--no-reentrant",
    default=False,
    show_default=True,
    hidden=True,
    help="If specified, allows this call to be called in parallel",
)
@click.option(
    "--resume-identifier",
    default=None,
    show_default=True,
    hidden=True,
    help="If specified, it identifies the task that started this resume call. It is in the form of {step_name}-{task_id}",
)
@click.argument("step-to-rerun", required=False)
@click.command(help="Resume execution of a previous run of this flow.")
@common_run_options
@click.pass_obj
def resume(
    obj,
    tags=None,
    step_to_rerun=None,
    origin_run_id=None,
    run_id=None,
    clone_only=False,
    reentrant=False,
    max_workers=None,
    max_num_splits=None,
    max_log_size=None,
    decospecs=None,
    run_id_file=None,
    resume_identifier=None,
    runner_attribute_file=None,
):
    # Resume a previous run: clone the tasks of the origin run and, unless
    # --clone-only was given, continue executing the flow.
    #
    # Raises CommandException when no origin run can be determined, when
    # `step_to_rerun` is not a step of the current graph, or when an explicit
    # `run_id` parses as an integer.
    before_run(obj, tags, decospecs)

    if origin_run_id is None:
        # Fall back to the latest run id recorded for this flow.
        origin_run_id = get_latest_run_id(obj.echo, obj.flow.name)
        if origin_run_id is None:
            raise CommandException(
                "A previous run id was not found. Specify --origin-run-id."
            )

    if step_to_rerun is None:
        steps_to_rerun = set()
    else:
        # validate step name
        if step_to_rerun not in obj.graph.nodes:
            raise CommandException(
                "invalid step name {0} specified, must be step present in "
                "current form of execution graph. Valid step names include: {1}".format(
                    step_to_rerun, ",".join(obj.graph.nodes.keys())
                )
            )
        steps_to_rerun = {step_to_rerun}

    if run_id:
        # Run-ids that are provided by the metadata service are always integers.
        # External providers or run-ids (like external schedulers) always need to
        # be non-integers to avoid any clashes. This condition ensures this.
        try:
            int(run_id)
        except ValueError:
            # Not an integer: this is the accepted case. Only ValueError is
            # caught so that KeyboardInterrupt/SystemExit are not swallowed.
            pass
        else:
            raise CommandException("run-id %s cannot be an integer" % run_id)

    runtime = NativeRuntime(
        obj.flow,
        obj.graph,
        obj.flow_datastore,
        obj.metadata,
        obj.environment,
        obj.package,
        obj.logger,
        obj.entrypoint,
        obj.event_logger,
        obj.monitor,
        run_id=run_id,
        clone_run_id=origin_run_id,
        clone_only=clone_only,
        reentrant=reentrant,
        steps_to_rerun=steps_to_rerun,
        max_workers=max_workers,
        max_num_splits=max_num_splits,
        # The option is expressed in megabytes; the runtime receives bytes.
        max_log_size=max_log_size * 1024 * 1024,
        resume_identifier=resume_identifier,
    )
    write_file(run_id_file, runtime.run_id)
    runtime.print_workflow_info()

    runtime.persist_constants()

    if runner_attribute_file:
        with open(runner_attribute_file, "w", encoding="utf-8") as f:
            json.dump(
                {
                    "run_id": runtime.run_id,
                    "flow_name": obj.flow.name,
                    "metadata": obj.metadata.metadata_str(),
                },
                f,
            )

    # We may skip clone-only resume if this is not a resume leader,
    # and clone is already complete.
    if runtime.should_skip_clone_only_execution():
        return

    current._update_env(
        {
            "run_id": runtime.run_id,
        }
    )
    _system_logger.log_event(
        level="info",
        module="metaflow.resume",
        name="start",
        payload={
            "msg": "Resuming run",
        },
    )

    with runtime.run_heartbeat():
        if clone_only:
            runtime.clone_original_run()
        else:
            runtime.clone_original_run(generate_task_obj=True, verbose=False)
            runtime.execute()
278
+
279
+
280
@parameters.add_custom_parameters(deploy_mode=True)
@click.command(help="Run the workflow locally.")
@tracing.cli_entrypoint("cli/run")
@common_run_options
@click.option(
    "--namespace",
    "user_namespace",
    default=None,
    help="Change namespace from the default (your username) to "
    "the specified tag. Note that this option does not alter "
    "tags assigned to the objects produced by this run, just "
    "what existing objects are visible in the client API. You "
    "can enable the global namespace with an empty string: "
    "--namespace=",
)
@click.pass_obj
def run(
    obj,
    tags=None,
    max_workers=None,
    max_num_splits=None,
    max_log_size=None,
    decospecs=None,
    run_id_file=None,
    runner_attribute_file=None,
    user_namespace=None,
    **kwargs
):
    # Execute the flow locally as a new run. `kwargs` carries the values of
    # the user-defined parameters added by @add_custom_parameters.

    if user_namespace is not None:
        # An empty string is turned into None, which the help text describes
        # as enabling the global namespace.
        namespace(user_namespace or None)
    before_run(obj, tags, decospecs)

    runtime = NativeRuntime(
        obj.flow,
        obj.graph,
        obj.flow_datastore,
        obj.metadata,
        obj.environment,
        obj.package,
        obj.logger,
        obj.entrypoint,
        obj.event_logger,
        obj.monitor,
        max_workers=max_workers,
        max_num_splits=max_num_splits,
        # The option is expressed in megabytes; the runtime receives bytes.
        max_log_size=max_log_size * 1024 * 1024,
    )
    write_latest_run_id(obj, runtime.run_id)
    write_file(run_id_file, runtime.run_id)

    obj.flow._set_constants(obj.graph, kwargs, obj.config_options)
    current._update_env(
        {
            "run_id": runtime.run_id,
        }
    )
    _system_logger.log_event(
        level="info",
        module="metaflow.run",
        name="start",
        payload={
            "msg": "Starting run",
        },
    )
    with runtime.run_heartbeat():
        runtime.print_workflow_info()
        runtime.persist_constants()

        if runner_attribute_file:
            with open(runner_attribute_file, "w", encoding="utf-8") as f:
                json.dump(
                    {
                        "run_id": runtime.run_id,
                        "flow_name": obj.flow.name,
                        "metadata": obj.metadata.metadata_str(),
                    },
                    f,
                )
        runtime.execute()