metaflow 2.12.36__py2.py3-none-any.whl → 2.12.38__py2.py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (45) hide show
  1. metaflow/__init__.py +3 -0
  2. metaflow/cli.py +84 -697
  3. metaflow/cli_args.py +17 -0
  4. metaflow/cli_components/__init__.py +0 -0
  5. metaflow/cli_components/dump_cmd.py +96 -0
  6. metaflow/cli_components/init_cmd.py +51 -0
  7. metaflow/cli_components/run_cmds.py +358 -0
  8. metaflow/cli_components/step_cmd.py +189 -0
  9. metaflow/cli_components/utils.py +140 -0
  10. metaflow/cmd/develop/stub_generator.py +9 -2
  11. metaflow/decorators.py +63 -2
  12. metaflow/extension_support/plugins.py +41 -27
  13. metaflow/flowspec.py +156 -16
  14. metaflow/includefile.py +50 -22
  15. metaflow/metaflow_config.py +1 -1
  16. metaflow/package.py +17 -3
  17. metaflow/parameters.py +80 -23
  18. metaflow/plugins/__init__.py +4 -0
  19. metaflow/plugins/airflow/airflow_cli.py +1 -0
  20. metaflow/plugins/argo/argo_workflows.py +41 -1
  21. metaflow/plugins/argo/argo_workflows_cli.py +1 -0
  22. metaflow/plugins/aws/batch/batch_decorator.py +2 -2
  23. metaflow/plugins/aws/step_functions/step_functions.py +32 -0
  24. metaflow/plugins/aws/step_functions/step_functions_cli.py +1 -0
  25. metaflow/plugins/datatools/s3/s3op.py +3 -3
  26. metaflow/plugins/kubernetes/kubernetes_cli.py +1 -1
  27. metaflow/plugins/kubernetes/kubernetes_decorator.py +2 -2
  28. metaflow/plugins/pypi/conda_decorator.py +22 -0
  29. metaflow/plugins/pypi/pypi_decorator.py +1 -0
  30. metaflow/plugins/timeout_decorator.py +2 -2
  31. metaflow/runner/click_api.py +73 -19
  32. metaflow/runtime.py +111 -73
  33. metaflow/sidecar/sidecar_worker.py +1 -1
  34. metaflow/user_configs/__init__.py +0 -0
  35. metaflow/user_configs/config_decorators.py +563 -0
  36. metaflow/user_configs/config_options.py +495 -0
  37. metaflow/user_configs/config_parameters.py +386 -0
  38. metaflow/util.py +17 -0
  39. metaflow/version.py +1 -1
  40. {metaflow-2.12.36.dist-info → metaflow-2.12.38.dist-info}/METADATA +3 -2
  41. {metaflow-2.12.36.dist-info → metaflow-2.12.38.dist-info}/RECORD +45 -35
  42. {metaflow-2.12.36.dist-info → metaflow-2.12.38.dist-info}/LICENSE +0 -0
  43. {metaflow-2.12.36.dist-info → metaflow-2.12.38.dist-info}/WHEEL +0 -0
  44. {metaflow-2.12.36.dist-info → metaflow-2.12.38.dist-info}/entry_points.txt +0 -0
  45. {metaflow-2.12.36.dist-info → metaflow-2.12.38.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,495 @@
1
+ import importlib
2
+ import json
3
+ import os
4
+
5
+ from typing import Any, Callable, Dict, List, Optional, Tuple, Union
6
+
7
+ from metaflow._vendor import click
8
+ from metaflow.debug import debug
9
+
10
+ from .config_parameters import CONFIG_FILE, ConfigValue
11
+ from ..exception import MetaflowException, MetaflowInternalError
12
+ from ..parameters import DeployTimeField, ParameterContext, current_flow
13
+ from ..util import get_username
14
+
15
+
16
+ _CONVERT_PREFIX = "@!c!@:"
17
+ _DEFAULT_PREFIX = "@!d!@:"
18
+ _NO_FILE = "@!n!@"
19
+
20
+ _CONVERTED_DEFAULT = _CONVERT_PREFIX + _DEFAULT_PREFIX
21
+ _CONVERTED_NO_FILE = _CONVERT_PREFIX + _NO_FILE
22
+ _CONVERTED_DEFAULT_NO_FILE = _CONVERTED_DEFAULT + _NO_FILE
23
+
24
+
25
def _load_config_values(info_file: Optional[str] = None) -> Optional[Dict[Any, Any]]:
    """Return the ``user_configs`` section of a JSON configuration file.

    Parameters
    ----------
    info_file : str, optional
        Path of the JSON file to read. When omitted, the base name of
        ``CONFIG_FILE`` is used, i.e. the file is looked up relative to the
        current working directory.

    Returns
    -------
    Optional[Dict[Any, Any]]
        The value stored under the ``"user_configs"`` key (an empty dict when
        the key is absent), or ``None`` when the file cannot be opened or read.
    """
    path = os.path.basename(CONFIG_FILE) if info_file is None else info_file
    try:
        with open(path, encoding="utf-8") as handle:
            parsed = json.load(handle)
    except IOError:
        # An unreadable file is reported as None; the caller decides what to do.
        return None
    return parsed.get("user_configs", {})
33
+
34
+
35
class ConvertPath(click.Path):
    """Click path type that replaces a file path by the file's content.

    The converted value is a string starting with ``_CONVERT_PREFIX``. It is
    followed by ``_DEFAULT_PREFIX`` when the value was a default, and then by
    either the content of the file or, when the file cannot be read,
    ``_NO_FILE`` plus the path that was tried.
    """

    name = "ConvertPath"

    def convert(self, value, param, ctx):
        """Convert ``value`` (a path, possibly marked as a default) to content.

        Values that already start with ``_CONVERT_PREFIX`` are returned
        unchanged so that converting twice is harmless. Values starting with
        ``_DEFAULT_PREFIX`` have the marker stripped and go through
        ``click.Path.convert`` before being read.
        """
        # Only strings can carry one of the markers. Checking the type once
        # keeps non-string path values (bytes, os.PathLike) from failing on a
        # missing ``startswith`` attribute.
        if isinstance(value, str):
            if value.startswith(_CONVERT_PREFIX):
                return value
            if value.startswith(_DEFAULT_PREFIX):
                value = super().convert(value[len(_DEFAULT_PREFIX) :], param, ctx)
                return self.convert_value(value, True)
        return self.convert_value(value, False)

    @staticmethod
    def mark_as_default(value):
        """Prefix ``value`` with ``_DEFAULT_PREFIX``; ``None`` stays ``None``."""
        if value is None:
            return None
        return _DEFAULT_PREFIX + str(value)

    @staticmethod
    def convert_value(value, is_default):
        """Read the file at ``value`` and return its content with markers.

        Parameters
        ----------
        value : path-like or None
            File to read (decoded as UTF-8). ``None`` yields ``None``.
        is_default : bool
            Whether to add ``_DEFAULT_PREFIX`` after ``_CONVERT_PREFIX``.

        Returns
        -------
        Optional[str]
            ``None`` when ``value`` is ``None``; otherwise the marker prefix
            followed by the content of the file or, when the file cannot be
            opened or read (``OSError``), by ``_NO_FILE`` and the path.
        """
        if value is None:
            return None
        marker = _CONVERT_PREFIX + (_DEFAULT_PREFIX if is_default else "")
        try:
            with open(value, "r", encoding="utf-8") as f:
                content = f.read()
        except OSError:
            # ``open`` accepts bytes and os.PathLike objects, which cannot be
            # concatenated to a str; decode them so the marker can always be
            # built instead of raising a TypeError.
            return marker + _NO_FILE + os.fsdecode(value)
        return marker + content
64
+
65
+
66
class ConvertDictOrStr(click.ParamType):
    """Click type for inline configuration values given as a dict or a string.

    Converted values are strings that start with ``_CONVERT_PREFIX``, followed
    by ``_DEFAULT_PREFIX`` when the value was marked as a default, followed by
    the value itself (dicts are serialized with ``json.dumps``).
    """

    name = "ConvertDictOrStr"

    def convert(self, value, param, ctx):
        """Convert ``value``, honoring the default and converted markers."""
        if not isinstance(value, str):
            return self.convert_value(value, False)
        if value.startswith(_CONVERT_PREFIX):
            # Already converted: hand it back untouched.
            return value
        if value.startswith(_DEFAULT_PREFIX):
            return self.convert_value(value[len(_DEFAULT_PREFIX) :], True)
        return self.convert_value(value, False)

    @staticmethod
    def convert_value(value, is_default):
        """Return ``value`` with the conversion markers prepended.

        ``None`` is returned as ``None`` and a string that already starts with
        ``_CONVERT_PREFIX`` is returned unchanged.
        """
        if value is None:
            return None

        marker = _CONVERT_PREFIX + (_DEFAULT_PREFIX if is_default else "")
        if isinstance(value, dict):
            return marker + json.dumps(value)
        if value.startswith(_CONVERT_PREFIX):
            return value
        return marker + value

    @staticmethod
    def mark_as_default(value):
        """Prefix ``value`` with ``_DEFAULT_PREFIX``; ``None`` stays ``None``.

        Dicts are serialized with ``json.dumps``; anything else goes through
        ``str``.
        """
        if value is None:
            return None
        text = json.dumps(value) if isinstance(value, dict) else str(value)
        return _DEFAULT_PREFIX + text
101
+
102
+
103
class MultipleTuple(click.Tuple):
    # Thin wrapper around click.Tuple so that the environment variable carrying
    # configurations can be a JSON string. The default behavior splits the
    # variable on whitespace, which is not what we want here.
    # Several configuration options can therefore be passed through a single
    # environment variable, for example:
    # METAFLOW_FLOW_CONFIG_VALUE='{"config1": {"key0": "value0"}, "config2": {"key1": "value1"}}'
    # or METAFLOW_FLOW_CONFIG='{"config1": "file1", "config2": "file2"}'

    def split_envvar_value(self, rv):
        # Flatten the JSON object into [name1, value1, name2, value2, ...].
        # Anything that is not already a string is re-serialized as JSON.
        flattened = []
        for key, item in json.loads(rv).items():
            for element in (key, item):
                if isinstance(element, str):
                    flattened.append(element)
                else:
                    flattened.append(json.dumps(element))
        return flattened
119
+
120
+
121
class ConfigInput:
    """Internal processor for the ``--config`` and ``--config-value`` options.

    It uses the file set through ``set_config_file`` (see ``LocalFileInput``)
    to look up values that were already materialized, and it processes every
    ``--config`` / ``--config-value`` option. ``process_configs`` is invoked
    *twice* by click (once per option), so the first invocation only records
    its values and the second one does the actual work.

    The resulting values are stored in the flow class' ``_flow_state`` rather
    than in a global so that the Runner is supported.
    """

    loaded_configs = None  # type: Optional[Dict[str, Dict[Any, Any]]]
    config_file = None  # type: Optional[str]

    def __init__(
        self,
        req_configs: List[str],
        defaults: Dict[str, Tuple[Union[str, Dict[Any, Any]], bool]],
        parsers: Dict[str, Union[str, Callable[[str], Dict[Any, Any]]]],
    ):
        """
        Parameters
        ----------
        req_configs : List[str]
            Names of the configurations that must have a value.
        defaults : Dict[str, Tuple[Union[str, Dict[Any, Any]], bool]]
            For each configuration name, its default and whether that default
            is a file path (True) or an inline value (False).
        parsers : Dict[str, Union[str, Callable[[str], Dict[Any, Any]]]]
            For each configuration name, the parser to apply to the raw
            content: a callable, a dotted path to one, or a falsy value to
            parse the content as JSON.
        """
        # process_configs keys everything by lower-cased name, so the required
        # names must be lower-cased too; otherwise a required configuration
        # declared with upper-case letters would never be reported as missing.
        self._req_configs = {name.lower() for name in req_configs}
        self._defaults = defaults
        self._parsers = parsers
        self._path_values = None
        self._value_values = None

    @staticmethod
    def make_key_name(name: str) -> str:
        """Return the ``kv.``-prefixed, lower-cased form of ``name``.

        This special mark indicates that the configuration value is neither
        content nor a file name but a key to read from the config file (where
        the value has already been materialized).
        """
        return "kv." + name.lower()

    @classmethod
    def set_config_file(cls, config_file: str):
        """Record, at class level, the file ``get_config`` reads from."""
        cls.config_file = config_file

    @classmethod
    def get_config(cls, config_name: str) -> Optional[Dict[Any, Any]]:
        """Return the materialized configuration ``config_name`` or ``None``.

        The config file is loaded once and cached on the class.

        Raises
        ------
        MetaflowException
            If the config file cannot be loaded.
        """
        if cls.loaded_configs is None:
            all_configs = _load_config_values(cls.config_file)
            if all_configs is None:
                raise MetaflowException(
                    "Could not load expected configuration values "
                    "from the CONFIG_PARAMETERS file. This is a Metaflow bug. "
                    "Please contact support."
                )
            cls.loaded_configs = all_configs
        return cls.loaded_configs.get(config_name, None)

    def process_configs(self, ctx, param, value):
        """Click callback shared by ``--config`` and ``--config-value``.

        Returns ``None`` on the first invocation (values are only recorded)
        and, on the second one, a dict mapping each configuration name to a
        ``ConfigValue``. The parsed values are also stored in
        ``flow_cls._flow_state[_FlowState.CONFIGS]``.

        Raises
        ------
        MetaflowInternalError
            If no flow class is currently set.
        click.UsageError
            If a configuration is given both as a file and as a value, if a
            ``kv.`` value cannot be found in the config file, or if any value
            is missing, unreadable or not valid JSON.
        """
        from ..cli import echo_always, echo_dev_null  # Prevent circular import
        from ..flowspec import _FlowState  # Prevent circular import

        flow_cls = getattr(current_flow, "flow_cls", None)
        if flow_cls is None:
            # This is an error
            raise MetaflowInternalError(
                "Config values should be processed for a FlowSpec"
            )

        # This function is called by click when processing all the --config and
        # --config-value options.
        # The value passed in is a list of tuples (name, value).
        # Click will provide:
        #   - all the defaults if nothing is provided on the command line
        #   - provide *just* the passed in value if anything is provided on the
        #     command line.
        #
        # We need to get all config and config-value options and click will call
        # this function twice. We will first get all the values on the command
        # line and *then* merge with the defaults to form a full set of values.
        # We therefore get a full set of values where:
        #  - the name will correspond to the configuration name
        #  - the value will be:
        #       - the default (including None if there is no default). If the
        #         default is not None, it will start with _CONVERTED_DEFAULT since
        #         Click will make the value go through ConvertPath or
        #         ConvertDictOrStr
        #       - the actual value passed through prefixed with _CONVERT_PREFIX

        debug.userconf_exec(
            "Processing configs for %s -- incoming values: %s"
            % (param.name, str(value))
        )

        do_return = self._value_values is None and self._path_values is None
        # We only keep around non default values. We could simplify by checking
        # just one value and if it is default it means all are but this doesn't
        # seem much more effort and is clearer
        non_default = {
            k.lower(): v
            for k, v in value
            if v is not None and not v.startswith(_CONVERTED_DEFAULT)
        }
        if param.name == "config_value_options":
            self._value_values = non_default
        else:
            self._path_values = non_default
        if do_return:
            # One of config_value_options or config_file_options will be None
            debug.userconf_exec("Incomplete config options; waiting for more")
            return None

        # The second go around, we process all the values and merge them.

        # If we are processing options that start with kv., we know we are in a
        # subprocess and ignore other stuff. In particular, environment variables
        # used to pass down configurations (like METAFLOW_FLOW_CONFIG) could still
        # be present and would cause an issue -- we can ignore those as the kv.
        # values should trump everything else.
        all_keys = set(self._value_values).union(self._path_values)
        # Make sure we have at least some keys (ie: some non default values)
        has_all_kv = all_keys and all(
            self._value_values.get(k, "").startswith(_CONVERT_PREFIX + "kv.")
            for k in all_keys
        )

        flow_cls._flow_state[_FlowState.CONFIGS] = {}
        to_return = {}

        if not has_all_kv:
            # Check that the user didn't provide *both* a path and a value.
            common_keys = set(self._value_values).intersection(self._path_values)
            if common_keys:
                raise click.UsageError(
                    "Cannot provide both a value and a file for the same configuration. "
                    "Found such values for '%s'" % "', '".join(common_keys)
                )

            all_values = dict(self._path_values or {})
            all_values.update(self._value_values or {})

            debug.userconf_exec("All config values: %s" % str(all_values))

            merged_configs = {}
            for name, (val, is_path) in self._defaults.items():
                n = name.lower()
                if n in all_values:
                    merged_configs[n] = all_values[n]
                else:
                    if isinstance(val, DeployTimeField):
                        # This supports a default value that is a deploy-time
                        # field (similar to Parameter).
                        # We will form our own context and pass it down -- note
                        # that you cannot use configs in the default value of
                        # configs as this introduces a bit of circularity. Note
                        # also that quiet and datastore are *eager* options so
                        # are available here.
                        param_ctx = ParameterContext(
                            flow_name=ctx.obj.flow.name,
                            user_name=get_username(),
                            parameter_name=n,
                            logger=(
                                echo_dev_null if ctx.params["quiet"] else echo_always
                            ),
                            ds_type=ctx.params["datastore"],
                            configs=None,
                        )
                        val = val.fun(param_ctx)
                    if is_path:
                        # This is a file path
                        merged_configs[n] = ConvertPath.convert_value(val, False)
                    else:
                        # This is a value
                        merged_configs[n] = ConvertDictOrStr.convert_value(val, False)
        else:
            debug.userconf_exec("Fast path due to pre-processed values")
            merged_configs = self._value_values
        debug.userconf_exec("Configs merged with defaults: %s" % str(merged_configs))

        missing_configs = set()
        no_file = []
        no_default_file = []
        msgs = []
        for name, val in merged_configs.items():
            if val is None:
                missing_configs.add(name)
                continue
            if val.startswith(_CONVERTED_NO_FILE):
                no_file.append(name)
                continue
            if val.startswith(_CONVERTED_DEFAULT_NO_FILE):
                no_default_file.append(name)
                continue
            val = val[len(_CONVERT_PREFIX) :]  # Remove the _CONVERT_PREFIX
            if val.startswith("kv."):
                # This means to load it from a file
                read_value = self.get_config(val[3:])
                if read_value is None:
                    raise click.UsageError(
                        "Could not find configuration '%s' in INFO file" % val
                    )
                flow_cls._flow_state[_FlowState.CONFIGS][name] = read_value
                to_return[name] = ConfigValue(read_value)
            else:
                if self._parsers[name]:
                    read_value = self._call_parser(self._parsers[name], val)
                else:
                    try:
                        read_value = json.loads(val)
                    except json.JSONDecodeError as e:
                        msgs.append(
                            "configuration value for '%s' is not valid JSON: %s"
                            % (name, e)
                        )
                        continue
                    # TODO: Support YAML
                flow_cls._flow_state[_FlowState.CONFIGS][name] = read_value
                to_return[name] = ConfigValue(read_value)

        # Only configurations flagged as required are reported when missing.
        reqs = missing_configs.intersection(self._req_configs)
        for missing in reqs:
            msgs.append("missing configuration for '%s'" % missing)
        for missing in no_file:
            msgs.append(
                "configuration file '%s' could not be read for '%s'"
                % (merged_configs[missing][len(_CONVERTED_NO_FILE) :], missing)
            )
        for missing in no_default_file:
            msgs.append(
                "default configuration file '%s' could not be read for '%s'"
                % (merged_configs[missing][len(_CONVERTED_DEFAULT_NO_FILE) :], missing)
            )
        if msgs:
            raise click.UsageError(
                "Bad values passed for configuration options: %s" % ", ".join(msgs)
            )

        debug.userconf_exec("Finalized configs: %s" % str(to_return))
        return to_return

    def __str__(self):
        return repr(self)

    def __repr__(self):
        return "ConfigInput"

    @staticmethod
    def _call_parser(parser, val):
        """Apply ``parser`` to ``val`` and return the result.

        ``parser`` is either a callable or a dotted path ``module.function``.
        A path starting with ``.`` is taken relative to the ``metaflow``
        package.

        Raises
        ------
        ValueError
            If a string parser has no module part, its module cannot be
            imported, or the named attribute is missing or not callable.
        """
        if isinstance(parser, str):
            if len(parser) and parser[0] == ".":
                parser = "metaflow" + parser
            path, dot, func = parser.rpartition(".")
            if not dot:
                # Without a module part there is nothing to import; say so
                # explicitly instead of failing on tuple unpacking.
                raise ValueError(
                    "Cannot locate parser %s: expected a name of the form "
                    "'module.function'" % parser
                )
            try:
                func_module = importlib.import_module(path)
            except ImportError as e:
                raise ValueError("Cannot locate parser %s" % parser) from e
            parser = getattr(func_module, func, None)
            if parser is None or not callable(parser):
                raise ValueError(
                    "Parser %s is either not part of %s or not a callable"
                    % (func, path)
                )
        return parser(val)
381
+
382
+
383
class LocalFileInput(click.Path):
    # Thin wrapper around click.Path that records the file from which
    # configuration values are read. The file is recorded as soon as the
    # --local-config-file option is processed, so it is available when the
    # other --config options are processed (they call
    # ConfigInput.process_configs).
    name = "LocalFileInput"

    def convert(self, value, param, ctx):
        converted = super().convert(value, param, ctx)
        # The value as received (not the converted one) is what gets recorded.
        ConfigInput.set_config_file(value)
        return converted

    def __repr__(self):
        return "LocalFileInput"

    def __str__(self):
        return self.__repr__()
400
+
401
+
402
def config_options(cmd):
    """Add the ``--config`` and ``--config-value`` options to ``cmd``.

    The options are built from the config parameters of the current flow
    class. ``cmd`` is returned unchanged when there is no current flow class
    or when the flow declares no configuration.

    Parameters
    ----------
    cmd : click command
        Command whose ``params`` list receives the two options (inserted at
        the front).

    Returns
    -------
    The same ``cmd`` object.

    Raises
    ------
    MetaflowException
        If two configurations share the same name (names are compared
        case-insensitively).
    """
    flow_cls = getattr(current_flow, "flow_cls", None)
    if flow_cls is None:
        return cmd

    help_strs = []
    required_names = []
    defaults = {}
    config_seen = set()
    parsers = {}

    parameters = [p for _, p in flow_cls._get_parameters() if p.IS_CONFIG_PARAMETER]
    # List all the configuration options
    for arg in parameters[::-1]:
        kwargs = arg.option_kwargs(False)
        name = arg.name.lower()
        if name in config_seen:
            msg = (
                "Multiple configurations use the same name '%s'. Note that names are "
                "case-insensitive. Please change the "
                "names of some of your configurations" % arg.name
            )
            raise MetaflowException(msg)
        config_seen.add(name)
        if kwargs["required"]:
            # Use the lower-cased name like every other structure built here:
            # ConfigInput.process_configs tracks missing configurations by
            # lower-cased name, so a differently-cased entry would never match.
            required_names.append(name)

        defaults[name] = (
            arg.kwargs.get("default", None),
            arg._default_is_file,
        )
        help_strs.append(" - %s: %s" % (name, kwargs.get("help", "")))
        parsers[name] = arg.parser

    if not config_seen:
        # No configurations -- don't add anything
        return cmd

    help_str = (
        "Configuration options for the flow. "
        "Multiple configurations can be specified."
    )
    help_str = "\n\n".join([help_str] + help_strs)
    cb_func = ConfigInput(required_names, defaults, parsers).process_configs
    # A sorted list (rather than the set itself) keeps the choices shown in
    # help and error messages in a deterministic order.
    choices = sorted(config_seen)

    cmd.params.insert(
        0,
        click.Option(
            ["--config-value", "config_value_options"],
            nargs=2,
            multiple=True,
            type=MultipleTuple([click.Choice(choices), ConvertDictOrStr()]),
            callback=cb_func,
            help=help_str,
            envvar="METAFLOW_FLOW_CONFIG_VALUE",
            show_default=False,
            # Only non-callable, inline-value defaults are passed to click;
            # every other configuration gets None for this option.
            default=[
                (
                    k,
                    (
                        ConvertDictOrStr.mark_as_default(v[0])
                        if not callable(v[0]) and not v[1]
                        else None
                    ),
                )
                for k, v in defaults.items()
            ],
            required=False,
        ),
    )
    cmd.params.insert(
        0,
        click.Option(
            ["--config", "config_file_options"],
            nargs=2,
            multiple=True,
            type=MultipleTuple([click.Choice(choices), ConvertPath()]),
            callback=cb_func,
            help=help_str,
            envvar="METAFLOW_FLOW_CONFIG",
            show_default=False,
            # Mirror image of the option above: only non-callable defaults
            # that are file paths are passed to click.
            default=[
                (
                    k,
                    (
                        ConvertPath.mark_as_default(v[0])
                        if not callable(v[0]) and v[1]
                        else None
                    ),
                )
                for k, v in defaults.items()
            ],
            required=False,
        ),
    )
    return cmd