metaflow 2.12.36__py2.py3-none-any.whl → 2.12.38__py2.py3-none-any.whl

This diff compares the contents of two publicly available versions of a package, as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in their respective public registries.
Files changed (45)
  1. metaflow/__init__.py +3 -0
  2. metaflow/cli.py +84 -697
  3. metaflow/cli_args.py +17 -0
  4. metaflow/cli_components/__init__.py +0 -0
  5. metaflow/cli_components/dump_cmd.py +96 -0
  6. metaflow/cli_components/init_cmd.py +51 -0
  7. metaflow/cli_components/run_cmds.py +358 -0
  8. metaflow/cli_components/step_cmd.py +189 -0
  9. metaflow/cli_components/utils.py +140 -0
  10. metaflow/cmd/develop/stub_generator.py +9 -2
  11. metaflow/decorators.py +63 -2
  12. metaflow/extension_support/plugins.py +41 -27
  13. metaflow/flowspec.py +156 -16
  14. metaflow/includefile.py +50 -22
  15. metaflow/metaflow_config.py +1 -1
  16. metaflow/package.py +17 -3
  17. metaflow/parameters.py +80 -23
  18. metaflow/plugins/__init__.py +4 -0
  19. metaflow/plugins/airflow/airflow_cli.py +1 -0
  20. metaflow/plugins/argo/argo_workflows.py +41 -1
  21. metaflow/plugins/argo/argo_workflows_cli.py +1 -0
  22. metaflow/plugins/aws/batch/batch_decorator.py +2 -2
  23. metaflow/plugins/aws/step_functions/step_functions.py +32 -0
  24. metaflow/plugins/aws/step_functions/step_functions_cli.py +1 -0
  25. metaflow/plugins/datatools/s3/s3op.py +3 -3
  26. metaflow/plugins/kubernetes/kubernetes_cli.py +1 -1
  27. metaflow/plugins/kubernetes/kubernetes_decorator.py +2 -2
  28. metaflow/plugins/pypi/conda_decorator.py +22 -0
  29. metaflow/plugins/pypi/pypi_decorator.py +1 -0
  30. metaflow/plugins/timeout_decorator.py +2 -2
  31. metaflow/runner/click_api.py +73 -19
  32. metaflow/runtime.py +111 -73
  33. metaflow/sidecar/sidecar_worker.py +1 -1
  34. metaflow/user_configs/__init__.py +0 -0
  35. metaflow/user_configs/config_decorators.py +563 -0
  36. metaflow/user_configs/config_options.py +495 -0
  37. metaflow/user_configs/config_parameters.py +386 -0
  38. metaflow/util.py +17 -0
  39. metaflow/version.py +1 -1
  40. {metaflow-2.12.36.dist-info → metaflow-2.12.38.dist-info}/METADATA +3 -2
  41. {metaflow-2.12.36.dist-info → metaflow-2.12.38.dist-info}/RECORD +45 -35
  42. {metaflow-2.12.36.dist-info → metaflow-2.12.38.dist-info}/LICENSE +0 -0
  43. {metaflow-2.12.36.dist-info → metaflow-2.12.38.dist-info}/WHEEL +0 -0
  44. {metaflow-2.12.36.dist-info → metaflow-2.12.38.dist-info}/entry_points.txt +0 -0
  45. {metaflow-2.12.36.dist-info → metaflow-2.12.38.dist-info}/top_level.txt +0 -0
@@ -60,6 +60,7 @@ from metaflow.plugins.kubernetes.kubernetes import (
60
60
  )
61
61
  from metaflow.plugins.kubernetes.kubernetes_jobsets import KubernetesArgoJobSet
62
62
  from metaflow.unbounded_foreach import UBF_CONTROL, UBF_TASK
63
+ from metaflow.user_configs.config_options import ConfigInput
63
64
  from metaflow.util import (
64
65
  compress_list,
65
66
  dict_to_cli_options,
@@ -168,6 +169,7 @@ class ArgoWorkflows(object):
168
169
  self.enable_heartbeat_daemon = enable_heartbeat_daemon
169
170
  self.enable_error_msg_capture = enable_error_msg_capture
170
171
  self.parameters = self._process_parameters()
172
+ self.config_parameters = self._process_config_parameters()
171
173
  self.triggers, self.trigger_options = self._process_triggers()
172
174
  self._schedule, self._timezone = self._get_schedule()
173
175
 
@@ -455,6 +457,10 @@ class ArgoWorkflows(object):
455
457
  "case-insensitive." % param.name
456
458
  )
457
459
  seen.add(norm)
460
+ # NOTE: We skip config parameters as these do not have dynamic values,
461
+ # and need to be treated differently.
462
+ if param.IS_CONFIG_PARAMETER:
463
+ continue
458
464
 
459
465
  extra_attrs = {}
460
466
  if param.kwargs.get("type") == JSONType:
@@ -488,6 +494,7 @@ class ArgoWorkflows(object):
488
494
  # execution - which needs to be fixed imminently.
489
495
  if not is_required or default_value is not None:
490
496
  default_value = json.dumps(default_value)
497
+
491
498
  parameters[param.name] = dict(
492
499
  name=param.name,
493
500
  value=default_value,
@@ -498,6 +505,27 @@ class ArgoWorkflows(object):
498
505
  )
499
506
  return parameters
500
507
 
508
+ def _process_config_parameters(self):
509
+ parameters = []
510
+ seen = set()
511
+ for var, param in self.flow._get_parameters():
512
+ if not param.IS_CONFIG_PARAMETER:
513
+ continue
514
+ # Throw an exception if the parameter is specified twice.
515
+ norm = param.name.lower()
516
+ if norm in seen:
517
+ raise MetaflowException(
518
+ "Parameter *%s* is specified twice. "
519
+ "Note that parameter names are "
520
+ "case-insensitive." % param.name
521
+ )
522
+ seen.add(norm)
523
+
524
+ parameters.append(
525
+ dict(name=param.name, kv_name=ConfigInput.make_key_name(param.name))
526
+ )
527
+ return parameters
528
+
501
529
  def _process_triggers(self):
502
530
  # Impute triggers for Argo Workflow Template specified through @trigger and
503
531
  # @trigger_on_finish decorators
@@ -520,8 +548,13 @@ class ArgoWorkflows(object):
520
548
  # convert them to lower case since Metaflow parameters are case
521
549
  # insensitive.
522
550
  seen = set()
551
+ # NOTE: We skip config parameters as their values can not be set through event payloads
523
552
  params = set(
524
- [param.name.lower() for var, param in self.flow._get_parameters()]
553
+ [
554
+ param.name.lower()
555
+ for var, param in self.flow._get_parameters()
556
+ if not param.IS_CONFIG_PARAMETER
557
+ ]
525
558
  )
526
559
  trigger_deco = self.flow._flow_decorators.get("trigger")[0]
527
560
  trigger_deco.format_deploytime_value()
@@ -1720,6 +1753,13 @@ class ArgoWorkflows(object):
1720
1753
  metaflow_version["production_token"] = self.production_token
1721
1754
  env["METAFLOW_VERSION"] = json.dumps(metaflow_version)
1722
1755
 
1756
+ # map config values
1757
+ cfg_env = {
1758
+ param["name"]: param["kv_name"] for param in self.config_parameters
1759
+ }
1760
+ if cfg_env:
1761
+ env["METAFLOW_FLOW_CONFIG_VALUE"] = json.dumps(cfg_env)
1762
+
1723
1763
  # Set the template inputs and outputs for passing state. Very simply,
1724
1764
  # the container template takes in input-paths as input and outputs
1725
1765
  # the task-id (which feeds in as input-paths to the subsequent task).
@@ -470,6 +470,7 @@ def make_flow(
470
470
  decorators._attach_decorators(
471
471
  obj.flow, [KubernetesDecorator.name, EnvironmentDecorator.name]
472
472
  )
473
+ decorators._init(obj.flow)
473
474
 
474
475
  decorators._init_step_decorators(
475
476
  obj.flow, obj.graph, obj.environment, obj.flow_datastore, obj.logger
@@ -138,8 +138,8 @@ class BatchDecorator(StepDecorator):
138
138
  supports_conda_environment = True
139
139
  target_platform = "linux-64"
140
140
 
141
- def __init__(self, attributes=None, statically_defined=False):
142
- super(BatchDecorator, self).__init__(attributes, statically_defined)
141
+ def init(self):
142
+ super(BatchDecorator, self).init()
143
143
 
144
144
  # If no docker image is explicitly specified, impute a default image.
145
145
  if not self.attributes["image"]:
@@ -18,6 +18,7 @@ from metaflow.metaflow_config import (
18
18
  SFN_S3_DISTRIBUTED_MAP_OUTPUT_PATH,
19
19
  )
20
20
  from metaflow.parameters import deploy_time_eval
21
+ from metaflow.user_configs.config_options import ConfigInput
21
22
  from metaflow.util import dict_to_cli_options, to_pascalcase
22
23
 
23
24
  from ..batch.batch import Batch
@@ -71,6 +72,7 @@ class StepFunctions(object):
71
72
  self.username = username
72
73
  self.max_workers = max_workers
73
74
  self.workflow_timeout = workflow_timeout
75
+ self.config_parameters = self._process_config_parameters()
74
76
 
75
77
  # https://aws.amazon.com/blogs/aws/step-functions-distributed-map-a-serverless-solution-for-large-scale-parallel-data-processing/
76
78
  self.use_distributed_map = use_distributed_map
@@ -485,6 +487,10 @@ class StepFunctions(object):
485
487
  "case-insensitive." % param.name
486
488
  )
487
489
  seen.add(norm)
490
+ # NOTE: We skip config parameters as these do not have dynamic values,
491
+ # and need to be treated differently.
492
+ if param.IS_CONFIG_PARAMETER:
493
+ continue
488
494
 
489
495
  is_required = param.kwargs.get("required", False)
490
496
  # Throw an exception if a schedule is set for a flow with required
@@ -501,6 +507,27 @@ class StepFunctions(object):
501
507
  parameters.append(dict(name=param.name, value=value))
502
508
  return parameters
503
509
 
510
+ def _process_config_parameters(self):
511
+ parameters = []
512
+ seen = set()
513
+ for var, param in self.flow._get_parameters():
514
+ if not param.IS_CONFIG_PARAMETER:
515
+ continue
516
+ # Throw an exception if the parameter is specified twice.
517
+ norm = param.name.lower()
518
+ if norm in seen:
519
+ raise MetaflowException(
520
+ "Parameter *%s* is specified twice. "
521
+ "Note that parameter names are "
522
+ "case-insensitive." % param.name
523
+ )
524
+ seen.add(norm)
525
+
526
+ parameters.append(
527
+ dict(name=param.name, kv_name=ConfigInput.make_key_name(param.name))
528
+ )
529
+ return parameters
530
+
504
531
  def _batch(self, node):
505
532
  attrs = {
506
533
  # metaflow.user is only used for setting the AWS Job Name.
@@ -747,6 +774,11 @@ class StepFunctions(object):
747
774
  metaflow_version["production_token"] = self.production_token
748
775
  env["METAFLOW_VERSION"] = json.dumps(metaflow_version)
749
776
 
777
+ # map config values
778
+ cfg_env = {param["name"]: param["kv_name"] for param in self.config_parameters}
779
+ if cfg_env:
780
+ env["METAFLOW_FLOW_CONFIG_VALUE"] = json.dumps(cfg_env)
781
+
750
782
  # Set AWS DynamoDb Table Name for state tracking for for-eaches.
751
783
  # There are three instances when metaflow runtime directly interacts
752
784
  # with AWS DynamoDB.
@@ -326,6 +326,7 @@ def make_flow(
326
326
 
327
327
  # Attach AWS Batch decorator to the flow
328
328
  decorators._attach_decorators(obj.flow, [BatchDecorator.name])
329
+ decorators._init(obj.flow)
329
330
  decorators._init_step_decorators(
330
331
  obj.flow, obj.graph, obj.environment, obj.flow_datastore, obj.logger
331
332
  )
@@ -722,8 +722,8 @@ def cli():
722
722
  pass
723
723
 
724
724
 
725
- @tracing.cli_entrypoint("s3op/list")
726
725
  @cli.command("list", help="List S3 objects")
726
+ @tracing.cli_entrypoint("s3op/list")
727
727
  @click.option(
728
728
  "--recursive/--no-recursive",
729
729
  default=False,
@@ -782,8 +782,8 @@ def lst(
782
782
  print(format_result_line(idx, url.prefix, url.url, str(size)))
783
783
 
784
784
 
785
- @tracing.cli_entrypoint("s3op/put")
786
785
  @cli.command(help="Upload files to S3")
786
+ @tracing.cli_entrypoint("s3op/put")
787
787
  @click.option(
788
788
  "--file",
789
789
  "files",
@@ -977,8 +977,8 @@ def _populate_prefixes(prefixes, inputs):
977
977
  return prefixes, is_transient_retry
978
978
 
979
979
 
980
- @tracing.cli_entrypoint("s3op/get")
981
980
  @cli.command(help="Download files from S3")
981
+ @tracing.cli_entrypoint("s3op/get")
982
982
  @click.option(
983
983
  "--recursive/--no-recursive",
984
984
  default=False,
@@ -33,12 +33,12 @@ def kubernetes():
33
33
  pass
34
34
 
35
35
 
36
- @tracing.cli_entrypoint("kubernetes/step")
37
36
  @kubernetes.command(
38
37
  help="Execute a single task on Kubernetes. This command calls the top-level step "
39
38
  "command inside a Kubernetes pod with the given options. Typically you do not call "
40
39
  "this command directly; it is used internally by Metaflow."
41
40
  )
41
+ @tracing.cli_entrypoint("kubernetes/step")
42
42
  @click.argument("step-name")
43
43
  @click.argument("code-package-sha")
44
44
  @click.argument("code-package-url")
@@ -151,8 +151,8 @@ class KubernetesDecorator(StepDecorator):
151
151
  supports_conda_environment = True
152
152
  target_platform = "linux-64"
153
153
 
154
- def __init__(self, attributes=None, statically_defined=False):
155
- super(KubernetesDecorator, self).__init__(attributes, statically_defined)
154
+ def init(self):
155
+ super(KubernetesDecorator, self).init()
156
156
 
157
157
  if not self.attributes["namespace"]:
158
158
  self.attributes["namespace"] = KUBERNETES_NAMESPACE
@@ -55,6 +55,17 @@ class CondaStepDecorator(StepDecorator):
55
55
  )
56
56
  super(CondaStepDecorator, self).__init__(attributes, statically_defined)
57
57
 
58
+ def init(self):
59
+ super(CondaStepDecorator, self).init()
60
+
61
+ # We have to go back and fixup _user_defined_attributes for potential
62
+ # config resolution
63
+ self._user_defined_attributes = {
64
+ k: v
65
+ for k, v in self.attributes.items()
66
+ if k in self._user_defined_attributes
67
+ }
68
+
58
69
  # Support legacy 'libraries=' attribute for the decorator.
59
70
  self.attributes["packages"] = {
60
71
  **self.attributes["libraries"],
@@ -338,6 +349,17 @@ class CondaFlowDecorator(FlowDecorator):
338
349
  )
339
350
  super(CondaFlowDecorator, self).__init__(attributes, statically_defined)
340
351
 
352
+ def init(self):
353
+ super(CondaFlowDecorator, self).init()
354
+
355
+ # We have to go back and fixup _user_defined_attributes for potential
356
+ # config resolution
357
+ self._user_defined_attributes = {
358
+ k: v
359
+ for k, v in self.attributes.items()
360
+ if k in self._user_defined_attributes
361
+ }
362
+
341
363
  # Support legacy 'libraries=' attribute for the decorator.
342
364
  self.attributes["packages"] = {
343
365
  **self.attributes["libraries"],
@@ -140,6 +140,7 @@ class PyPIFlowDecorator(FlowDecorator):
140
140
  from metaflow import decorators
141
141
 
142
142
  decorators._attach_decorators(flow, ["pypi"])
143
+ decorators._init(flow)
143
144
 
144
145
  # @pypi uses a conda environment to create a virtual environment.
145
146
  # The conda environment can be created through micromamba.
@@ -37,8 +37,8 @@ class TimeoutDecorator(StepDecorator):
37
37
  name = "timeout"
38
38
  defaults = {"seconds": 0, "minutes": 0, "hours": 0}
39
39
 
40
- def __init__(self, *args, **kwargs):
41
- super(TimeoutDecorator, self).__init__(*args, **kwargs)
40
+ def init(self):
41
+ super().init()
42
42
  # Initialize secs in __init__ so other decorators could safely use this
43
43
  # value without worrying about decorator order.
44
44
  # Convert values in attributes to type:int since they can be type:str
@@ -9,6 +9,7 @@ if sys.version_info < (3, 7):
9
9
  )
10
10
 
11
11
  import datetime
12
+ import functools
12
13
  import importlib
13
14
  import inspect
14
15
  import itertools
@@ -38,6 +39,7 @@ from metaflow.decorators import add_decorator_options
38
39
  from metaflow.exception import MetaflowException
39
40
  from metaflow.includefile import FilePathClass
40
41
  from metaflow.parameters import JSONTypeClass, flow_context
42
+ from metaflow.user_configs.config_options import LocalFileInput
41
43
 
42
44
  # Define a recursive type alias for JSON
43
45
  JSON = Union[Dict[str, "JSON"], List["JSON"], str, int, float, bool, None]
@@ -55,6 +57,7 @@ click_to_python_types = {
55
57
  File: str,
56
58
  JSONTypeClass: JSON,
57
59
  FilePathClass: str,
60
+ LocalFileInput: str,
58
61
  }
59
62
 
60
63
 
@@ -124,6 +127,37 @@ def _method_sanity_check(
124
127
  return method_params
125
128
 
126
129
 
130
+ def _lazy_load_command(
131
+ cli_collection: click.Group,
132
+ flow_parameters: Union[str, List[Parameter]],
133
+ _self,
134
+ name: str,
135
+ ):
136
+
137
+ # Context is not used in get_command so we can pass None. Since we pin click,
138
+ # this won't change from under us.
139
+
140
+ if isinstance(flow_parameters, str):
141
+ # Resolve flow_parameters -- for start, this is a function which we
142
+ # need to call to figure out the actual parameters (may be changed by configs)
143
+ flow_parameters = getattr(_self, flow_parameters)()
144
+ cmd_obj = cli_collection.get_command(None, name)
145
+ if cmd_obj:
146
+ if isinstance(cmd_obj, click.Group):
147
+ # TODO: possibly check for fake groups with cmd_obj.name in ["cli", "main"]
148
+ result = functools.partial(extract_group(cmd_obj, flow_parameters), _self)
149
+ elif isinstance(cmd_obj, click.Command):
150
+ result = functools.partial(extract_command(cmd_obj, flow_parameters), _self)
151
+ else:
152
+ raise RuntimeError(
153
+ "Cannot handle %s of type %s" % (cmd_obj.name, type(cmd_obj))
154
+ )
155
+ setattr(_self, name, result)
156
+ return result
157
+ else:
158
+ raise AttributeError()
159
+
160
+
127
161
  def get_annotation(param: Union[click.Argument, click.Option]):
128
162
  py_type = click_to_python_types[type(param.type)]
129
163
  if not param.required:
@@ -179,9 +213,11 @@ def extract_flow_class_from_file(flow_file: str) -> FlowSpec:
179
213
 
180
214
 
181
215
  class MetaflowAPI(object):
182
- def __init__(self, parent=None, **kwargs):
216
+ def __init__(self, parent=None, flow_cls=None, **kwargs):
183
217
  self._parent = parent
184
218
  self._chain = [{self._API_NAME: kwargs}]
219
+ self._flow_cls = flow_cls
220
+ self._cached_computed_parameters = None
185
221
 
186
222
  @property
187
223
  def parent(self):
@@ -200,23 +236,22 @@ class MetaflowAPI(object):
200
236
  @classmethod
201
237
  def from_cli(cls, flow_file: str, cli_collection: Callable) -> Callable:
202
238
  flow_cls = extract_flow_class_from_file(flow_file)
203
- flow_parameters = [p for _, p in flow_cls._get_parameters()]
239
+
204
240
  with flow_context(flow_cls) as _:
205
241
  add_decorator_options(cli_collection)
206
242
 
207
- class_dict = {"__module__": "metaflow", "_API_NAME": flow_file}
208
- command_groups = cli_collection.sources
209
- for each_group in command_groups:
210
- for _, cmd_obj in each_group.commands.items():
211
- if isinstance(cmd_obj, click.Group):
212
- # TODO: possibly check for fake groups with cmd_obj.name in ["cli", "main"]
213
- class_dict[cmd_obj.name] = extract_group(cmd_obj, flow_parameters)
214
- elif isinstance(cmd_obj, click.Command):
215
- class_dict[cmd_obj.name] = extract_command(cmd_obj, flow_parameters)
216
- else:
217
- raise RuntimeError(
218
- "Cannot handle %s of type %s" % (cmd_obj.name, type(cmd_obj))
219
- )
243
+ def getattr_wrapper(_self, name):
244
+ # Functools.partial do not automatically bind self (no __get__)
245
+ return _self._internal_getattr(_self, name)
246
+
247
+ class_dict = {
248
+ "__module__": "metaflow",
249
+ "_API_NAME": flow_file,
250
+ "_internal_getattr": functools.partial(
251
+ _lazy_load_command, cli_collection, "_compute_flow_parameters"
252
+ ),
253
+ "__getattr__": getattr_wrapper,
254
+ }
220
255
 
221
256
  to_return = type(flow_file, (MetaflowAPI,), class_dict)
222
257
  to_return.__name__ = flow_file
@@ -237,11 +272,11 @@ class MetaflowAPI(object):
237
272
  defaults,
238
273
  **kwargs,
239
274
  )
240
- return to_return(parent=None, **method_params)
275
+ return to_return(parent=None, flow_cls=flow_cls, **method_params)
241
276
 
242
277
  m = _method
243
- m.__name__ = cmd_obj.name
244
- m.__doc__ = getattr(cmd_obj, "help", None)
278
+ m.__name__ = cli_collection.name
279
+ m.__doc__ = getattr(cli_collection, "help", None)
245
280
  m.__signature__ = inspect.signature(_method).replace(
246
281
  parameters=params_sigs.values()
247
282
  )
@@ -287,6 +322,25 @@ class MetaflowAPI(object):
287
322
 
288
323
  return components
289
324
 
325
+ def _compute_flow_parameters(self):
326
+ if self._flow_cls is None or self._parent is not None:
327
+ raise RuntimeError(
328
+ "Computing flow-level parameters for a non start API. "
329
+ "Please report to the Metaflow team."
330
+ )
331
+ # TODO: We need to actually compute the new parameters (based on configs) which
332
+ # would involve processing the options at least partially. We will do this
333
+ # before GA but for now making it work for regular parameters
334
+ if self._cached_computed_parameters is not None:
335
+ return self._cached_computed_parameters
336
+ self._cached_computed_parameters = []
337
+ for _, param in self._flow_cls._get_parameters():
338
+ if param.IS_CONFIG_PARAMETER:
339
+ continue
340
+ param.init()
341
+ self._cached_computed_parameters.append(param)
342
+ return self._cached_computed_parameters
343
+
290
344
 
291
345
  def extract_all_params(cmd_obj: Union[click.Command, click.Group]):
292
346
  arg_params_sigs = OrderedDict()
@@ -351,7 +405,7 @@ def extract_group(cmd_obj: click.Group, flow_parameters: List[Parameter]) -> Cal
351
405
  method_params = _method_sanity_check(
352
406
  possible_arg_params, possible_opt_params, annotations, defaults, **kwargs
353
407
  )
354
- return resulting_class(parent=_self, **method_params)
408
+ return resulting_class(parent=_self, flow_cls=None, **method_params)
355
409
 
356
410
  m = _method
357
411
  m.__name__ = cmd_obj.name