metaflow-stubs 2.12.22__py2.py3-none-any.whl → 2.12.23__py2.py3-none-any.whl

This diff compares the contents of two publicly released versions of the package, as they appear in their respective public registries. It is provided for informational purposes only.
Files changed (152)
  1. metaflow-stubs/__init__.pyi +1029 -638
  2. metaflow-stubs/cards.pyi +211 -5
  3. metaflow-stubs/cli.pyi +23 -3
  4. metaflow-stubs/client/__init__.pyi +128 -3
  5. metaflow-stubs/client/core.pyi +227 -7
  6. metaflow-stubs/client/filecache.pyi +3 -3
  7. metaflow-stubs/clone_util.pyi +5 -2
  8. metaflow-stubs/events.pyi +20 -2
  9. metaflow-stubs/exception.pyi +2 -2
  10. metaflow-stubs/flowspec.pyi +71 -5
  11. metaflow-stubs/generated_for.txt +1 -1
  12. metaflow-stubs/includefile.pyi +158 -5
  13. metaflow-stubs/info_file.pyi +2 -2
  14. metaflow-stubs/metadata/metadata.pyi +9 -3
  15. metaflow-stubs/metadata/util.pyi +2 -2
  16. metaflow-stubs/metaflow_config.pyi +2 -2
  17. metaflow-stubs/metaflow_current.pyi +26 -23
  18. metaflow-stubs/mflog/mflog.pyi +2 -2
  19. metaflow-stubs/multicore_utils.pyi +2 -2
  20. metaflow-stubs/parameters.pyi +69 -3
  21. metaflow-stubs/plugins/__init__.pyi +14 -3
  22. metaflow-stubs/plugins/airflow/__init__.pyi +2 -2
  23. metaflow-stubs/plugins/airflow/airflow.pyi +15 -3
  24. metaflow-stubs/plugins/airflow/airflow_cli.pyi +66 -3
  25. metaflow-stubs/plugins/airflow/airflow_decorator.pyi +5 -2
  26. metaflow-stubs/plugins/airflow/airflow_utils.pyi +2 -2
  27. metaflow-stubs/plugins/airflow/exception.pyi +2 -2
  28. metaflow-stubs/plugins/airflow/sensors/__init__.pyi +80 -2
  29. metaflow-stubs/plugins/airflow/sensors/base_sensor.pyi +5 -2
  30. metaflow-stubs/plugins/airflow/sensors/external_task_sensor.pyi +44 -2
  31. metaflow-stubs/plugins/airflow/sensors/s3_sensor.pyi +44 -2
  32. metaflow-stubs/plugins/argo/__init__.pyi +2 -2
  33. metaflow-stubs/plugins/argo/argo_client.pyi +2 -2
  34. metaflow-stubs/plugins/argo/argo_events.pyi +16 -2
  35. metaflow-stubs/plugins/argo/argo_workflows.pyi +6 -4
  36. metaflow-stubs/plugins/argo/argo_workflows_cli.pyi +97 -6
  37. metaflow-stubs/plugins/argo/argo_workflows_decorator.pyi +34 -6
  38. metaflow-stubs/plugins/argo/argo_workflows_deployer.pyi +49 -4
  39. metaflow-stubs/plugins/aws/__init__.pyi +2 -2
  40. metaflow-stubs/plugins/aws/aws_client.pyi +2 -2
  41. metaflow-stubs/plugins/aws/aws_utils.pyi +2 -2
  42. metaflow-stubs/plugins/aws/batch/__init__.pyi +2 -2
  43. metaflow-stubs/plugins/aws/batch/batch.pyi +2 -2
  44. metaflow-stubs/plugins/aws/batch/batch_cli.pyi +2 -2
  45. metaflow-stubs/plugins/aws/batch/batch_client.pyi +2 -2
  46. metaflow-stubs/plugins/aws/batch/batch_decorator.pyi +103 -3
  47. metaflow-stubs/plugins/aws/secrets_manager/__init__.pyi +2 -2
  48. metaflow-stubs/plugins/aws/secrets_manager/aws_secrets_manager_secrets_provider.pyi +15 -3
  49. metaflow-stubs/plugins/aws/step_functions/__init__.pyi +2 -2
  50. metaflow-stubs/plugins/aws/step_functions/dynamo_db_client.pyi +2 -2
  51. metaflow-stubs/plugins/aws/step_functions/event_bridge_client.pyi +2 -2
  52. metaflow-stubs/plugins/aws/step_functions/production_token.pyi +2 -2
  53. metaflow-stubs/plugins/aws/step_functions/schedule_decorator.pyi +21 -2
  54. metaflow-stubs/plugins/aws/step_functions/step_functions.pyi +2 -2
  55. metaflow-stubs/plugins/aws/step_functions/step_functions_cli.pyi +69 -3
  56. metaflow-stubs/plugins/aws/step_functions/step_functions_client.pyi +2 -2
  57. metaflow-stubs/plugins/aws/step_functions/step_functions_decorator.pyi +5 -2
  58. metaflow-stubs/plugins/aws/step_functions/step_functions_deployer.pyi +49 -4
  59. metaflow-stubs/plugins/azure/__init__.pyi +2 -2
  60. metaflow-stubs/plugins/azure/azure_credential.pyi +2 -2
  61. metaflow-stubs/plugins/azure/azure_exceptions.pyi +2 -2
  62. metaflow-stubs/plugins/azure/azure_secret_manager_secrets_provider.pyi +21 -3
  63. metaflow-stubs/plugins/azure/azure_utils.pyi +2 -2
  64. metaflow-stubs/plugins/azure/blob_service_client_factory.pyi +2 -2
  65. metaflow-stubs/plugins/azure/includefile_support.pyi +2 -2
  66. metaflow-stubs/plugins/cards/__init__.pyi +2 -2
  67. metaflow-stubs/plugins/cards/card_cli.pyi +62 -4
  68. metaflow-stubs/plugins/cards/card_client.pyi +33 -2
  69. metaflow-stubs/plugins/cards/card_creator.pyi +5 -2
  70. metaflow-stubs/plugins/cards/card_datastore.pyi +8 -2
  71. metaflow-stubs/plugins/cards/card_decorator.pyi +52 -2
  72. metaflow-stubs/plugins/cards/card_modules/__init__.pyi +28 -2
  73. metaflow-stubs/plugins/cards/card_modules/basic.pyi +41 -2
  74. metaflow-stubs/plugins/cards/card_modules/card.pyi +28 -2
  75. metaflow-stubs/plugins/cards/card_modules/chevron/__init__.pyi +2 -2
  76. metaflow-stubs/plugins/cards/card_modules/chevron/main.pyi +2 -2
  77. metaflow-stubs/plugins/cards/card_modules/chevron/metadata.pyi +2 -2
  78. metaflow-stubs/plugins/cards/card_modules/chevron/renderer.pyi +2 -2
  79. metaflow-stubs/plugins/cards/card_modules/chevron/tokenizer.pyi +2 -2
  80. metaflow-stubs/plugins/cards/card_modules/components.pyi +184 -4
  81. metaflow-stubs/plugins/cards/card_modules/convert_to_native_type.pyi +5 -2
  82. metaflow-stubs/plugins/cards/card_modules/renderer_tools.pyi +2 -2
  83. metaflow-stubs/plugins/cards/card_modules/test_cards.pyi +35 -2
  84. metaflow-stubs/plugins/cards/card_resolver.pyi +2 -2
  85. metaflow-stubs/plugins/cards/component_serializer.pyi +57 -4
  86. metaflow-stubs/plugins/cards/exception.pyi +8 -2
  87. metaflow-stubs/plugins/catch_decorator.pyi +20 -3
  88. metaflow-stubs/plugins/datatools/__init__.pyi +62 -2
  89. metaflow-stubs/plugins/datatools/local.pyi +16 -2
  90. metaflow-stubs/plugins/datatools/s3/__init__.pyi +72 -3
  91. metaflow-stubs/plugins/datatools/s3/s3.pyi +82 -5
  92. metaflow-stubs/plugins/datatools/s3/s3tail.pyi +2 -2
  93. metaflow-stubs/plugins/datatools/s3/s3util.pyi +2 -2
  94. metaflow-stubs/plugins/debug_logger.pyi +2 -2
  95. metaflow-stubs/plugins/debug_monitor.pyi +2 -2
  96. metaflow-stubs/plugins/environment_decorator.pyi +10 -2
  97. metaflow-stubs/plugins/events_decorator.pyi +106 -2
  98. metaflow-stubs/plugins/frameworks/__init__.pyi +2 -2
  99. metaflow-stubs/plugins/frameworks/pytorch.pyi +24 -3
  100. metaflow-stubs/plugins/gcp/__init__.pyi +2 -2
  101. metaflow-stubs/plugins/gcp/gcp_secret_manager_secrets_provider.pyi +15 -3
  102. metaflow-stubs/plugins/gcp/gs_exceptions.pyi +2 -2
  103. metaflow-stubs/plugins/gcp/gs_storage_client_factory.pyi +2 -2
  104. metaflow-stubs/plugins/gcp/gs_utils.pyi +2 -2
  105. metaflow-stubs/plugins/gcp/includefile_support.pyi +2 -2
  106. metaflow-stubs/plugins/kubernetes/__init__.pyi +2 -2
  107. metaflow-stubs/plugins/kubernetes/kube_utils.pyi +2 -2
  108. metaflow-stubs/plugins/kubernetes/kubernetes.pyi +3 -3
  109. metaflow-stubs/plugins/kubernetes/kubernetes_cli.pyi +65 -2
  110. metaflow-stubs/plugins/kubernetes/kubernetes_client.pyi +2 -2
  111. metaflow-stubs/plugins/kubernetes/kubernetes_decorator.pyi +100 -3
  112. metaflow-stubs/plugins/kubernetes/kubernetes_job.pyi +2 -2
  113. metaflow-stubs/plugins/kubernetes/kubernetes_jobsets.pyi +5 -2
  114. metaflow-stubs/plugins/logs_cli.pyi +2 -2
  115. metaflow-stubs/plugins/package_cli.pyi +2 -2
  116. metaflow-stubs/plugins/parallel_decorator.pyi +29 -2
  117. metaflow-stubs/plugins/project_decorator.pyi +59 -2
  118. metaflow-stubs/plugins/pypi/__init__.pyi +2 -2
  119. metaflow-stubs/plugins/pypi/conda_decorator.pyi +45 -2
  120. metaflow-stubs/plugins/pypi/conda_environment.pyi +4 -4
  121. metaflow-stubs/plugins/pypi/pypi_decorator.pyi +33 -2
  122. metaflow-stubs/plugins/pypi/pypi_environment.pyi +3 -3
  123. metaflow-stubs/plugins/pypi/utils.pyi +2 -2
  124. metaflow-stubs/plugins/resources_decorator.pyi +33 -2
  125. metaflow-stubs/plugins/retry_decorator.pyi +21 -2
  126. metaflow-stubs/plugins/secrets/__init__.pyi +2 -2
  127. metaflow-stubs/plugins/secrets/inline_secrets_provider.pyi +2 -2
  128. metaflow-stubs/plugins/secrets/secrets_decorator.pyi +11 -2
  129. metaflow-stubs/plugins/storage_executor.pyi +6 -2
  130. metaflow-stubs/plugins/tag_cli.pyi +35 -4
  131. metaflow-stubs/plugins/test_unbounded_foreach_decorator.pyi +10 -3
  132. metaflow-stubs/plugins/timeout_decorator.pyi +23 -2
  133. metaflow-stubs/procpoll.pyi +2 -2
  134. metaflow-stubs/pylint_wrapper.pyi +2 -2
  135. metaflow-stubs/runner/__init__.pyi +2 -2
  136. metaflow-stubs/runner/deployer.pyi +70 -131
  137. metaflow-stubs/runner/metaflow_runner.pyi +117 -9
  138. metaflow-stubs/runner/nbdeploy.pyi +66 -2
  139. metaflow-stubs/runner/nbrun.pyi +79 -2
  140. metaflow-stubs/runner/subprocess_manager.pyi +16 -4
  141. metaflow-stubs/runner/utils.pyi +32 -2
  142. metaflow-stubs/system/__init__.pyi +4 -4
  143. metaflow-stubs/system/system_logger.pyi +3 -3
  144. metaflow-stubs/system/system_monitor.pyi +3 -3
  145. metaflow-stubs/tagging_util.pyi +2 -2
  146. metaflow-stubs/tuple_util.pyi +2 -2
  147. metaflow-stubs/version.pyi +2 -2
  148. {metaflow_stubs-2.12.22.dist-info → metaflow_stubs-2.12.23.dist-info}/METADATA +2 -2
  149. metaflow_stubs-2.12.23.dist-info/RECORD +152 -0
  150. metaflow_stubs-2.12.22.dist-info/RECORD +0 -152
  151. {metaflow_stubs-2.12.22.dist-info → metaflow_stubs-2.12.23.dist-info}/WHEEL +0 -0
  152. {metaflow_stubs-2.12.22.dist-info → metaflow_stubs-2.12.23.dist-info}/top_level.txt +0 -0
@@ -1,25 +1,25 @@
  ##################################################################################
  # Auto-generated Metaflow stub file #
- # MF version: 2.12.22 #
- # Generated on 2024-09-20T00:45:49.586219 #
+ # MF version: 2.12.23 #
+ # Generated on 2024-10-01T14:32:39.945734 #
  ##################################################################################

  from __future__ import annotations

  import typing
  if typing.TYPE_CHECKING:
-     import metaflow.flowspec
-     import metaflow.runner.metaflow_runner
-     import metaflow.events
-     import metaflow.client.core
      import metaflow.plugins.datatools.s3.s3
-     import metaflow.metaflow_current
+     import metaflow.events
+     import metaflow._vendor.click.types
+     import io
      import datetime
+     import metaflow.parameters
+     import metaflow.runner.metaflow_runner
      import metaflow.datastore.inputs
+     import metaflow.metaflow_current
+     import metaflow.client.core
+     import metaflow.flowspec
      import typing
-     import io
-     import metaflow._vendor.click.types
-     import metaflow.parameters
  FlowSpecDerived = typing.TypeVar("FlowSpecDerived", bound="FlowSpec", contravariant=False, covariant=False)
  StepFlag = typing.NewType("StepFlag", bool)

@@ -103,6 +103,14 @@ def metadata(ms: str) -> str:
      ...

  class FlowSpec(object, metaclass=metaflow.flowspec._FlowSpecMeta):
+     """
+     Main class from which all Flows should inherit.
+
+     Attributes
+     ----------
+     index
+     input
+     """
      def __init__(self, use_cli = True):
          """
          Construct a FlowSpec
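
For orientation, the `FlowSpec` class stubbed above is subclassed to define a flow; a minimal sketch (the flow name and artifact are illustrative, not part of the package):

```python
from metaflow import FlowSpec, step

class HelloFlow(FlowSpec):

    @step
    def start(self):
        self.message = "hello"  # stored as a read-only artifact
        self.next(self.end)

    @step
    def end(self):
        print(self.message)

if __name__ == "__main__":
    HelloFlow()
```
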
@@ -324,6 +332,49 @@ class FlowSpec(object, metaclass=metaflow.flowspec._FlowSpecMeta):
          ...

  class Parameter(object, metaclass=type):
+     """
+     Defines a parameter for a flow.
+
+     Parameters must be instantiated as class variables in flow classes, e.g.
+     ```
+     class MyFlow(FlowSpec):
+         param = Parameter('myparam')
+     ```
+     in this case, the parameter is specified on the command line as
+     ```
+     python myflow.py run --myparam=5
+     ```
+     and its value is accessible through a read-only artifact like this:
+     ```
+     print(self.param == 5)
+     ```
+     Note that the user-visible parameter name, `myparam` above, can be
+     different from the artifact name, `param` above.
+
+     The parameter value is converted to a Python type based on the `type`
+     argument or to match the type of `default`, if it is set.
+
+     Parameters
+     ----------
+     name : str
+         User-visible parameter name.
+     default : str or float or int or bool or `JSONType` or a function.
+         Default value for the parameter. Use a special `JSONType` class to
+         indicate that the value must be a valid JSON object. A function
+         implies that the parameter corresponds to a *deploy-time parameter*.
+         The type of the default value is used as the parameter `type`.
+     type : Type, default None
+         If `default` is not specified, define the parameter type. Specify
+         one of `str`, `float`, `int`, `bool`, or `JSONType`. If None, defaults
+         to the type of `default` or `str` if none specified.
+     help : str, optional
+         Help text to show in `run --help`.
+     required : bool, default False
+         Require that the user specified a value for the parameter.
+         `required=True` implies that the `default` is not used.
+     show_default : bool, default True
+         If True, show the default value in the help text.
+     """
      def __init__(self, name: str, default: typing.Union[str, float, int, bool, typing.Dict[str, typing.Any], typing.Callable[[], typing.Union[str, float, int, bool, typing.Dict[str, typing.Any]]], None] = None, type: typing.Union[typing.Type[str], typing.Type[float], typing.Type[int], typing.Type[bool], metaflow.parameters.JSONTypeClass, None] = None, help: typing.Optional[str] = None, required: bool = False, show_default: bool = True, **kwargs: typing.Dict[str, typing.Any]):
          ...
      def __repr__(self):
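
A short sketch of the `Parameter` pattern described in this new docstring; the flow name and parameter values are illustrative:

```python
from metaflow import FlowSpec, JSONType, Parameter, step

class MyFlow(FlowSpec):
    # Exposed on the CLI as --myparam; read in steps as self.param.
    param = Parameter('myparam', type=int, default=5, help='An integer knob')
    # JSONType values must parse as valid JSON, e.g. --config '{"lr": 0.01}'.
    config = Parameter('config', type=JSONType, default='{}')

    @step
    def start(self):
        print(self.param == 5)  # parameters are read-only artifacts
        self.next(self.end)

    @step
    def end(self):
        pass

if __name__ == "__main__":
    MyFlow()
```

Per the docstring, this is invoked as `python myflow.py run --myparam=5`.
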
@@ -353,6 +404,59 @@ class JSONTypeClass(metaflow._vendor.click.types.ParamType, metaclass=type):
  JSONType: metaflow.parameters.JSONTypeClass

  class S3(object, metaclass=type):
+     """
+     The Metaflow S3 client.
+
+     This object manages the connection to S3 and a temporary directory that is used
+     to download objects. Note that in most cases when the data fits in memory, no local
+     disk IO is needed as operations are cached by the operating system, which makes
+     operations fast as long as there is enough memory available.
+
+     The easiest way is to use this object as a context manager:
+     ```
+     with S3() as s3:
+         data = [obj.blob for obj in s3.get_many(urls)]
+         print(data)
+     ```
+     The context manager takes care of creating and deleting a temporary directory
+     automatically. Without a context manager, you must call `.close()` to delete
+     the directory explicitly:
+     ```
+     s3 = S3()
+     data = [obj.blob for obj in s3.get_many(urls)]
+     s3.close()
+     ```
+     You can customize the location of the temporary directory with `tmproot`. It
+     defaults to the current working directory.
+
+     To make it easier to deal with object locations, the client can be initialized
+     with an S3 path prefix. There are three ways to handle locations:
+
+     1. Use a `metaflow.Run` object or `self`, e.g. `S3(run=self)`, which
+        initializes the prefix with the global `DATATOOLS_S3ROOT` path, combined
+        with the current run ID. This mode makes it easy to version data based
+        on the run ID consistently. You can use `bucket` and `prefix` to
+        override parts of `DATATOOLS_S3ROOT`.
+
+     2. Specify an S3 prefix explicitly with `s3root`,
+        e.g. `S3(s3root='s3://mybucket/some/path')`.
+
+     3. Specify nothing, i.e. `S3()`, in which case all operations require
+        a full S3 url prefixed with `s3://`.
+
+     Parameters
+     ----------
+     tmproot : str, default: '.'
+         Where to store the temporary directory.
+     bucket : str, optional
+         Override the bucket from `DATATOOLS_S3ROOT` when `run` is specified.
+     prefix : str, optional
+         Override the path from `DATATOOLS_S3ROOT` when `run` is specified.
+     run : FlowSpec or Run, optional
+         Derive path prefix from the current or a past run ID, e.g. S3(run=self).
+     s3root : str, optional
+         If `run` is not specified, use this as the S3 prefix.
+     """
      @classmethod
      def get_root_from_config(cls, echo, create_on_absent = True):
          ...
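
The location modes described above, condensed into one sketch; bucket and key names are placeholders:

```python
from metaflow import S3

urls = ['s3://mybucket/some/path/a', 's3://mybucket/some/path/b']

# Full URLs, no prefix configured; the context manager cleans up the tmp dir.
with S3() as s3:
    data = [obj.blob for obj in s3.get_many(urls)]

# Explicit prefix: keys below are relative to s3root.
with S3(s3root='s3://mybucket/some/path') as s3:
    data = [obj.blob for obj in s3.get_many(['a', 'b'])]

# Without a context manager, delete the temporary directory yourself.
s3 = S3(s3root='s3://mybucket/some/path')
try:
    data = [obj.blob for obj in s3.get_many(['a', 'b'])]
finally:
    s3.close()
```

Inside a running flow, `S3(run=self)` derives the prefix from `DATATOOLS_S3ROOT` and the run ID, as the docstring notes.
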
@@ -644,6 +748,33 @@ class S3(object, metaclass=type):
          ...

  class IncludeFile(metaflow.parameters.Parameter, metaclass=type):
+     """
+     Includes a local file as a parameter for the flow.
+
+     `IncludeFile` behaves like `Parameter` except that it reads its value from a file instead of
+     the command line. The user provides a path to a file on the command line. The file contents
+     are saved as a read-only artifact which is available in all steps of the flow.
+
+     Parameters
+     ----------
+     name : str
+         User-visible parameter name.
+     default : Union[str, Callable[ParameterContext, str]]
+         Default path to a local file. A function
+         implies that the parameter corresponds to a *deploy-time parameter*.
+     is_text : bool, default True
+         Convert the file contents to a string using the provided `encoding`.
+         If False, the artifact is stored in `bytes`.
+     encoding : str, optional, default 'utf-8'
+         Use this encoding to decode the file contents if `is_text=True`.
+     required : bool, default False
+         Require that the user specified a value for the parameter.
+         `required=True` implies that the `default` is not used.
+     help : str, optional
+         Help text to show in `run --help`.
+     show_default : bool, default True
+         If True, show the default value in the help text.
+     """
      def __init__(self, name: str, required: bool = False, is_text: bool = True, encoding: str = "utf-8", help: typing.Optional[str] = None, **kwargs: typing.Dict[str, str]):
          ...
      def load_parameter(self, v):
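
A minimal sketch of `IncludeFile` as documented above; the file and flow names are illustrative:

```python
from metaflow import FlowSpec, IncludeFile, step

class CsvFlow(FlowSpec):
    # The file passed as --data is snapshotted as a read-only artifact.
    data = IncludeFile('data', is_text=True, encoding='utf-8',
                       help='Path to a local CSV file')

    @step
    def start(self):
        print(len(self.data.splitlines()), 'lines')
        self.next(self.end)

    @step
    def end(self):
        pass

if __name__ == "__main__":
    CsvFlow()
```

Invoked as `python csvflow.py run --data ./input.csv`; with `is_text=False` the artifact would be stored as `bytes`.
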
@@ -724,263 +855,429 @@ def step(f: typing.Union[typing.Callable[[FlowSpecDerived], None], typing.Callab
      ...

  @typing.overload
- def resources(*, cpu: int = 1, gpu: int = 0, disk: typing.Optional[int] = None, memory: int = 4096, shared_memory: typing.Optional[int] = None) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
+ def batch(*, cpu: int = 1, gpu: int = 0, memory: int = 4096, image: typing.Optional[str] = None, queue: str = "METAFLOW_BATCH_JOB_QUEUE", iam_role: str = "METAFLOW_ECS_S3_ACCESS_IAM_ROLE", execution_role: str = "METAFLOW_ECS_FARGATE_EXECUTION_ROLE", shared_memory: typing.Optional[int] = None, max_swap: typing.Optional[int] = None, swappiness: typing.Optional[int] = None, use_tmpfs: bool = False, tmpfs_tempdir: bool = True, tmpfs_size: typing.Optional[int] = None, tmpfs_path: typing.Optional[str] = None, inferentia: int = 0, trainium: int = None, efa: int = 0, ephemeral_storage: int = None, log_driver: typing.Optional[str] = None, log_options: typing.Optional[typing.List[str]] = None) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
      """
-     Specifies the resources needed when executing this step.
-
-     Use `@resources` to specify the resource requirements
-     independently of the specific compute layer (`@batch`, `@kubernetes`).
-
-     You can choose the compute layer on the command line by executing e.g.
-     ```
-     python myflow.py run --with batch
-     ```
-     or
-     ```
-     python myflow.py run --with kubernetes
-     ```
-     which executes the flow on the desired system using the
-     requirements specified in `@resources`.
+     Specifies that this step should execute on [AWS Batch](https://aws.amazon.com/batch/).

      Parameters
      ----------
      cpu : int, default 1
-         Number of CPUs required for this step.
+         Number of CPUs required for this step. If `@resources` is
+         also present, the maximum value from all decorators is used.
      gpu : int, default 0
-         Number of GPUs required for this step.
-     disk : int, optional, default None
-         Disk size (in MB) required for this step. Only applies on Kubernetes.
+         Number of GPUs required for this step. If `@resources` is
+         also present, the maximum value from all decorators is used.
      memory : int, default 4096
-         Memory size (in MB) required for this step.
+         Memory size (in MB) required for this step. If
+         `@resources` is also present, the maximum value from all decorators is
+         used.
+     image : str, optional, default None
+         Docker image to use when launching on AWS Batch. If not specified, and
+         METAFLOW_BATCH_CONTAINER_IMAGE is specified, that image is used. If
+         not, a default Docker image mapping to the current version of Python is used.
+     queue : str, default METAFLOW_BATCH_JOB_QUEUE
+         AWS Batch Job Queue to submit the job to.
+     iam_role : str, default METAFLOW_ECS_S3_ACCESS_IAM_ROLE
+         AWS IAM role that AWS Batch container uses to access AWS cloud resources.
+     execution_role : str, default METAFLOW_ECS_FARGATE_EXECUTION_ROLE
+         AWS IAM role that AWS Batch can use [to trigger AWS Fargate tasks]
+         (https://docs.aws.amazon.com/batch/latest/userguide/execution-IAM-role.html).
      shared_memory : int, optional, default None
          The value for the size (in MiB) of the /dev/shm volume for this step.
          This parameter maps to the `--shm-size` option in Docker.
+     max_swap : int, optional, default None
+         The total amount of swap memory (in MiB) a container can use for this
+         step. This parameter is translated to the `--memory-swap` option in
+         Docker where the value is the sum of the container memory plus the
+         `max_swap` value.
+     swappiness : int, optional, default None
+         This allows you to tune memory swappiness behavior for this step.
+         A swappiness value of 0 causes swapping not to happen unless absolutely
+         necessary. A swappiness value of 100 causes pages to be swapped very
+         aggressively. Accepted values are whole numbers between 0 and 100.
+     use_tmpfs : bool, default False
+         This enables an explicit tmpfs mount for this step. Note that tmpfs is
+         not available on Fargate compute environments.
+     tmpfs_tempdir : bool, default True
+         Sets METAFLOW_TEMPDIR to tmpfs_path if set for this step.
+     tmpfs_size : int, optional, default None
+         The value for the size (in MiB) of the tmpfs mount for this step.
+         This parameter maps to the `--tmpfs` option in Docker. Defaults to 50% of the
+         memory allocated for this step.
+     tmpfs_path : str, optional, default None
+         Path to tmpfs mount for this step. Defaults to /metaflow_temp.
+     inferentia : int, default 0
+         Number of Inferentia chips required for this step.
+     trainium : int, default None
+         Alias for inferentia. Use only one of the two.
+     efa : int, default 0
+         Number of elastic fabric adapter network devices to attach to the container.
+     ephemeral_storage : int, default None
+         The total amount, in GiB, of ephemeral storage to set for the task, 21-200GiB.
+         This is only relevant for Fargate compute environments.
+     log_driver : str, optional, default None
+         The log driver to use for the Amazon ECS container.
+     log_options : List[str], optional, default None
+         List of strings containing options for the chosen log driver. The configurable values
+         depend on the `log driver` chosen. Validation of these options is not supported yet.
+         Example: [`awslogs-group:aws/batch/job`]
      """
      ...

  @typing.overload
- def resources(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
+ def batch(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
      ...

  @typing.overload
- def resources(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
+ def batch(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
      ...

- def resources(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None, *, cpu: int = 1, gpu: int = 0, disk: typing.Optional[int] = None, memory: int = 4096, shared_memory: typing.Optional[int] = None):
+ def batch(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None, *, cpu: int = 1, gpu: int = 0, memory: int = 4096, image: typing.Optional[str] = None, queue: str = "METAFLOW_BATCH_JOB_QUEUE", iam_role: str = "METAFLOW_ECS_S3_ACCESS_IAM_ROLE", execution_role: str = "METAFLOW_ECS_FARGATE_EXECUTION_ROLE", shared_memory: typing.Optional[int] = None, max_swap: typing.Optional[int] = None, swappiness: typing.Optional[int] = None, use_tmpfs: bool = False, tmpfs_tempdir: bool = True, tmpfs_size: typing.Optional[int] = None, tmpfs_path: typing.Optional[str] = None, inferentia: int = 0, trainium: int = None, efa: int = 0, ephemeral_storage: int = None, log_driver: typing.Optional[str] = None, log_options: typing.Optional[typing.List[str]] = None):
      """
-     Specifies the resources needed when executing this step.
-
-     Use `@resources` to specify the resource requirements
-     independently of the specific compute layer (`@batch`, `@kubernetes`).
-
-     You can choose the compute layer on the command line by executing e.g.
-     ```
-     python myflow.py run --with batch
-     ```
-     or
-     ```
-     python myflow.py run --with kubernetes
-     ```
-     which executes the flow on the desired system using the
-     requirements specified in `@resources`.
+     Specifies that this step should execute on [AWS Batch](https://aws.amazon.com/batch/).

      Parameters
      ----------
      cpu : int, default 1
-         Number of CPUs required for this step.
+         Number of CPUs required for this step. If `@resources` is
+         also present, the maximum value from all decorators is used.
      gpu : int, default 0
-         Number of GPUs required for this step.
-     disk : int, optional, default None
-         Disk size (in MB) required for this step. Only applies on Kubernetes.
+         Number of GPUs required for this step. If `@resources` is
+         also present, the maximum value from all decorators is used.
      memory : int, default 4096
-         Memory size (in MB) required for this step.
+         Memory size (in MB) required for this step. If
+         `@resources` is also present, the maximum value from all decorators is
+         used.
+     image : str, optional, default None
+         Docker image to use when launching on AWS Batch. If not specified, and
+         METAFLOW_BATCH_CONTAINER_IMAGE is specified, that image is used. If
+         not, a default Docker image mapping to the current version of Python is used.
+     queue : str, default METAFLOW_BATCH_JOB_QUEUE
+         AWS Batch Job Queue to submit the job to.
+     iam_role : str, default METAFLOW_ECS_S3_ACCESS_IAM_ROLE
+         AWS IAM role that AWS Batch container uses to access AWS cloud resources.
+     execution_role : str, default METAFLOW_ECS_FARGATE_EXECUTION_ROLE
+         AWS IAM role that AWS Batch can use [to trigger AWS Fargate tasks]
+         (https://docs.aws.amazon.com/batch/latest/userguide/execution-IAM-role.html).
      shared_memory : int, optional, default None
          The value for the size (in MiB) of the /dev/shm volume for this step.
          This parameter maps to the `--shm-size` option in Docker.
+     max_swap : int, optional, default None
+         The total amount of swap memory (in MiB) a container can use for this
+         step. This parameter is translated to the `--memory-swap` option in
+         Docker where the value is the sum of the container memory plus the
+         `max_swap` value.
+     swappiness : int, optional, default None
+         This allows you to tune memory swappiness behavior for this step.
+         A swappiness value of 0 causes swapping not to happen unless absolutely
+         necessary. A swappiness value of 100 causes pages to be swapped very
+         aggressively. Accepted values are whole numbers between 0 and 100.
+     use_tmpfs : bool, default False
+         This enables an explicit tmpfs mount for this step. Note that tmpfs is
+         not available on Fargate compute environments.
+     tmpfs_tempdir : bool, default True
+         Sets METAFLOW_TEMPDIR to tmpfs_path if set for this step.
+     tmpfs_size : int, optional, default None
+         The value for the size (in MiB) of the tmpfs mount for this step.
+         This parameter maps to the `--tmpfs` option in Docker. Defaults to 50% of the
+         memory allocated for this step.
+     tmpfs_path : str, optional, default None
+         Path to tmpfs mount for this step. Defaults to /metaflow_temp.
+     inferentia : int, default 0
+         Number of Inferentia chips required for this step.
+     trainium : int, default None
+         Alias for inferentia. Use only one of the two.
+     efa : int, default 0
+         Number of elastic fabric adapter network devices to attach to the container.
+     ephemeral_storage : int, default None
+         The total amount, in GiB, of ephemeral storage to set for the task, 21-200GiB.
+         This is only relevant for Fargate compute environments.
+     log_driver : str, optional, default None
+         The log driver to use for the Amazon ECS container.
+     log_options : List[str], optional, default None
+         List of strings containing options for the chosen log driver. The configurable values
+         depend on the `log driver` chosen. Validation of these options is not supported yet.
+         Example: [`awslogs-group:aws/batch/job`]
      """
      ...

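A hedged sketch of the new `@batch` stub above, applied to a step; the queue name is a placeholder and other arguments follow the documented defaults:

```python
from metaflow import FlowSpec, batch, step

class BatchFlow(FlowSpec):

    @batch(cpu=2, memory=8192, queue='my-batch-queue')  # queue is a placeholder
    @step
    def start(self):
        self.next(self.end)

    @step
    def end(self):
        pass

if __name__ == "__main__":
    BatchFlow()
```
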
  @typing.overload
- def catch(*, var: typing.Optional[str] = None, print_exception: bool = True) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
+ def timeout(*, seconds: int = 0, minutes: int = 0, hours: int = 0) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
      """
-     Specifies that the step will succeed under all circumstances.
+     Specifies a timeout for your step.

-     The decorator will create an optional artifact, specified by `var`, which
-     contains the exception raised. You can use it to detect the presence
-     of errors, indicating that all happy-path artifacts produced by the step
-     are missing.
+     This decorator is useful if this step may hang indefinitely.
+
+     This can be used in conjunction with the `@retry` decorator as well as the `@catch` decorator.
+     A timeout is considered to be an exception thrown by the step. It will cause the step to be
+     retried if needed and the exception will be caught by the `@catch` decorator, if present.
+
+     Note that all the values specified in parameters are added together so if you specify
+     60 seconds and 1 hour, the decorator will have an effective timeout of 1 hour and 1 minute.

      Parameters
      ----------
-     var : str, optional, default None
-         Name of the artifact in which to store the caught exception.
-         If not specified, the exception is not stored.
-     print_exception : bool, default True
-         Determines whether or not the exception is printed to
-         stdout when caught.
+     seconds : int, default 0
+         Number of seconds to wait prior to timing out.
+     minutes : int, default 0
+         Number of minutes to wait prior to timing out.
+     hours : int, default 0
+         Number of hours to wait prior to timing out.
      """
      ...

  @typing.overload
- def catch(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
+ def timeout(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
      ...

  @typing.overload
- def catch(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
+ def timeout(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
      ...

- def catch(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None, *, var: typing.Optional[str] = None, print_exception: bool = True):
+ def timeout(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None, *, seconds: int = 0, minutes: int = 0, hours: int = 0):
      """
-     Specifies that the step will succeed under all circumstances.
+     Specifies a timeout for your step.

-     The decorator will create an optional artifact, specified by `var`, which
-     contains the exception raised. You can use it to detect the presence
-     of errors, indicating that all happy-path artifacts produced by the step
-     are missing.
+     This decorator is useful if this step may hang indefinitely.

-     Parameters
+     This can be used in conjunction with the `@retry` decorator as well as the `@catch` decorator.
+     A timeout is considered to be an exception thrown by the step. It will cause the step to be
+     retried if needed and the exception will be caught by the `@catch` decorator, if present.
+
+     Note that all the values specified in parameters are added together so if you specify
+     60 seconds and 1 hour, the decorator will have an effective timeout of 1 hour and 1 minute.
+
+     Parameters
      ----------
-     var : str, optional, default None
-         Name of the artifact in which to store the caught exception.
-         If not specified, the exception is not stored.
-     print_exception : bool, default True
-         Determines whether or not the exception is printed to
-         stdout when caught.
+     seconds : int, default 0
+         Number of seconds to wait prior to timing out.
+     minutes : int, default 0
+         Number of minutes to wait prior to timing out.
+     hours : int, default 0
+         Number of hours to wait prior to timing out.
      """
      ...

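A sketch of `@timeout` combined with `@retry` and `@catch`, mirroring the interaction described in the docstring above; durations and retry counts are illustrative:

```python
from metaflow import FlowSpec, catch, retry, step, timeout

class RobustFlow(FlowSpec):

    @catch(var='failure')          # stores the exception if retries run out
    @retry(times=2)                # a timeout is retried like any exception
    @timeout(hours=1, seconds=60)  # durations add up: 1 hour 1 minute total
    @step
    def start(self):
        self.next(self.end)

    @step
    def end(self):
        pass

if __name__ == "__main__":
    RobustFlow()
```
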
  @typing.overload
- def retry(*, times: int = 3, minutes_between_retries: int = 2) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
+ def pypi(*, packages: typing.Dict[str, str] = {}, python: typing.Optional[str] = None) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
      """
-     Specifies the number of times the task corresponding
-     to a step needs to be retried.
-
-     This decorator is useful for handling transient errors, such as networking issues.
-     If your task contains operations that can't be retried safely, e.g. database updates,
-     it is advisable to annotate it with `@retry(times=0)`.
+     Specifies the PyPI packages for the step.

-     This can be used in conjunction with the `@catch` decorator. The `@catch`
-     decorator will execute a no-op task after all retries have been exhausted,
-     ensuring that the flow execution can continue.
+     Information in this decorator will augment any
+     attributes set in the `@pypi_base` flow-level decorator. Hence,
+     you can use `@pypi_base` to set packages required by all
+     steps and use `@pypi` to specify step-specific overrides.

      Parameters
      ----------
-     times : int, default 3
-         Number of times to retry this task.
-     minutes_between_retries : int, default 2
-         Number of minutes between retries.
+     packages : Dict[str, str], default: {}
+         Packages to use for this step. The key is the name of the package
+         and the value is the version to use.
+     python : str, optional, default: None
+         Version of Python to use, e.g. '3.7.4'. A default value of None implies
+         that the version used will correspond to the version of the Python interpreter used to start the run.
      """
      ...

  @typing.overload
- def retry(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
+ def pypi(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
      ...

  @typing.overload
- def retry(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
+ def pypi(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
      ...

- def retry(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None, *, times: int = 3, minutes_between_retries: int = 2):
+ def pypi(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None, *, packages: typing.Dict[str, str] = {}, python: typing.Optional[str] = None):
      """
-     Specifies the number of times the task corresponding
-     to a step needs to be retried.
-
-     This decorator is useful for handling transient errors, such as networking issues.
-     If your task contains operations that can't be retried safely, e.g. database updates,
-     it is advisable to annotate it with `@retry(times=0)`.
+     Specifies the PyPI packages for the step.

-     This can be used in conjunction with the `@catch` decorator. The `@catch`
-     decorator will execute a no-op task after all retries have been exhausted,
-     ensuring that the flow execution can continue.
+     Information in this decorator will augment any
+     attributes set in the `@pypi_base` flow-level decorator. Hence,
+     you can use `@pypi_base` to set packages required by all
+     steps and use `@pypi` to specify step-specific overrides.

      Parameters
      ----------
-     times : int, default 3
-         Number of times to retry this task.
-     minutes_between_retries : int, default 2
-         Number of minutes between retries.
+     packages : Dict[str, str], default: {}
+         Packages to use for this step. The key is the name of the package
+         and the value is the version to use.
+     python : str, optional, default: None
+         Version of Python to use, e.g. '3.7.4'. A default value of None implies
+         that the version used will correspond to the version of the Python interpreter used to start the run.
      """
      ...

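A sketch of `@pypi` layered over the flow-level `@pypi_base`, as the docstring suggests; the package pins and Python version are illustrative:

```python
from metaflow import FlowSpec, pypi, pypi_base, step

@pypi_base(python='3.10.4', packages={'requests': '2.31.0'})  # flow-wide baseline
class PypiFlow(FlowSpec):

    @pypi(packages={'pandas': '2.1.0'})  # step-specific override
    @step
    def start(self):
        import pandas  # resolved inside the step's isolated environment
        self.next(self.end)

    @step
    def end(self):
        pass

if __name__ == "__main__":
    PypiFlow()
```
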
  @typing.overload
- def secrets(*, sources: typing.List[typing.Union[str, typing.Dict[str, typing.Any]]] = []) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
+ def environment(*, vars: typing.Dict[str, str] = {}) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
      """
-     Specifies secrets to be retrieved and injected as environment variables prior to
-     the execution of a step.
+     Specifies environment variables to be set prior to the execution of a step.

      Parameters
      ----------
-     sources : List[Union[str, Dict[str, Any]]], default: []
-         List of secret specs, defining how the secrets are to be retrieved
+     vars : Dict[str, str], default {}
+         Dictionary of environment variables to set.
      """
      ...

  @typing.overload
- def secrets(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
+ def environment(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
      ...

  @typing.overload
- def secrets(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
+ def environment(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
      ...

- def secrets(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None, *, sources: typing.List[typing.Union[str, typing.Dict[str, typing.Any]]] = []):
+ def environment(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None, *, vars: typing.Dict[str, str] = {}):
      """
-     Specifies secrets to be retrieved and injected as environment variables prior to
-     the execution of a step.
+     Specifies environment variables to be set prior to the execution of a step.

      Parameters
      ----------
-     sources : List[Union[str, Dict[str, Any]]], default: []
-         List of secret specs, defining how the secrets are to be retrieved
+     vars : Dict[str, str], default {}
+         Dictionary of environment variables to set.
      """
      ...

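A minimal sketch of `@environment` as documented above; the variable name and value are illustrative:

```python
import os
from metaflow import FlowSpec, environment, step

class EnvFlow(FlowSpec):

    @environment(vars={'MODE': 'production'})  # set before the step runs
    @step
    def start(self):
        print(os.environ['MODE'])
        self.next(self.end)

    @step
    def end(self):
        pass

if __name__ == "__main__":
    EnvFlow()
```
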
  @typing.overload
- def pypi(*, packages: typing.Dict[str, str] = {}, python: typing.Optional[str] = None) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
+ def resources(*, cpu: int = 1, gpu: int = 0, disk: typing.Optional[int] = None, memory: int = 4096, shared_memory: typing.Optional[int] = None) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
      """
-     Specifies the PyPI packages for the step.
+     Specifies the resources needed when executing this step.

-     Information in this decorator will augment any
-     attributes set in the `@pypi_base` flow-level decorator. Hence,
-     you can use `@pypi_base` to set packages required by all
-     steps and use `@pypi` to specify step-specific overrides.
+     Use `@resources` to specify the resource requirements
+     independently of the specific compute layer (`@batch`, `@kubernetes`).
+
+     You can choose the compute layer on the command line by executing e.g.
+     ```
+     python myflow.py run --with batch
+     ```
+     or
+     ```
+     python myflow.py run --with kubernetes
+     ```
+     which executes the flow on the desired system using the
+     requirements specified in `@resources`.

      Parameters
      ----------
-     packages : Dict[str, str], default: {}
-         Packages to use for this step. The key is the name of the package
-         and the value is the version to use.
-     python : str, optional, default: None
-         Version of Python to use, e.g. '3.7.4'. A default value of None implies
-         that the version used will correspond to the version of the Python interpreter used to start the run.
+     cpu : int, default 1
+         Number of CPUs required for this step.
+     gpu : int, default 0
+         Number of GPUs required for this step.
+     disk : int, optional, default None
+         Disk size (in MB) required for this step. Only applies on Kubernetes.
+     memory : int, default 4096
+         Memory size (in MB) required for this step.
+     shared_memory : int, optional, default None
+         The value for the size (in MiB) of the /dev/shm volume for this step.
+         This parameter maps to the `--shm-size` option in Docker.
      """
      ...

  @typing.overload
- def pypi(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
+ def resources(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
      ...

  @typing.overload
- def pypi(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
+ def resources(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
      ...

- def pypi(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None, *, packages: typing.Dict[str, str] = {}, python: typing.Optional[str] = None):
+ def resources(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None, *, cpu: int = 1, gpu: int = 0, disk: typing.Optional[int] = None, memory: int = 4096, shared_memory: typing.Optional[int] = None):
      """
-     Specifies the PyPI packages for the step.
+     Specifies the resources needed when executing this step.

-     Information in this decorator will augment any
-     attributes set in the `@pypi_base` flow-level decorator. Hence,
-     you can use `@pypi_base` to set packages required by all
-     steps and use `@pypi` to specify step-specific overrides.
+     Use `@resources` to specify the resource requirements
+     independently of the specific compute layer (`@batch`, `@kubernetes`).
+
+     You can choose the compute layer on the command line by executing e.g.
+     ```
+     python myflow.py run --with batch
+     ```
+     or
+     ```
+     python myflow.py run --with kubernetes
+     ```
+     which executes the flow on the desired system using the
+     requirements specified in `@resources`.

      Parameters
      ----------
-     packages : Dict[str, str], default: {}
-         Packages to use for this step. The key is the name of the package
-         and the value is the version to use.
-     python : str, optional, default: None
-         Version of Python to use, e.g. '3.7.4'. A default value of None implies
-         that the version used will correspond to the version of the Python interpreter used to start the run.
+     cpu : int, default 1
+         Number of CPUs required for this step.
+     gpu : int, default 0
+         Number of GPUs required for this step.
+     disk : int, optional, default None
+         Disk size (in MB) required for this step. Only applies on Kubernetes.
+     memory : int, default 4096
+         Memory size (in MB) required for this step.
+     shared_memory : int, optional, default None
+         The value for the size (in MiB) of the /dev/shm volume for this step.
+         This parameter maps to the `--shm-size` option in Docker.
+     """
+     ...
+
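A sketch of `@resources` as documented above: declare requirements once, then pick the compute layer at run time; the sizes are illustrative:

```python
from metaflow import FlowSpec, resources, step

class TrainFlow(FlowSpec):

    @resources(cpu=8, memory=16000)  # independent of the compute layer
    @step
    def start(self):
        self.next(self.end)

    @step
    def end(self):
        pass

if __name__ == "__main__":
    TrainFlow()
```

Running `python trainflow.py run --with batch` (or `--with kubernetes`) then applies these requirements on the chosen layer, per the docstring.
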
+ def kubernetes(*, cpu: int = 1, memory: int = 4096, disk: int = 10240, image: typing.Optional[str] = None, image_pull_policy: str = "KUBERNETES_IMAGE_PULL_POLICY", service_account: str = "METAFLOW_KUBERNETES_SERVICE_ACCOUNT", secrets: typing.Optional[typing.List[str]] = None, node_selector: typing.Union[typing.Dict[str, str], str, None] = None, namespace: str = "METAFLOW_KUBERNETES_NAMESPACE", gpu: typing.Optional[int] = None, gpu_vendor: str = "KUBERNETES_GPU_VENDOR", tolerations: typing.List[str] = [], use_tmpfs: bool = False, tmpfs_tempdir: bool = True, tmpfs_size: typing.Optional[int] = None, tmpfs_path: typing.Optional[str] = "/metaflow_temp", persistent_volume_claims: typing.Optional[typing.Dict[str, str]] = None, shared_memory: typing.Optional[int] = None, port: typing.Optional[int] = None, compute_pool: typing.Optional[str] = None) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
+     """
+     Specifies that this step should execute on Kubernetes.
+
+     Parameters
+     ----------
+     cpu : int, default 1
+         Number of CPUs required for this step. If `@resources` is
+         also present, the maximum value from all decorators is used.
+     memory : int, default 4096
+         Memory size (in MB) required for this step. If
+         `@resources` is also present, the maximum value from all decorators is
+         used.
+     disk : int, default 10240
+         Disk size (in MB) required for this step. If
+         `@resources` is also present, the maximum value from all decorators is
+         used.
+     image : str, optional, default None
+         Docker image to use when launching on Kubernetes. If not specified, and
+         METAFLOW_KUBERNETES_CONTAINER_IMAGE is specified, that image is used. If
+         not, a default Docker image mapping to the current version of Python is used.
+     image_pull_policy : str, default KUBERNETES_IMAGE_PULL_POLICY
+         If given, the imagePullPolicy to be applied to the Docker image of the step.
+     service_account : str, default METAFLOW_KUBERNETES_SERVICE_ACCOUNT
+         Kubernetes service account to use when launching pod in Kubernetes.
+     secrets : List[str], optional, default None
+         Kubernetes secrets to use when launching pod in Kubernetes. These
+         secrets are in addition to the ones defined in `METAFLOW_KUBERNETES_SECRETS`
+         in Metaflow configuration.
+     node_selector : Union[Dict[str, str], str], optional, default None
+         Kubernetes node selector(s) to apply to the pod running the task.
+         Can be passed in as a comma-separated string of values, e.g.
+         "kubernetes.io/os=linux,kubernetes.io/arch=amd64", or as a dictionary
+         {"kubernetes.io/os": "linux", "kubernetes.io/arch": "amd64"}.
+     namespace : str, default METAFLOW_KUBERNETES_NAMESPACE
+         Kubernetes namespace to use when launching pod in Kubernetes.
+     gpu : int, optional, default None
+         Number of GPUs required for this step. A value of zero implies that
+         the scheduled node should not have GPUs.
+     gpu_vendor : str, default KUBERNETES_GPU_VENDOR
+         The vendor of the GPUs to be used for this step.
+     tolerations : List[str], default []
+         The default is extracted from METAFLOW_KUBERNETES_TOLERATIONS.
+         Kubernetes tolerations to use when launching pod in Kubernetes.
+     use_tmpfs : bool, default False
+         This enables an explicit tmpfs mount for this step.
+     tmpfs_tempdir : bool, default True
+         Sets METAFLOW_TEMPDIR to tmpfs_path if set for this step.
+     tmpfs_size : int, optional, default: None
+         The value for the size (in MiB) of the tmpfs mount for this step.
+         This parameter maps to the `--tmpfs` option in Docker. Defaults to 50% of the
+         memory allocated for this step.
+     tmpfs_path : str, optional, default /metaflow_temp
+         Path to tmpfs mount for this step.
+     persistent_volume_claims : Dict[str, str], optional, default None
+         A map (dictionary) of persistent volumes to be mounted to the pod for this step. The map is from persistent
+         volumes to the path to which the volume is to be mounted, e.g., `{'pvc-name': '/path/to/mount/on'}`.
+     shared_memory : int, optional
+         Shared memory size (in MiB) required for this step.
+     port : int, optional
+         Port number to specify in the Kubernetes job object.
+     compute_pool : str, optional, default None
+         Compute pool to be used for this step.
+         If not specified, any accessible compute pool within the perimeter is used.
      """
      ...

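A hedged sketch of the new `@kubernetes` decorator above; the resource sizes and node selector are placeholders:

```python
from metaflow import FlowSpec, kubernetes, step

class K8sFlow(FlowSpec):

    @kubernetes(cpu=2, memory=8192, disk=20480,
                node_selector='kubernetes.io/arch=amd64')  # placeholders
    @step
    def start(self):
        self.next(self.end)

    @step
    def end(self):
        pass

if __name__ == "__main__":
    K8sFlow()
```
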
@@ -1042,237 +1339,123 @@ def conda(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], ty
  ...
 
  @typing.overload
- def batch(*, cpu: int = 1, gpu: int = 0, memory: int = 4096, image: typing.Optional[str] = None, queue: str = "METAFLOW_BATCH_JOB_QUEUE", iam_role: str = "METAFLOW_ECS_S3_ACCESS_IAM_ROLE", execution_role: str = "METAFLOW_ECS_FARGATE_EXECUTION_ROLE", shared_memory: typing.Optional[int] = None, max_swap: typing.Optional[int] = None, swappiness: typing.Optional[int] = None, use_tmpfs: bool = False, tmpfs_tempdir: bool = True, tmpfs_size: typing.Optional[int] = None, tmpfs_path: typing.Optional[str] = None, inferentia: int = 0, trainium: int = None, efa: int = 0, ephemeral_storage: int = None, log_driver: typing.Optional[str] = None, log_options: typing.Optional[typing.List[str]] = None) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
+ def parallel(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
  """
- Specifies that this step should execute on [AWS Batch](https://aws.amazon.com/batch/).
-
- Parameters
- ----------
- cpu : int, default 1
- Number of CPUs required for this step. If `@resources` is
- also present, the maximum value from all decorators is used.
- gpu : int, default 0
- Number of GPUs required for this step. If `@resources` is
- also present, the maximum value from all decorators is used.
- memory : int, default 4096
- Memory size (in MB) required for this step. If
- `@resources` is also present, the maximum value from all decorators is
- used.
- image : str, optional, default None
- Docker image to use when launching on AWS Batch. If not specified, and
- METAFLOW_BATCH_CONTAINER_IMAGE is specified, that image is used. If
- not, a default Docker image mapping to the current version of Python is used.
- queue : str, default METAFLOW_BATCH_JOB_QUEUE
- AWS Batch Job Queue to submit the job to.
- iam_role : str, default METAFLOW_ECS_S3_ACCESS_IAM_ROLE
- AWS IAM role that AWS Batch container uses to access AWS cloud resources.
- execution_role : str, default METAFLOW_ECS_FARGATE_EXECUTION_ROLE
- AWS IAM role that AWS Batch can use [to trigger AWS Fargate tasks]
- (https://docs.aws.amazon.com/batch/latest/userguide/execution-IAM-role.html).
- shared_memory : int, optional, default None
- The value for the size (in MiB) of the /dev/shm volume for this step.
- This parameter maps to the `--shm-size` option in Docker.
- max_swap : int, optional, default None
- The total amount of swap memory (in MiB) a container can use for this
- step. This parameter is translated to the `--memory-swap` option in
- Docker where the value is the sum of the container memory plus the
- `max_swap` value.
- swappiness : int, optional, default None
- This allows you to tune memory swappiness behavior for this step.
- A swappiness value of 0 causes swapping not to happen unless absolutely
- necessary. A swappiness value of 100 causes pages to be swapped very
- aggressively. Accepted values are whole numbers between 0 and 100.
- use_tmpfs : bool, default False
- This enables an explicit tmpfs mount for this step. Note that tmpfs is
- not available on Fargate compute environments
- tmpfs_tempdir : bool, default True
- sets METAFLOW_TEMPDIR to tmpfs_path if set for this step.
- tmpfs_size : int, optional, default None
- The value for the size (in MiB) of the tmpfs mount for this step.
- This parameter maps to the `--tmpfs` option in Docker. Defaults to 50% of the
- memory allocated for this step.
- tmpfs_path : str, optional, default None
- Path to tmpfs mount for this step. Defaults to /metaflow_temp.
- inferentia : int, default 0
- Number of Inferentia chips required for this step.
- trainium : int, default None
- Alias for inferentia. Use only one of the two.
- efa : int, default 0
- Number of elastic fabric adapter network devices to attach to container
- ephemeral_storage : int, default None
- The total amount, in GiB, of ephemeral storage to set for the task, 21-200GiB.
- This is only relevant for Fargate compute environments
- log_driver: str, optional, default None
- The log driver to use for the Amazon ECS container.
- log_options: List[str], optional, default None
- List of strings containing options for the chosen log driver. The configurable values
- depend on the `log driver` chosen. Validation of these options is not supported yet.
- Example: [`awslogs-group:aws/batch/job`]
+ Decorator prototype for all step decorators. This function gets specialized
+ and imported for all decorator types by _import_plugin_decorators().
  """
  ...
 
  @typing.overload
- def batch(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
- ...
-
- @typing.overload
- def batch(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
+ def parallel(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
  ...
 
- def batch(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None, *, cpu: int = 1, gpu: int = 0, memory: int = 4096, image: typing.Optional[str] = None, queue: str = "METAFLOW_BATCH_JOB_QUEUE", iam_role: str = "METAFLOW_ECS_S3_ACCESS_IAM_ROLE", execution_role: str = "METAFLOW_ECS_FARGATE_EXECUTION_ROLE", shared_memory: typing.Optional[int] = None, max_swap: typing.Optional[int] = None, swappiness: typing.Optional[int] = None, use_tmpfs: bool = False, tmpfs_tempdir: bool = True, tmpfs_size: typing.Optional[int] = None, tmpfs_path: typing.Optional[str] = None, inferentia: int = 0, trainium: int = None, efa: int = 0, ephemeral_storage: int = None, log_driver: typing.Optional[str] = None, log_options: typing.Optional[typing.List[str]] = None):
+ def parallel(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None):
  """
- Specifies that this step should execute on [AWS Batch](https://aws.amazon.com/batch/).
-
- Parameters
- ----------
- cpu : int, default 1
- Number of CPUs required for this step. If `@resources` is
- also present, the maximum value from all decorators is used.
- gpu : int, default 0
- Number of GPUs required for this step. If `@resources` is
- also present, the maximum value from all decorators is used.
- memory : int, default 4096
- Memory size (in MB) required for this step. If
- `@resources` is also present, the maximum value from all decorators is
- used.
- image : str, optional, default None
- Docker image to use when launching on AWS Batch. If not specified, and
- METAFLOW_BATCH_CONTAINER_IMAGE is specified, that image is used. If
- not, a default Docker image mapping to the current version of Python is used.
- queue : str, default METAFLOW_BATCH_JOB_QUEUE
- AWS Batch Job Queue to submit the job to.
- iam_role : str, default METAFLOW_ECS_S3_ACCESS_IAM_ROLE
- AWS IAM role that AWS Batch container uses to access AWS cloud resources.
- execution_role : str, default METAFLOW_ECS_FARGATE_EXECUTION_ROLE
- AWS IAM role that AWS Batch can use [to trigger AWS Fargate tasks]
- (https://docs.aws.amazon.com/batch/latest/userguide/execution-IAM-role.html).
- shared_memory : int, optional, default None
- The value for the size (in MiB) of the /dev/shm volume for this step.
- This parameter maps to the `--shm-size` option in Docker.
- max_swap : int, optional, default None
- The total amount of swap memory (in MiB) a container can use for this
- step. This parameter is translated to the `--memory-swap` option in
- Docker where the value is the sum of the container memory plus the
- `max_swap` value.
- swappiness : int, optional, default None
- This allows you to tune memory swappiness behavior for this step.
- A swappiness value of 0 causes swapping not to happen unless absolutely
- necessary. A swappiness value of 100 causes pages to be swapped very
- aggressively. Accepted values are whole numbers between 0 and 100.
- use_tmpfs : bool, default False
- This enables an explicit tmpfs mount for this step. Note that tmpfs is
- not available on Fargate compute environments
- tmpfs_tempdir : bool, default True
- sets METAFLOW_TEMPDIR to tmpfs_path if set for this step.
- tmpfs_size : int, optional, default None
- The value for the size (in MiB) of the tmpfs mount for this step.
- This parameter maps to the `--tmpfs` option in Docker. Defaults to 50% of the
- memory allocated for this step.
- tmpfs_path : str, optional, default None
- Path to tmpfs mount for this step. Defaults to /metaflow_temp.
- inferentia : int, default 0
- Number of Inferentia chips required for this step.
- trainium : int, default None
- Alias for inferentia. Use only one of the two.
- efa : int, default 0
- Number of elastic fabric adapter network devices to attach to container
- ephemeral_storage : int, default None
- The total amount, in GiB, of ephemeral storage to set for the task, 21-200GiB.
- This is only relevant for Fargate compute environments
- log_driver: str, optional, default None
- The log driver to use for the Amazon ECS container.
- log_options: List[str], optional, default None
- List of strings containing options for the chosen log driver. The configurable values
- depend on the `log driver` chosen. Validation of these options is not supported yet.
- Example: [`awslogs-group:aws/batch/job`]
+ Decorator prototype for all step decorators. This function gets specialized
+ and imported for all decorator types by _import_plugin_decorators().
  """
  ...
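The `@batch` documentation removed in this hunk catalogs the decorator's resource knobs (it reappears elsewhere in the reordered stub). As a point of reference, a minimal sketch of how `@batch` as documented above might be applied; the flow name and values are placeholders, and queue/role defaults come from Metaflow configuration when omitted:

```python
from metaflow import FlowSpec, batch, step

class BatchDemoFlow(FlowSpec):
    # Placeholder sizing; with @resources also present, the maximum of
    # the two decorators' values would win.
    @batch(cpu=2, memory=8192, use_tmpfs=True, tmpfs_size=1024)
    @step
    def start(self):
        self.next(self.end)

    @step
    def end(self):
        pass

if __name__ == "__main__":
    BatchDemoFlow()
```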
 
  @typing.overload
- def environment(*, vars: typing.Dict[str, str] = {}) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
+ def retry(*, times: int = 3, minutes_between_retries: int = 2) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
  """
- Specifies environment variables to be set prior to the execution of a step.
+ Specifies the number of times the task corresponding
+ to a step needs to be retried.
+
+ This decorator is useful for handling transient errors, such as networking issues.
+ If your task contains operations that can't be retried safely, e.g. database updates,
+ it is advisable to annotate it with `@retry(times=0)`.
+
+ This can be used in conjunction with the `@catch` decorator. The `@catch`
+ decorator will execute a no-op task after all retries have been exhausted,
+ ensuring that the flow execution can continue.
 
  Parameters
  ----------
- vars : Dict[str, str], default {}
- Dictionary of environment variables to set.
+ times : int, default 3
+ Number of times to retry this task.
+ minutes_between_retries : int, default 2
+ Number of minutes between retries.
  """
  ...
 
  @typing.overload
- def environment(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
+ def retry(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
  ...
 
  @typing.overload
- def environment(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
+ def retry(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
  ...
 
- def environment(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None, *, vars: typing.Dict[str, str] = {}):
+ def retry(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None, *, times: int = 3, minutes_between_retries: int = 2):
  """
- Specifies environment variables to be set prior to the execution of a step.
+ Specifies the number of times the task corresponding
+ to a step needs to be retried.
+
+ This decorator is useful for handling transient errors, such as networking issues.
+ If your task contains operations that can't be retried safely, e.g. database updates,
+ it is advisable to annotate it with `@retry(times=0)`.
+
+ This can be used in conjunction with the `@catch` decorator. The `@catch`
+ decorator will execute a no-op task after all retries have been exhausted,
+ ensuring that the flow execution can continue.
 
  Parameters
  ----------
- vars : Dict[str, str], default {}
- Dictionary of environment variables to set.
+ times : int, default 3
+ Number of times to retry this task.
+ minutes_between_retries : int, default 2
+ Number of minutes between retries.
  """
  ...
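A minimal sketch of the `@retry` decorator documented above; the flow and step names are illustrative:

```python
from metaflow import FlowSpec, retry, step

class RetryDemoFlow(FlowSpec):
    # Transient failures in this step are retried up to three times,
    # with a two-minute pause between attempts (the documented defaults).
    @retry(times=3, minutes_between_retries=2)
    @step
    def start(self):
        self.next(self.end)

    @step
    def end(self):
        pass

if __name__ == "__main__":
    RetryDemoFlow()
```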
 
  @typing.overload
- def timeout(*, seconds: int = 0, minutes: int = 0, hours: int = 0) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
+ def catch(*, var: typing.Optional[str] = None, print_exception: bool = True) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
  """
- Specifies a timeout for your step.
-
- This decorator is useful if this step may hang indefinitely.
-
- This can be used in conjunction with the `@retry` decorator as well as the `@catch` decorator.
- A timeout is considered to be an exception thrown by the step. It will cause the step to be
- retried if needed and the exception will be caught by the `@catch` decorator, if present.
+ Specifies that the step will succeed under all circumstances.
 
- Note that all the values specified in parameters are added together so if you specify
- 60 seconds and 1 hour, the decorator will have an effective timeout of 1 hour and 1 minute.
+ The decorator will create an optional artifact, specified by `var`, which
+ contains the exception raised. You can use it to detect the presence
+ of errors, indicating that all happy-path artifacts produced by the step
+ are missing.
 
  Parameters
  ----------
- seconds : int, default 0
- Number of seconds to wait prior to timing out.
- minutes : int, default 0
- Number of minutes to wait prior to timing out.
- hours : int, default 0
- Number of hours to wait prior to timing out.
+ var : str, optional, default None
+ Name of the artifact in which to store the caught exception.
+ If not specified, the exception is not stored.
+ print_exception : bool, default True
+ Determines whether or not the exception is printed to
+ stdout when caught.
  """
  ...
 
  @typing.overload
- def timeout(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
+ def catch(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
  ...
 
  @typing.overload
- def timeout(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
+ def catch(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
  ...
 
- def timeout(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None, *, seconds: int = 0, minutes: int = 0, hours: int = 0):
+ def catch(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None, *, var: typing.Optional[str] = None, print_exception: bool = True):
  """
- Specifies a timeout for your step.
-
- This decorator is useful if this step may hang indefinitely.
-
- This can be used in conjunction with the `@retry` decorator as well as the `@catch` decorator.
- A timeout is considered to be an exception thrown by the step. It will cause the step to be
- retried if needed and the exception will be caught by the `@catch` decorator, if present.
+ Specifies that the step will succeed under all circumstances.
 
- Note that all the values specified in parameters are added together so if you specify
- 60 seconds and 1 hour, the decorator will have an effective timeout of 1 hour and 1 minute.
+ The decorator will create an optional artifact, specified by `var`, which
+ contains the exception raised. You can use it to detect the presence
+ of errors, indicating that all happy-path artifacts produced by the step
+ are missing.
 
  Parameters
  ----------
- seconds : int, default 0
- Number of seconds to wait prior to timing out.
- minutes : int, default 0
- Number of minutes to wait prior to timing out.
- hours : int, default 0
- Number of hours to wait prior to timing out.
+ var : str, optional, default None
+ Name of the artifact in which to store the caught exception.
+ If not specified, the exception is not stored.
+ print_exception : bool, default True
+ Determines whether or not the exception is printed to
+ stdout when caught.
  """
  ...
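A minimal sketch of the `@catch` behavior documented above; the artifact name `failure` and the deliberate error are placeholders:

```python
from metaflow import FlowSpec, catch, step

class CatchDemoFlow(FlowSpec):
    # The exception below is caught and stored as self.failure; the
    # step is marked successful so the flow can keep going.
    @catch(var="failure", print_exception=True)
    @step
    def start(self):
        self.result = 1 / 0  # raises ZeroDivisionError at runtime
        self.next(self.end)

    @step
    def end(self):
        # On the error path, self.failure holds the caught exception and
        # the happy-path artifact self.result is missing.
        print("caught:", getattr(self, "failure", None))

if __name__ == "__main__":
    CatchDemoFlow()
```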
 
@@ -1327,134 +1510,117 @@ def card(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typ
  """
  ...
 
- def kubernetes(*, cpu: int = 1, memory: int = 4096, disk: int = 10240, image: typing.Optional[str] = None, image_pull_policy: str = "KUBERNETES_IMAGE_PULL_POLICY", service_account: str = "METAFLOW_KUBERNETES_SERVICE_ACCOUNT", secrets: typing.Optional[typing.List[str]] = None, node_selector: typing.Union[typing.Dict[str, str], str, None] = None, namespace: str = "METAFLOW_KUBERNETES_NAMESPACE", gpu: typing.Optional[int] = None, gpu_vendor: str = "KUBERNETES_GPU_VENDOR", tolerations: typing.List[str] = [], use_tmpfs: bool = False, tmpfs_tempdir: bool = True, tmpfs_size: typing.Optional[int] = None, tmpfs_path: typing.Optional[str] = "/metaflow_temp", persistent_volume_claims: typing.Optional[typing.Dict[str, str]] = None, shared_memory: typing.Optional[int] = None, port: typing.Optional[int] = None, compute_pool: typing.Optional[str] = None) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
+ @typing.overload
+ def secrets(*, sources: typing.List[typing.Union[str, typing.Dict[str, typing.Any]]] = []) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
  """
- Specifies that this step should execute on Kubernetes.
+ Specifies secrets to be retrieved and injected as environment variables prior to
+ the execution of a step.
 
  Parameters
  ----------
- cpu : int, default 1
- Number of CPUs required for this step. If `@resources` is
- also present, the maximum value from all decorators is used.
- memory : int, default 4096
- Memory size (in MB) required for this step. If
- `@resources` is also present, the maximum value from all decorators is
- used.
- disk : int, default 10240
- Disk size (in MB) required for this step. If
- `@resources` is also present, the maximum value from all decorators is
- used.
- image : str, optional, default None
- Docker image to use when launching on Kubernetes. If not specified, and
- METAFLOW_KUBERNETES_CONTAINER_IMAGE is specified, that image is used. If
- not, a default Docker image mapping to the current version of Python is used.
- image_pull_policy: str, default KUBERNETES_IMAGE_PULL_POLICY
- If given, the imagePullPolicy to be applied to the Docker image of the step.
- service_account : str, default METAFLOW_KUBERNETES_SERVICE_ACCOUNT
- Kubernetes service account to use when launching pod in Kubernetes.
- secrets : List[str], optional, default None
- Kubernetes secrets to use when launching pod in Kubernetes. These
- secrets are in addition to the ones defined in `METAFLOW_KUBERNETES_SECRETS`
- in Metaflow configuration.
- node_selector: Union[Dict[str,str], str], optional, default None
- Kubernetes node selector(s) to apply to the pod running the task.
- Can be passed in as a comma separated string of values e.g. "kubernetes.io/os=linux,kubernetes.io/arch=amd64"
- or as a dictionary {"kubernetes.io/os": "linux", "kubernetes.io/arch": "amd64"}
- namespace : str, default METAFLOW_KUBERNETES_NAMESPACE
- Kubernetes namespace to use when launching pod in Kubernetes.
- gpu : int, optional, default None
- Number of GPUs required for this step. A value of zero implies that
- the scheduled node should not have GPUs.
- gpu_vendor : str, default KUBERNETES_GPU_VENDOR
- The vendor of the GPUs to be used for this step.
- tolerations : List[str], default []
- The default is extracted from METAFLOW_KUBERNETES_TOLERATIONS.
- Kubernetes tolerations to use when launching pod in Kubernetes.
- use_tmpfs : bool, default False
- This enables an explicit tmpfs mount for this step.
- tmpfs_tempdir : bool, default True
- sets METAFLOW_TEMPDIR to tmpfs_path if set for this step.
- tmpfs_size : int, optional, default: None
- The value for the size (in MiB) of the tmpfs mount for this step.
- This parameter maps to the `--tmpfs` option in Docker. Defaults to 50% of the
- memory allocated for this step.
- tmpfs_path : str, optional, default /metaflow_temp
- Path to tmpfs mount for this step.
- persistent_volume_claims : Dict[str, str], optional, default None
- A map (dictionary) of persistent volumes to be mounted to the pod for this step. The map is from persistent
- volumes to the path to which the volume is to be mounted, e.g., `{'pvc-name': '/path/to/mount/on'}`.
- shared_memory: int, optional
- Shared memory size (in MiB) required for this step
- port: int, optional
- Port number to specify in the Kubernetes job object
- compute_pool : str, optional, default None
- Compute pool to be used for this step.
- If not specified, any accessible compute pool within the perimeter is used.
+ sources : List[Union[str, Dict[str, Any]]], default: []
+ List of secret specs, defining how the secrets are to be retrieved
  """
  ...
 
  @typing.overload
- def parallel(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
+ def secrets(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
+ ...
+
+ @typing.overload
+ def secrets(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
+ ...
+
+ def secrets(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None, *, sources: typing.List[typing.Union[str, typing.Dict[str, typing.Any]]] = []):
  """
- Decorator prototype for all step decorators. This function gets specialized
- and imported for all decorator types by _import_plugin_decorators().
+ Specifies secrets to be retrieved and injected as environment variables prior to
+ the execution of a step.
+
+ Parameters
+ ----------
+ sources : List[Union[str, Dict[str, Any]]], default: []
+ List of secret specs, defining how the secrets are to be retrieved
  """
  ...
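A minimal sketch of the `@secrets` decorator documented above; the secret spec and the environment variable name are hypothetical and depend on the configured secrets backend:

```python
import os

from metaflow import FlowSpec, secrets, step

class SecretsDemoFlow(FlowSpec):
    # "db-credentials" is a placeholder secret spec; its keys are injected
    # as environment variables before the step body runs.
    @secrets(sources=["db-credentials"])
    @step
    def start(self):
        print("DB_USER present:", "DB_USER" in os.environ)  # hypothetical key
        self.next(self.end)

    @step
    def end(self):
        pass

if __name__ == "__main__":
    SecretsDemoFlow()
```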
 
  @typing.overload
- def parallel(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
+ def pypi_base(*, packages: typing.Dict[str, str] = {}, python: typing.Optional[str] = None) -> typing.Callable[[typing.Type[FlowSpecDerived]], typing.Type[FlowSpecDerived]]:
+ """
+ Specifies the PyPI packages for all steps of the flow.
+
+ Use `@pypi_base` to set common packages required by all
+ steps and use `@pypi` to specify step-specific overrides.
+ Parameters
+ ----------
+ packages : Dict[str, str], default: {}
+ Packages to use for this flow. The key is the name of the package
+ and the value is the version to use.
+ python : str, optional, default: None
+ Version of Python to use, e.g. '3.7.4'. A default value of None implies
+ that the version used will correspond to the version of the Python interpreter used to start the run.
+ """
  ...
 
- def parallel(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None):
+ @typing.overload
+ def pypi_base(f: typing.Type[FlowSpecDerived]) -> typing.Type[FlowSpecDerived]:
+ ...
+
+ def pypi_base(f: typing.Optional[typing.Type[FlowSpecDerived]] = None, *, packages: typing.Dict[str, str] = {}, python: typing.Optional[str] = None):
  """
- Decorator prototype for all step decorators. This function gets specialized
- and imported for all decorator types by _import_plugin_decorators().
+ Specifies the PyPI packages for all steps of the flow.
+
+ Use `@pypi_base` to set common packages required by all
+ steps and use `@pypi` to specify step-specific overrides.
+ Parameters
+ ----------
+ packages : Dict[str, str], default: {}
+ Packages to use for this flow. The key is the name of the package
+ and the value is the version to use.
+ python : str, optional, default: None
+ Version of Python to use, e.g. '3.7.4'. A default value of None implies
+ that the version used will correspond to the version of the Python interpreter used to start the run.
+ """
  ...
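A minimal sketch of the flow-level `@pypi_base` decorator documented above; the package pins and Python version are illustrative:

```python
from metaflow import FlowSpec, pypi_base, step

# Keys are package names, values are the pinned versions; @pypi on an
# individual step could override these.
@pypi_base(packages={"requests": "2.31.0"}, python="3.10.4")
class PypiDemoFlow(FlowSpec):
    @step
    def start(self):
        import requests  # resolved from the @pypi_base environment
        self.next(self.end)

    @step
    def end(self):
        pass

if __name__ == "__main__":
    PypiDemoFlow()
```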
 
  @typing.overload
- def trigger_on_finish(*, flow: typing.Union[typing.Dict[str, str], str, None] = None, flows: typing.List[typing.Union[str, typing.Dict[str, str]]] = [], options: typing.Dict[str, typing.Any] = {}) -> typing.Callable[[typing.Type[FlowSpecDerived]], typing.Type[FlowSpecDerived]]:
+ def trigger(*, event: typing.Union[str, typing.Dict[str, typing.Any], None] = None, events: typing.List[typing.Union[str, typing.Dict[str, typing.Any]]] = [], options: typing.Dict[str, typing.Any] = {}) -> typing.Callable[[typing.Type[FlowSpecDerived]], typing.Type[FlowSpecDerived]]:
  """
- Specifies the flow(s) that this flow depends on.
+ Specifies the event(s) that this flow depends on.
 
  ```
- @trigger_on_finish(flow='FooFlow')
+ @trigger(event='foo')
  ```
  or
  ```
- @trigger_on_finish(flows=['FooFlow', 'BarFlow'])
+ @trigger(events=['foo', 'bar'])
  ```
- This decorator respects the @project decorator and triggers the flow
- when upstream runs within the same namespace complete successfully
 
- Additionally, you can specify project aware upstream flow dependencies
- by specifying the fully qualified project_flow_name.
+ Additionally, you can specify the parameter mappings
+ to map event payload to Metaflow parameters for the flow.
  ```
- @trigger_on_finish(flow='my_project.branch.my_branch.FooFlow')
+ @trigger(event={'name':'foo', 'parameters':{'flow_param': 'event_field'}})
  ```
  or
  ```
- @trigger_on_finish(flows=['my_project.branch.my_branch.FooFlow', 'BarFlow'])
+ @trigger(events=[{'name':'foo', 'parameters':{'flow_param_1': 'event_field_1'},
+ {'name':'bar', 'parameters':{'flow_param_2': 'event_field_2'}])
  ```
 
- You can also specify just the project or project branch (other values will be
- inferred from the current project or project branch):
+ 'parameters' can also be a list of strings and tuples like so:
  ```
- @trigger_on_finish(flow={"name": "FooFlow", "project": "my_project", "project_branch": "branch"})
+ @trigger(event={'name':'foo', 'parameters':['common_name', ('flow_param', 'event_field')]})
+ ```
+ This is equivalent to:
+ ```
+ @trigger(event={'name':'foo', 'parameters':{'common_name': 'common_name', 'flow_param': 'event_field'}})
  ```
-
- Note that `branch` is typically one of:
- - `prod`
- - `user.bob`
- - `test.my_experiment`
- - `prod.staging`
 
  Parameters
  ----------
- flow : Union[str, Dict[str, str]], optional, default None
- Upstream flow dependency for this flow.
- flows : List[Union[str, Dict[str, str]]], default []
- Upstream flow dependencies for this flow.
+ event : Union[str, Dict[str, Any]], optional, default None
+ Event dependency for this flow.
+ events : List[Union[str, Dict[str, Any]]], default []
+ Events dependency for this flow.
  options : Dict[str, Any], default {}
  Backend-specific configuration for tuning eventing behavior.
 
@@ -1463,51 +1629,47 @@ def trigger_on_finish(*, flow: typing.Union[typing.Dict[str, str], str, None] =
  ...
 
  @typing.overload
- def trigger_on_finish(f: typing.Type[FlowSpecDerived]) -> typing.Type[FlowSpecDerived]:
+ def trigger(f: typing.Type[FlowSpecDerived]) -> typing.Type[FlowSpecDerived]:
  ...
 
- def trigger_on_finish(f: typing.Optional[typing.Type[FlowSpecDerived]] = None, *, flow: typing.Union[typing.Dict[str, str], str, None] = None, flows: typing.List[typing.Union[str, typing.Dict[str, str]]] = [], options: typing.Dict[str, typing.Any] = {}):
+ def trigger(f: typing.Optional[typing.Type[FlowSpecDerived]] = None, *, event: typing.Union[str, typing.Dict[str, typing.Any], None] = None, events: typing.List[typing.Union[str, typing.Dict[str, typing.Any]]] = [], options: typing.Dict[str, typing.Any] = {}):
  """
- Specifies the flow(s) that this flow depends on.
+ Specifies the event(s) that this flow depends on.
 
  ```
- @trigger_on_finish(flow='FooFlow')
+ @trigger(event='foo')
  ```
  or
  ```
- @trigger_on_finish(flows=['FooFlow', 'BarFlow'])
+ @trigger(events=['foo', 'bar'])
  ```
- This decorator respects the @project decorator and triggers the flow
- when upstream runs within the same namespace complete successfully
 
- Additionally, you can specify project aware upstream flow dependencies
- by specifying the fully qualified project_flow_name.
+ Additionally, you can specify the parameter mappings
+ to map event payload to Metaflow parameters for the flow.
  ```
- @trigger_on_finish(flow='my_project.branch.my_branch.FooFlow')
+ @trigger(event={'name':'foo', 'parameters':{'flow_param': 'event_field'}})
  ```
  or
  ```
- @trigger_on_finish(flows=['my_project.branch.my_branch.FooFlow', 'BarFlow'])
+ @trigger(events=[{'name':'foo', 'parameters':{'flow_param_1': 'event_field_1'},
+ {'name':'bar', 'parameters':{'flow_param_2': 'event_field_2'}])
  ```
 
- You can also specify just the project or project branch (other values will be
- inferred from the current project or project branch):
+ 'parameters' can also be a list of strings and tuples like so:
  ```
- @trigger_on_finish(flow={"name": "FooFlow", "project": "my_project", "project_branch": "branch"})
+ @trigger(event={'name':'foo', 'parameters':['common_name', ('flow_param', 'event_field')]})
+ ```
+ This is equivalent to:
+ ```
+ @trigger(event={'name':'foo', 'parameters':{'common_name': 'common_name', 'flow_param': 'event_field'}})
  ```
-
- Note that `branch` is typically one of:
- - `prod`
- - `user.bob`
- - `test.my_experiment`
- - `prod.staging`
 
  Parameters
  ----------
- flow : Union[str, Dict[str, str]], optional, default None
- Upstream flow dependency for this flow.
- flows : List[Union[str, Dict[str, str]]], default []
- Upstream flow dependencies for this flow.
+ event : Union[str, Dict[str, Any]], optional, default None
+ Event dependency for this flow.
+ events : List[Union[str, Dict[str, Any]]], default []
+ Events dependency for this flow.
  options : Dict[str, Any], default {}
  Backend-specific configuration for tuning eventing behavior.
 
@@ -1515,49 +1677,13 @@ def trigger_on_finish(f: typing.Optional[typing.Type[FlowSpecDerived]] = None, *
  """
  ...
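A minimal sketch of the event-based `@trigger` decorator documented above; the event name, payload field, and flow are placeholders, and the trigger only takes effect once the flow is deployed to an event-capable scheduler:

```python
from metaflow import FlowSpec, Parameter, step, trigger

# When an event named 'foo' is published, a run starts and 'flow_param'
# is filled from the event payload's 'event_field' key.
@trigger(event={"name": "foo", "parameters": {"flow_param": "event_field"}})
class TriggerDemoFlow(FlowSpec):
    flow_param = Parameter("flow_param", default="")

    @step
    def start(self):
        print("triggered with:", self.flow_param)
        self.next(self.end)

    @step
    def end(self):
        pass

if __name__ == "__main__":
    TriggerDemoFlow()
```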
 
- @typing.overload
- def pypi_base(*, packages: typing.Dict[str, str] = {}, python: typing.Optional[str] = None) -> typing.Callable[[typing.Type[FlowSpecDerived]], typing.Type[FlowSpecDerived]]:
- """
- Specifies the PyPI packages for all steps of the flow.
-
- Use `@pypi_base` to set common packages required by all
- steps and use `@pypi` to specify step-specific overrides.
- Parameters
- ----------
- packages : Dict[str, str], default: {}
- Packages to use for this flow. The key is the name of the package
- and the value is the version to use.
- python : str, optional, default: None
- Version of Python to use, e.g. '3.7.4'. A default value of None implies
- that the version used will correspond to the version of the Python interpreter used to start the run.
- """
- ...
-
- @typing.overload
- def pypi_base(f: typing.Type[FlowSpecDerived]) -> typing.Type[FlowSpecDerived]:
- ...
-
- def pypi_base(f: typing.Optional[typing.Type[FlowSpecDerived]] = None, *, packages: typing.Dict[str, str] = {}, python: typing.Optional[str] = None):
- """
- Specifies the PyPI packages for all steps of the flow.
-
- Use `@pypi_base` to set common packages required by all
- steps and use `@pypi` to specify step-specific overrides.
- Parameters
- ----------
- packages : Dict[str, str], default: {}
- Packages to use for this flow. The key is the name of the package
- and the value is the version to use.
- python : str, optional, default: None
- Version of Python to use, e.g. '3.7.4'. A default value of None implies
- that the version used will correspond to the version of the Python interpreter used to start the run.
- """
- ...
-
- def airflow_external_task_sensor(*, timeout: int, poke_interval: int, mode: str, exponential_backoff: bool, pool: str, soft_fail: bool, name: str, description: str, external_dag_id: str, external_task_ids: typing.List[str], allowed_states: typing.List[str], failed_states: typing.List[str], execution_delta: "datetime.timedelta", check_existence: bool) -> typing.Callable[[typing.Type[FlowSpecDerived]], typing.Type[FlowSpecDerived]]:
+ def airflow_s3_key_sensor(*, timeout: int, poke_interval: int, mode: str, exponential_backoff: bool, pool: str, soft_fail: bool, name: str, description: str, bucket_key: typing.Union[str, typing.List[str]], bucket_name: str, wildcard_match: bool, aws_conn_id: str, verify: bool) -> typing.Callable[[typing.Type[FlowSpecDerived]], typing.Type[FlowSpecDerived]]:
  """
- The `@airflow_external_task_sensor` decorator attaches an Airflow [ExternalTaskSensor](https://airflow.apache.org/docs/apache-airflow/stable/_api/airflow/sensors/external_task/index.html#airflow.sensors.external_task.ExternalTaskSensor) before the start step of the flow.
- This decorator only works when a flow is scheduled on Airflow and is compiled using `airflow create`. More than one `@airflow_external_task_sensor` can be added as flow decorators. Adding more than one decorator will ensure that `start` step starts only after all sensors finish.
+ The `@airflow_s3_key_sensor` decorator attaches an Airflow [S3KeySensor](https://airflow.apache.org/docs/apache-airflow-providers-amazon/stable/_api/airflow/providers/amazon/aws/sensors/s3/index.html#airflow.providers.amazon.aws.sensors.s3.S3KeySensor)
+ before the start step of the flow. This decorator only works when a flow is scheduled on Airflow
+ and is compiled using `airflow create`. More than one `@airflow_s3_key_sensor` can be
+ added as flow decorators. Adding more than one decorator will ensure that `start` step
+ starts only after all sensors finish.
 
  Parameters
  ----------
@@ -1578,80 +1704,92 @@ def airflow_external_task_sensor(*, timeout: int, poke_interval: int, mode: str,
  Name of the sensor on Airflow
  description : str
  Description of sensor in the Airflow UI
- external_dag_id : str
- The dag_id that contains the task you want to wait for.
- external_task_ids : List[str]
- The list of task_ids that you want to wait for.
- If None (default value) the sensor waits for the DAG. (Default: None)
- allowed_states : List[str]
- Iterable of allowed states. (Default: ['success'])
- failed_states : List[str]
- Iterable of failed or dis-allowed states. (Default: None)
- execution_delta : datetime.timedelta
- Time difference with the previous execution to look at,
- the default is the same logical date as the current task or DAG. (Default: None)
- check_existence: bool
- Set to True to check if the external task exists or check if
- the DAG to wait for exists. (Default: True)
+ bucket_key : Union[str, List[str]]
+ The key(s) being waited on. Supports full s3:// style url or relative path from root level.
+ When it's specified as a full s3:// url, please leave `bucket_name` as None
+ bucket_name : str
+ Name of the S3 bucket. Only needed when bucket_key is not provided as a full s3:// url.
+ When specified, all the keys passed to bucket_key refer to this bucket. (Default: None)
+ wildcard_match : bool
+ Whether the bucket_key should be interpreted as a Unix wildcard pattern. (Default: False)
+ aws_conn_id : str
+ A reference to the S3 connection on Airflow. (Default: None)
+ verify : bool
+ Whether or not to verify SSL certificates for S3 connection. (Default: None)
  """
  ...
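A minimal sketch of the `@airflow_s3_key_sensor` flow decorator documented above; the bucket URL, sensor name, and timing values are placeholders, and the sensor only takes effect when the flow is compiled with `airflow create`:

```python
from metaflow import FlowSpec, airflow_s3_key_sensor, step

# Blocks the `start` step until the (hypothetical) object lands in S3.
@airflow_s3_key_sensor(
    timeout=3600,
    poke_interval=60,
    mode="poke",
    exponential_backoff=False,
    pool="default_pool",
    soft_fail=False,
    name="wait_for_input",
    description="Wait for the input object before starting",
    bucket_key="s3://my-bucket/input/data.csv",  # full s3:// url ...
    bucket_name=None,                            # ... so bucket_name stays None
    wildcard_match=False,
    aws_conn_id=None,
    verify=None,
)
class S3SensorDemoFlow(FlowSpec):
    @step
    def start(self):
        self.next(self.end)

    @step
    def end(self):
        pass

if __name__ == "__main__":
    S3SensorDemoFlow()
```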
 
- @typing.overload
- def schedule(*, hourly: bool = False, daily: bool = True, weekly: bool = False, cron: typing.Optional[str] = None, timezone: typing.Optional[str] = None) -> typing.Callable[[typing.Type[FlowSpecDerived]], typing.Type[FlowSpecDerived]]:
+ def project(*, name: str) -> typing.Callable[[typing.Type[FlowSpecDerived]], typing.Type[FlowSpecDerived]]:
  """
- Specifies the times when the flow should be run when running on a
- production scheduler.
+ Specifies what flows belong to the same project.
+
+ A project-specific namespace is created for all flows that
+ use the same `@project(name)`.
 
  Parameters
  ----------
- hourly : bool, default False
- Run the workflow hourly.
- daily : bool, default True
- Run the workflow daily.
- weekly : bool, default False
- Run the workflow weekly.
- cron : str, optional, default None
- Run the workflow at [a custom Cron schedule](https://docs.aws.amazon.com/eventbridge/latest/userguide/scheduled-events.html#cron-expressions)
- specified by this expression.
- timezone : str, optional, default None
- Timezone on which the schedule runs (default: None). Currently supported only for Argo workflows,
- which accepts timezones in [IANA format](https://nodatime.org/TimeZones).
+ name : str
+ Project name. Make sure that the name is unique amongst all
+ projects that use the same production scheduler. The name may
+ contain only lowercase alphanumeric characters and underscores.
+
+
  """
  ...
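A minimal sketch of the `@project` flow decorator documented above; the project name is a placeholder:

```python
from metaflow import FlowSpec, project, step

# All flows deployed with @project(name='demo_project') share one
# project-specific namespace, so production and user branches coexist.
@project(name="demo_project")
class ProjectDemoFlow(FlowSpec):
    @step
    def start(self):
        self.next(self.end)

    @step
    def end(self):
        pass

if __name__ == "__main__":
    ProjectDemoFlow()
```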
 
  @typing.overload
- def schedule(f: typing.Type[FlowSpecDerived]) -> typing.Type[FlowSpecDerived]:
- ...
-
- def schedule(f: typing.Optional[typing.Type[FlowSpecDerived]] = None, *, hourly: bool = False, daily: bool = True, weekly: bool = False, cron: typing.Optional[str] = None, timezone: typing.Optional[str] = None):
+ def conda_base(*, packages: typing.Dict[str, str] = {}, libraries: typing.Dict[str, str] = {}, python: typing.Optional[str] = None, disabled: bool = False) -> typing.Callable[[typing.Type[FlowSpecDerived]], typing.Type[FlowSpecDerived]]:
  """
- Specifies the times when the flow should be run when running on a
- production scheduler.
+ Specifies the Conda environment for all steps of the flow.
+
+ Use `@conda_base` to set common libraries required by all
+ steps and use `@conda` to specify step-specific additions.
 
  Parameters
  ----------
- hourly : bool, default False
- Run the workflow hourly.
- daily : bool, default True
- Run the workflow daily.
- weekly : bool, default False
- Run the workflow weekly.
- cron : str, optional, default None
- Run the workflow at [a custom Cron schedule](https://docs.aws.amazon.com/eventbridge/latest/userguide/scheduled-events.html#cron-expressions)
- specified by this expression.
- timezone : str, optional, default None
- Timezone on which the schedule runs (default: None). Currently supported only for Argo workflows,
- which accepts timezones in [IANA format](https://nodatime.org/TimeZones).
+ packages : Dict[str, str], default {}
+ Packages to use for this flow. The key is the name of the package
+ and the value is the version to use.
+ libraries : Dict[str, str], default {}
+ Supported for backward compatibility. When used with packages, packages will take precedence.
+ python : str, optional, default None
+ Version of Python to use, e.g. '3.7.4'. A default value of None implies
+ that the version used will correspond to the version of the Python interpreter used to start the run.
+ disabled : bool, default False
+ If set to True, disables Conda.
  """
  ...
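A minimal sketch of the flow-level `@conda_base` decorator documented above; the package pin and Python version are illustrative:

```python
from metaflow import FlowSpec, conda_base, step

# Flow-wide Conda environment; @conda on a step can add or override packages.
@conda_base(packages={"numpy": "1.26.4"}, python="3.10.4")
class CondaDemoFlow(FlowSpec):
    @step
    def start(self):
        import numpy as np  # provided by the flow-level Conda environment
        print(np.arange(3).sum())
        self.next(self.end)

    @step
    def end(self):
        pass

if __name__ == "__main__":
    CondaDemoFlow()
```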
 
- def airflow_s3_key_sensor(*, timeout: int, poke_interval: int, mode: str, exponential_backoff: bool, pool: str, soft_fail: bool, name: str, description: str, bucket_key: typing.Union[str, typing.List[str]], bucket_name: str, wildcard_match: bool, aws_conn_id: str, verify: bool) -> typing.Callable[[typing.Type[FlowSpecDerived]], typing.Type[FlowSpecDerived]]:
+ @typing.overload
+ def conda_base(f: typing.Type[FlowSpecDerived]) -> typing.Type[FlowSpecDerived]:
+ ...
+
+ def conda_base(f: typing.Optional[typing.Type[FlowSpecDerived]] = None, *, packages: typing.Dict[str, str] = {}, libraries: typing.Dict[str, str] = {}, python: typing.Optional[str] = None, disabled: bool = False):
  """
- The `@airflow_s3_key_sensor` decorator attaches an Airflow [S3KeySensor](https://airflow.apache.org/docs/apache-airflow-providers-amazon/stable/_api/airflow/providers/amazon/aws/sensors/s3/index.html#airflow.providers.amazon.aws.sensors.s3.S3KeySensor)
- before the start step of the flow. This decorator only works when a flow is scheduled on Airflow
- and is compiled using `airflow create`. More than one `@airflow_s3_key_sensor` can be
- added as flow decorators. Adding more than one decorator will ensure that `start` step
- starts only after all sensors finish.
+ Specifies the Conda environment for all steps of the flow.
+
+ Use `@conda_base` to set common libraries required by all
+ steps and use `@conda` to specify step-specific additions.
+
+ Parameters
+ ----------
+ packages : Dict[str, str], default {}
+ Packages to use for this flow. The key is the name of the package
+ and the value is the version to use.
+ libraries : Dict[str, str], default {}
+ Supported for backward compatibility. When used with packages, packages will take precedence.
+ python : str, optional, default None
+ Version of Python to use, e.g. '3.7.4'. A default value of None implies
+ that the version used will correspond to the version of the Python interpreter used to start the run.
+ disabled : bool, default False
+ If set to True, disables Conda.
+ """
+ ...
+
+ def airflow_external_task_sensor(*, timeout: int, poke_interval: int, mode: str, exponential_backoff: bool, pool: str, soft_fail: bool, name: str, description: str, external_dag_id: str, external_task_ids: typing.List[str], allowed_states: typing.List[str], failed_states: typing.List[str], execution_delta: "datetime.timedelta", check_existence: bool) -> typing.Callable[[typing.Type[FlowSpecDerived]], typing.Type[FlowSpecDerived]]:
+ """
+ The `@airflow_external_task_sensor` decorator attaches an Airflow [ExternalTaskSensor](https://airflow.apache.org/docs/apache-airflow/stable/_api/airflow/sensors/external_task/index.html#airflow.sensors.external_task.ExternalTaskSensor) before the start step of the flow.
+ This decorator only works when a flow is scheduled on Airflow and is compiled using `airflow create`. More than one `@airflow_external_task_sensor` can be added as flow decorators. Adding more than one decorator will ensure that `start` step starts only after all sensors finish.
 
  Parameters
  ----------
@@ -1672,60 +1810,67 @@ def airflow_s3_key_sensor(*, timeout: int, poke_interval: int, mode: str, expone
  Name of the sensor on Airflow
  description : str
  Description of sensor in the Airflow UI
- bucket_key : Union[str, List[str]]
- The key(s) being waited on. Supports full s3:// style url or relative path from root level.
- When it's specified as a full s3:// url, please leave `bucket_name` as None
- bucket_name : str
- Name of the S3 bucket. Only needed when bucket_key is not provided as a full s3:// url.
- When specified, all the keys passed to bucket_key refer to this bucket. (Default: None)
- wildcard_match : bool
- Whether the bucket_key should be interpreted as a Unix wildcard pattern. (Default: False)
- aws_conn_id : str
- A reference to the S3 connection on Airflow. (Default: None)
- verify : bool
- Whether or not to verify SSL certificates for S3 connection. (Default: None)
+ external_dag_id : str
+ The dag_id that contains the task you want to wait for.
+ external_task_ids : List[str]
+ The list of task_ids that you want to wait for.
+ If None (default value) the sensor waits for the DAG. (Default: None)
+ allowed_states : List[str]
+ Iterable of allowed states. (Default: ['success'])
+ failed_states : List[str]
+ Iterable of failed or dis-allowed states. (Default: None)
+ execution_delta : datetime.timedelta
+ Time difference with the previous execution to look at,
+ the default is the same logical date as the current task or DAG. (Default: None)
+ check_existence: bool
+ Set to True to check if the external task exists or check if
+ the DAG to wait for exists. (Default: True)
  """
  ...
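A minimal sketch of the `@airflow_external_task_sensor` flow decorator documented above; the DAG id, sensor name, and timing values are placeholders, and the sensor only applies to flows compiled via `airflow create`:

```python
from metaflow import FlowSpec, airflow_external_task_sensor, step

# Blocks the `start` step until the (hypothetical) upstream DAG run succeeds.
@airflow_external_task_sensor(
    timeout=3600,
    poke_interval=60,
    mode="reschedule",
    exponential_backoff=False,
    pool="default_pool",
    soft_fail=False,
    name="wait_for_upstream_dag",
    description="Wait for the upstream DAG to finish",
    external_dag_id="upstream_dag",
    external_task_ids=None,       # None waits on the whole DAG
    allowed_states=["success"],
    failed_states=None,
    execution_delta=None,
    check_existence=True,
)
class ExternalSensorDemoFlow(FlowSpec):
    @step
    def start(self):
        self.next(self.end)

    @step
    def end(self):
        pass

if __name__ == "__main__":
    ExternalSensorDemoFlow()
```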
 
  @typing.overload
- def trigger(*, event: typing.Union[str, typing.Dict[str, typing.Any], None] = None, events: typing.List[typing.Union[str, typing.Dict[str, typing.Any]]] = [], options: typing.Dict[str, typing.Any] = {}) -> typing.Callable[[typing.Type[FlowSpecDerived]], typing.Type[FlowSpecDerived]]:
+ def trigger_on_finish(*, flow: typing.Union[typing.Dict[str, str], str, None] = None, flows: typing.List[typing.Union[str, typing.Dict[str, str]]] = [], options: typing.Dict[str, typing.Any] = {}) -> typing.Callable[[typing.Type[FlowSpecDerived]], typing.Type[FlowSpecDerived]]:
  """
- Specifies the event(s) that this flow depends on.
+ Specifies the flow(s) that this flow depends on.
 
  ```
- @trigger(event='foo')
+ @trigger_on_finish(flow='FooFlow')
  ```
  or
  ```
- @trigger(events=['foo', 'bar'])
+ @trigger_on_finish(flows=['FooFlow', 'BarFlow'])
  ```
+ This decorator respects the @project decorator and triggers the flow
+ when upstream runs within the same namespace complete successfully
 
- Additionally, you can specify the parameter mappings
- to map event payload to Metaflow parameters for the flow.
+ Additionally, you can specify project aware upstream flow dependencies
+ by specifying the fully qualified project_flow_name.
  ```
- @trigger(event={'name':'foo', 'parameters':{'flow_param': 'event_field'}})
+ @trigger_on_finish(flow='my_project.branch.my_branch.FooFlow')
  ```
  or
  ```
- @trigger(events=[{'name':'foo', 'parameters':{'flow_param_1': 'event_field_1'},
- {'name':'bar', 'parameters':{'flow_param_2': 'event_field_2'}])
+ @trigger_on_finish(flows=['my_project.branch.my_branch.FooFlow', 'BarFlow'])
  ```
 
- 'parameters' can also be a list of strings and tuples like so:
- ```
- @trigger(event={'name':'foo', 'parameters':['common_name', ('flow_param', 'event_field')]})
- ```
- This is equivalent to:
+ You can also specify just the project or project branch (other values will be
+ inferred from the current project or project branch):
  ```
- @trigger(event={'name':'foo', 'parameters':{'common_name': 'common_name', 'flow_param': 'event_field'}})
+ @trigger_on_finish(flow={"name": "FooFlow", "project": "my_project", "project_branch": "branch"})
  ```
 
+ Note that `branch` is typically one of:
+ - `prod`
+ - `user.bob`
+ - `test.my_experiment`
+ - `prod.staging`
+
  Parameters
  ----------
- event : Union[str, Dict[str, Any]], optional, default None
- Event dependency for this flow.
- events : List[Union[str, Dict[str, Any]]], default []
- Events dependency for this flow.
+ flow : Union[str, Dict[str, str]], optional, default None
+ Upstream flow dependency for this flow.
+ flows : List[Union[str, Dict[str, str]]], default []
+ Upstream flow dependencies for this flow.
  options : Dict[str, Any], default {}
  Backend-specific configuration for tuning eventing behavior.
 
@@ -1734,47 +1879,51 @@ def trigger(*, event: typing.Union[str, typing.Dict[str, typing.Any], None] = No
  ...
 
  @typing.overload
- def trigger(f: typing.Type[FlowSpecDerived]) -> typing.Type[FlowSpecDerived]:
+ def trigger_on_finish(f: typing.Type[FlowSpecDerived]) -> typing.Type[FlowSpecDerived]:
  ...
 
- def trigger(f: typing.Optional[typing.Type[FlowSpecDerived]] = None, *, event: typing.Union[str, typing.Dict[str, typing.Any], None] = None, events: typing.List[typing.Union[str, typing.Dict[str, typing.Any]]] = [], options: typing.Dict[str, typing.Any] = {}):
+ def trigger_on_finish(f: typing.Optional[typing.Type[FlowSpecDerived]] = None, *, flow: typing.Union[typing.Dict[str, str], str, None] = None, flows: typing.List[typing.Union[str, typing.Dict[str, str]]] = [], options: typing.Dict[str, typing.Any] = {}):
  """
- Specifies the event(s) that this flow depends on.
+ Specifies the flow(s) that this flow depends on.
 
  ```
- @trigger(event='foo')
+ @trigger_on_finish(flow='FooFlow')
  ```
  or
  ```
- @trigger(events=['foo', 'bar'])
+ @trigger_on_finish(flows=['FooFlow', 'BarFlow'])
  ```
+ This decorator respects the @project decorator and triggers the flow
+ when upstream runs within the same namespace complete successfully
 
- Additionally, you can specify the parameter mappings
- to map event payload to Metaflow parameters for the flow.
+ Additionally, you can specify project aware upstream flow dependencies
+ by specifying the fully qualified project_flow_name.
  ```
- @trigger(event={'name':'foo', 'parameters':{'flow_param': 'event_field'}})
+ @trigger_on_finish(flow='my_project.branch.my_branch.FooFlow')
  ```
  or
  ```
- @trigger(events=[{'name':'foo', 'parameters':{'flow_param_1': 'event_field_1'},
- {'name':'bar', 'parameters':{'flow_param_2': 'event_field_2'}])
+ @trigger_on_finish(flows=['my_project.branch.my_branch.FooFlow', 'BarFlow'])
  ```
 
- 'parameters' can also be a list of strings and tuples like so:
- ```
- @trigger(event={'name':'foo', 'parameters':['common_name', ('flow_param', 'event_field')]})
- ```
- This is equivalent to:
+ You can also specify just the project or project branch (other values will be
+ inferred from the current project or project branch):
  ```
- @trigger(event={'name':'foo', 'parameters':{'common_name': 'common_name', 'flow_param': 'event_field'}})
+ @trigger_on_finish(flow={"name": "FooFlow", "project": "my_project", "project_branch": "branch"})
  ```
 
+ Note that `branch` is typically one of:
+ - `prod`
+ - `user.bob`
+ - `test.my_experiment`
+ - `prod.staging`
+
  Parameters
  ----------
- event : Union[str, Dict[str, Any]], optional, default None
- Event dependency for this flow.
- events : List[Union[str, Dict[str, Any]]], default []
- Events dependency for this flow.
+ flow : Union[str, Dict[str, str]], optional, default None
+ Upstream flow dependency for this flow.
+ flows : List[Union[str, Dict[str, str]]], default []
+ Upstream flow dependencies for this flow.
  options : Dict[str, Any], default {}
  Backend-specific configuration for tuning eventing behavior.
 
@@ -1782,70 +1931,52 @@ def trigger(f: typing.Optional[typing.Type[FlowSpecDerived]] = None, *, event: t
  """
  ...
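A minimal sketch of the `@trigger_on_finish` decorator documented above; the upstream flow name is a placeholder, and the dependency only fires for deployed runs within the same (project) namespace:

```python
from metaflow import FlowSpec, step, trigger_on_finish

# Starts automatically when a successful run of FooFlow (a hypothetical
# upstream flow in the same namespace) completes on the scheduler.
@trigger_on_finish(flow="FooFlow")
class DownstreamFlow(FlowSpec):
    @step
    def start(self):
        self.next(self.end)

    @step
    def end(self):
        pass

if __name__ == "__main__":
    DownstreamFlow()
```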
1784
1933
 
1785
- def project(*, name: str) -> typing.Callable[[typing.Type[FlowSpecDerived]], typing.Type[FlowSpecDerived]]:
1786
- """
1787
- Specifies what flows belong to the same project.
1788
-
1789
- A project-specific namespace is created for all flows that
1790
- use the same `@project(name)`.
1791
-
1792
- Parameters
1793
- ----------
1794
- name : str
1795
- Project name. Make sure that the name is unique amongst all
1796
- projects that use the same production scheduler. The name may
1797
- contain only lowercase alphanumeric characters and underscores.
1798
-
1799
-
1800
- """
1801
- ...
1802
-
1803
1934
  @typing.overload
1804
- def conda_base(*, packages: typing.Dict[str, str] = {}, libraries: typing.Dict[str, str] = {}, python: typing.Optional[str] = None, disabled: bool = False) -> typing.Callable[[typing.Type[FlowSpecDerived]], typing.Type[FlowSpecDerived]]:
1935
+ def schedule(*, hourly: bool = False, daily: bool = True, weekly: bool = False, cron: typing.Optional[str] = None, timezone: typing.Optional[str] = None) -> typing.Callable[[typing.Type[FlowSpecDerived]], typing.Type[FlowSpecDerived]]:
1805
1936
  """
1806
- Specifies the Conda environment for all steps of the flow.
1807
-
1808
- Use `@conda_base` to set common libraries required by all
1809
- steps and use `@conda` to specify step-specific additions.
1937
+ Specifies the times when the flow should be run when running on a
1938
+ production scheduler.
1810
1939
 
1811
1940
  Parameters
1812
1941
  ----------
1813
- packages : Dict[str, str], default {}
1814
- Packages to use for this flow. The key is the name of the package
1815
- and the value is the version to use.
1816
- libraries : Dict[str, str], default {}
1817
- Supported for backward compatibility. When used with packages, packages will take precedence.
1818
- python : str, optional, default None
1819
- Version of Python to use, e.g. '3.7.4'. A default value of None implies
1820
- that the version used will correspond to the version of the Python interpreter used to start the run.
1821
- disabled : bool, default False
1822
- If set to True, disables Conda.
1942
+ hourly : bool, default False
1943
+ Run the workflow hourly.
1944
+ daily : bool, default True
1945
+ Run the workflow daily.
1946
+ weekly : bool, default False
1947
+ Run the workflow weekly.
1948
+ cron : str, optional, default None
1949
+ Run the workflow at [a custom Cron schedule](https://docs.aws.amazon.com/eventbridge/latest/userguide/scheduled-events.html#cron-expressions)
1950
+ specified by this expression.
1951
+ timezone : str, optional, default None
1952
+ Timezone on which the schedule runs (default: None). Currently supported only for Argo workflows,
1953
+ which accepts timezones in [IANA format](https://nodatime.org/TimeZones).
1823
1954
  """
1824
1955
  ...
1825
1956
 
1826
1957
  @typing.overload
1827
- def conda_base(f: typing.Type[FlowSpecDerived]) -> typing.Type[FlowSpecDerived]:
1958
+ def schedule(f: typing.Type[FlowSpecDerived]) -> typing.Type[FlowSpecDerived]:
1828
1959
  ...
1829
1960
 
1830
- def conda_base(f: typing.Optional[typing.Type[FlowSpecDerived]] = None, *, packages: typing.Dict[str, str] = {}, libraries: typing.Dict[str, str] = {}, python: typing.Optional[str] = None, disabled: bool = False):
1961
+ def schedule(f: typing.Optional[typing.Type[FlowSpecDerived]] = None, *, hourly: bool = False, daily: bool = True, weekly: bool = False, cron: typing.Optional[str] = None, timezone: typing.Optional[str] = None):
1831
1962
  """
1832
- Specifies the Conda environment for all steps of the flow.
1833
-
1834
- Use `@conda_base` to set common libraries required by all
1835
- steps and use `@conda` to specify step-specific additions.
1963
+ Specifies the times when the flow should be run when running on a
1964
+ production scheduler.
1836
1965
 
1837
1966
  Parameters
1838
1967
  ----------
1839
- packages : Dict[str, str], default {}
1840
- Packages to use for this flow. The key is the name of the package
1841
- and the value is the version to use.
1842
- libraries : Dict[str, str], default {}
1843
- Supported for backward compatibility. When used with packages, packages will take precedence.
1844
- python : str, optional, default None
1845
- Version of Python to use, e.g. '3.7.4'. A default value of None implies
1846
- that the version used will correspond to the version of the Python interpreter used to start the run.
1847
- disabled : bool, default False
1848
- If set to True, disables Conda.
1968
+ hourly : bool, default False
1969
+ Run the workflow hourly.
1970
+ daily : bool, default True
1971
+ Run the workflow daily.
1972
+ weekly : bool, default False
1973
+ Run the workflow weekly.
1974
+ cron : str, optional, default None
1975
+ Run the workflow at [a custom Cron schedule](https://docs.aws.amazon.com/eventbridge/latest/userguide/scheduled-events.html#cron-expressions)
1976
+ specified by this expression.
1977
+ timezone : str, optional, default None
1978
+ Timezone on which the schedule runs (default: None). Currently supported only for Argo workflows,
1979
+ which accepts timezones in [IANA format](https://nodatime.org/TimeZones).
1849
1980
  """
1850
1981
  ...
1851
1982
 
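A minimal sketch of `@schedule` with a custom cron expression; the schedule only takes effect once the flow is deployed to a production orchestrator, and the flow name and expression here are illustrative:

```python
from metaflow import FlowSpec, schedule, step

# Illustrative flow: run every day at 10:00 UTC when deployed
# (AWS-style six-field cron expression, chosen for this sketch).
@schedule(cron="0 10 * * ? *")
class NightlyFlow(FlowSpec):
    @step
    def start(self):
        self.next(self.end)

    @step
    def end(self):
        pass
```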
@@ -1927,6 +2058,19 @@ def default_metadata() -> str:
  ...
 
  class Metaflow(object, metaclass=type):
+ """
+ Entry point to all objects in the Metaflow universe.
+
+ This object can be used to list all the flows present either through the explicit property
+ or by iterating over this object.
+
+ Attributes
+ ----------
+ flows : List[Flow]
+ Returns the list of all `Flow` objects known to this metadata provider. Note that only
+ flows present in the current namespace will be returned. A `Flow` is present in a namespace
+ if it has at least one run in the namespace.
+ """
  def __init__(self):
  ...
  @property
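A short sketch of both access patterns the docstring mentions, the explicit `flows` property and plain iteration; the output depends entirely on what your metadata provider contains:

```python
from metaflow import Metaflow

# List flows via the explicit property...
print(Metaflow().flows)

# ...or by iterating over the object itself.
for flow in Metaflow():
    print(flow.id)
```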
@@ -1978,6 +2122,17 @@ class Metaflow(object, metaclass=type):
  ...
 
  class Flow(metaflow.client.core.MetaflowObject, metaclass=type):
+ """
+ A Flow represents all existing flows with a certain name, in other words,
+ classes derived from `FlowSpec`. A container of `Run` objects.
+
+ Attributes
+ ----------
+ latest_run : Run
+ Latest `Run` (in progress or completed, successfully or not) of this flow.
+ latest_successful_run : Run
+ Latest successfully completed `Run` of this flow.
+ """
  def __init__(self, *args, **kwargs):
  ...
  @property
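For example, fetching the latest runs of a flow; the flow name is a placeholder for one that exists in your namespace:

```python
from metaflow import Flow

flow = Flow("HelloFlow")            # hypothetical flow name
print(flow.latest_run.id)           # latest run, finished or not
print(flow.latest_successful_run)   # latest successfully completed run
```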
@@ -2064,6 +2219,26 @@ class Flow(metaflow.client.core.MetaflowObject, metaclass=type):
  ...
 
  class Run(metaflow.client.core.MetaflowObject, metaclass=type):
+ """
+ A `Run` represents an execution of a `Flow`. It is a container of `Step`s.
+
+ Attributes
+ ----------
+ data : MetaflowData
+ A shortcut to run['end'].task.data, i.e. data produced by this run.
+ successful : bool
+ True if the run completed successfully.
+ finished : bool
+ True if the run completed.
+ finished_at : datetime
+ Time this run finished.
+ code : MetaflowCode
+ Code package for this run (if present). See `MetaflowCode`.
+ trigger : MetaflowTrigger
+ Information about event(s) that triggered this run (if present). See `MetaflowTrigger`.
+ end_task : Task
+ `Task` for the end step (if it is present already).
+ """
  def steps(self, *tags: str) -> typing.Iterator[metaflow.client.core.Step]:
  """
  [Legacy function - do not use]
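A small sketch of the attributes described above; the pathspec is a placeholder of the form 'FlowName/run_id':

```python
from metaflow import Run

run = Run("HelloFlow/12")   # placeholder pathspec
if run.finished and run.successful:
    print(run.finished_at)
    print(run.data)         # shortcut to run['end'].task.data
```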
@@ -2296,6 +2471,23 @@ class Run(metaflow.client.core.MetaflowObject, metaclass=type):
  ...
 
  class Step(metaflow.client.core.MetaflowObject, metaclass=type):
+ """
+ A `Step` represents a user-defined step, that is, a method annotated with the `@step` decorator.
+
+ It contains `Task` objects associated with the step, that is, all executions of the
+ `Step`. The step may contain multiple `Task`s in the case of a foreach step.
+
+ Attributes
+ ----------
+ task : Task
+ The first `Task` object in this step. This is a shortcut for retrieving the only
+ task contained in a non-foreach step.
+ finished_at : datetime
+ Time when the latest `Task` of this step finished. Note that in the case of foreaches,
+ this time may change during execution of the step.
+ environment_info : Dict[str, Any]
+ Information about the execution environment.
+ """
  @property
  def task(self) -> typing.Optional[metaflow.client.core.Task]:
  """
@@ -2430,6 +2622,55 @@ class Step(metaflow.client.core.MetaflowObject, metaclass=type):
  ...
 
  class Task(metaflow.client.core.MetaflowObject, metaclass=type):
+ """
+ A `Task` represents an execution of a `Step`.
+
+ It contains all `DataArtifact` objects produced by the task as
+ well as metadata related to execution.
+
+ Note that the `@retry` decorator may cause multiple attempts of
+ the task to be present. Usually you want the latest attempt, which
+ is what instantiating a `Task` object returns by default. If
+ you need to e.g. retrieve logs from a failed attempt, you can
+ explicitly get information about a specific attempt by using the
+ following syntax when creating a task:
+
+ `Task('flow/run/step/task', attempt=<attempt>)`
+
+ where `attempt=0` corresponds to the first attempt etc.
+
+ Attributes
+ ----------
+ metadata : List[Metadata]
+ List of all metadata events associated with the task.
+ metadata_dict : Dict[str, str]
+ A condensed version of `metadata`: A dictionary where keys
+ are names of metadata events and values the latest corresponding event.
+ data : MetaflowData
+ Container of all data artifacts produced by this task. Note that this
+ call downloads all data locally, so it can be slower than accessing
+ artifacts individually. See `MetaflowData` for more information.
+ artifacts : MetaflowArtifacts
+ Container of `DataArtifact` objects produced by this task.
+ successful : bool
+ True if the task completed successfully.
+ finished : bool
+ True if the task completed.
+ exception : object
+ Exception raised by this task if there was one.
+ finished_at : datetime
+ Time this task finished.
+ runtime_name : str
+ Runtime this task was executed on.
+ stdout : str
+ Standard output for the task execution.
+ stderr : str
+ Standard error output for the task execution.
+ code : MetaflowCode
+ Code package for this task (if present). See `MetaflowCode`.
+ environment_info : Dict[str, str]
+ Information about the execution environment.
+ """
  def __init__(self, *args, **kwargs):
  ...
  @property
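A sketch of the attempt-specific syntax from the docstring; the pathspec is a placeholder:

```python
from metaflow import Task

# attempt=0 selects the first attempt of a task that was retried
# with @retry, per the docstring above.
task = Task("HelloFlow/12/process/45", attempt=0)
print(task.stdout)
print(task.exception)        # None unless the attempt raised
print(task.metadata_dict)
```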
@@ -2744,6 +2985,21 @@ class Task(metaflow.client.core.MetaflowObject, metaclass=type):
  ...
 
  class DataArtifact(metaflow.client.core.MetaflowObject, metaclass=type):
+ """
+ A single data artifact and associated metadata. Note that this object does
+ not contain other objects as it is the leaf object in the hierarchy.
+
+ Attributes
+ ----------
+ data : object
+ The data contained in this artifact, that is, the object produced during
+ execution of this run.
+ sha : string
+ A unique ID of this artifact.
+ finished_at : datetime
+ Corresponds roughly to the `Task.finished_at` time of the parent `Task`.
+ An alias for `DataArtifact.created_at`.
+ """
  @property
  def data(self) -> typing.Any:
  """
@@ -2800,6 +3056,44 @@ class DataArtifact(metaflow.client.core.MetaflowObject, metaclass=type):
  ...
 
  class Runner(object, metaclass=type):
+ """
+ Metaflow's Runner API that presents a programmatic interface
+ to run flows and perform other operations either synchronously or asynchronously.
+ The class expects a path to the flow file along with optional arguments
+ that match top-level options on the command-line.
+
+ This class works as a context manager, calling `cleanup()` to remove
+ temporary files at exit.
+
+ Example:
+ ```python
+ with Runner('slowflow.py', pylint=False) as runner:
+ result = runner.run(alpha=5, tags=["abc", "def"], max_workers=5)
+ print(result.run.finished)
+ ```
+
+ Parameters
+ ----------
+ flow_file : str
+ Path to the flow file to run.
+ show_output : bool, default True
+ Show the 'stdout' and 'stderr' to the console by default.
+ Only applicable for synchronous 'run' and 'resume' functions.
+ profile : Optional[str], default None
+ Metaflow profile to use to run this run. If not specified, the default
+ profile is used (or the one already set using `METAFLOW_PROFILE`).
+ env : Optional[Dict], default None
+ Additional environment variables to set for the Run. This overrides the
+ environment set for this process.
+ cwd : Optional[str], default None
+ The directory to run the subprocess in; if not specified, the current
+ directory is used.
+ file_read_timeout : int, default 3600
+ The timeout until which we try to read the runner attribute file.
+ **kwargs : Any
+ Additional arguments that you would pass to `python myflow.py` before
+ the `run` command.
+ """
  def __init__(self, flow_file: str, show_output: bool = True, profile: typing.Optional[str] = None, env: typing.Optional[typing.Dict] = None, cwd: typing.Optional[str] = None, file_read_timeout: int = 3600, **kwargs):
  ...
  def __enter__(self) -> metaflow.runner.metaflow_runner.Runner:
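Complementing the synchronous example in the docstring, a sketch of the non-blocking variant, assuming the same hypothetical 'slowflow.py' and its `alpha` parameter:

```python
import asyncio

from metaflow import Runner

async def main():
    # Same hypothetical flow file and parameter as the docstring example.
    with Runner("slowflow.py", pylint=False) as runner:
        executing = await runner.async_run(alpha=5)
        result = await executing.wait()   # block until the run finishes
        print(result.status)

asyncio.run(main())
```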
@@ -2892,6 +3186,42 @@ class Runner(object, metaclass=type):
  ...
 
  class NBRunner(object, metaclass=type):
+ """
+ A wrapper over `Runner` for executing flows defined in a Jupyter
+ notebook cell.
+
+ Instantiate this class on the last line of a notebook cell where
+ a `flow` is defined. In contrast to `Runner`, this class is not
+ meant to be used in a context manager. Instead, use a blocking helper
+ function like `nbrun` (which calls `cleanup()` internally) or call
+ `cleanup()` explicitly when using non-blocking APIs.
+
+ ```python
+ run = NBRunner(FlowName).nbrun()
+ ```
+
+ Parameters
+ ----------
+ flow : FlowSpec
+ Flow defined in the same cell.
+ show_output : bool, default True
+ Show the 'stdout' and 'stderr' to the console by default.
+ Only applicable for synchronous 'run' and 'resume' functions.
+ profile : Optional[str], default None
+ Metaflow profile to use to run this run. If not specified, the default
+ profile is used (or the one already set using `METAFLOW_PROFILE`).
+ env : Optional[Dict], default None
+ Additional environment variables to set for the Run. This overrides the
+ environment set for this process.
+ base_dir : Optional[str], default None
+ The directory to run the subprocess in; if not specified, a temporary
+ directory is used.
+ file_read_timeout : int, default 3600
+ The timeout until which we try to read the runner attribute file.
+ **kwargs : Any
+ Additional arguments that you would pass to `python myflow.py` before
+ the `run` command.
+ """
  def __init__(self, flow, show_output: bool = True, profile: typing.Optional[str] = None, env: typing.Optional[typing.Dict] = None, base_dir: str = "/tmp", file_read_timeout: int = 3600, **kwargs):
  ...
  def nbrun(self, **kwargs):
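A sketch of the non-blocking pattern the docstring alludes to, where `cleanup()` must be called explicitly; `FlowName` stands for a flow class defined in the same cell and `alpha` is a hypothetical parameter:

```python
# Inside a notebook cell; IPython allows top-level await.
from metaflow import NBRunner

runner = NBRunner(FlowName)          # FlowName defined earlier in the cell
try:
    executing = await runner.async_run(alpha=5)
    result = await executing.wait()
    print(result.status)
finally:
    runner.cleanup()                 # required when not using nbrun()
```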
@@ -2998,6 +3328,30 @@ class NBRunner(object, metaclass=type):
  ...
 
  class Deployer(object, metaclass=type):
+ """
+ Use the `Deployer` class to configure and access one of the production
+ orchestrators supported by Metaflow.
+
+ Parameters
+ ----------
+ flow_file : str
+ Path to the flow file to deploy.
+ show_output : bool, default True
+ Show the 'stdout' and 'stderr' to the console by default.
+ profile : Optional[str], default None
+ Metaflow profile to use for the deployment. If not specified, the default
+ profile is used.
+ env : Optional[Dict[str, str]], default None
+ Additional environment variables to set for the deployment.
+ cwd : Optional[str], default None
+ The directory to run the subprocess in; if not specified, the current
+ directory is used.
+ file_read_timeout : int, default 3600
+ The timeout until which we try to read the deployer attribute file.
+ **kwargs : Any
+ Additional arguments that you would pass to `python myflow.py` before
+ the deployment command.
+ """
  def __init__(self, flow_file: str, show_output: bool = True, profile: typing.Optional[str] = None, env: typing.Optional[typing.Dict] = None, cwd: typing.Optional[str] = None, file_read_timeout: int = 3600, **kwargs):
  ...
  def _Deployer__make_function(self, deployer_class):
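A sketch of `Deployer` with a flow file, assuming an Argo Workflows target and following the deploy/create/trigger pattern shown in the `NBDeployer` docstring below; 'myflow.py' is a placeholder:

```python
from metaflow import Deployer

deployed = Deployer("myflow.py").argo_workflows().create()
result = deployed.trigger()
print(result.status)
print(result.run)
```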
@@ -3018,6 +3372,43 @@ class Deployer(object, metaclass=type):
  ...
 
  class NBDeployer(object, metaclass=type):
+ """
+ A wrapper over `Deployer` for deploying flows defined in a Jupyter
+ notebook cell.
+
+ Instantiate this class on the last line of a notebook cell where
+ a `flow` is defined. In contrast to `Deployer`, this class is not
+ meant to be used in a context manager.
+
+ ```python
+ deployer = NBDeployer(FlowName)
+ ar = deployer.argo_workflows(name="madhur")
+ ar_obj = ar.create()
+ result = ar_obj.trigger(alpha=300)
+ print(result.status)
+ print(result.run)
+ result.terminate()
+ ```
+
+ Parameters
+ ----------
+ flow : FlowSpec
+ Flow defined in the same cell.
+ show_output : bool, default True
+ Show the 'stdout' and 'stderr' to the console by default.
+ profile : Optional[str], default None
+ Metaflow profile to use to deploy this run. If not specified, the default
+ profile is used (or the one already set using `METAFLOW_PROFILE`).
+ env : Optional[Dict[str, str]], default None
+ Additional environment variables to set. This overrides the
+ environment set for this process.
+ base_dir : Optional[str], default None
+ The directory to run the subprocess in; if not specified, a temporary
+ directory is used.
+ **kwargs : Any
+ Additional arguments that you would pass to `python myflow.py`, i.e. options
+ listed in `python myflow.py --help`.
+ """
  def __init__(self, flow, show_output: bool = True, profile: typing.Optional[str] = None, env: typing.Optional[typing.Dict] = None, base_dir: str = "/tmp", file_read_timeout: int = 3600, **kwargs):
  ...
  def cleanup(self):