metaflow-stubs 2.12.28__py2.py3-none-any.whl → 2.12.29__py2.py3-none-any.whl

This diff shows the changes between two publicly released versions of the package, as published to a supported registry. It is provided for informational purposes only.
Files changed (159)
  1. metaflow-stubs/__init__.pyi +297 -3001
  2. metaflow-stubs/cards.pyi +19 -473
  3. metaflow-stubs/cli.pyi +17 -81
  4. metaflow-stubs/client/__init__.pyi +19 -1113
  5. metaflow-stubs/client/core.pyi +18 -158
  6. metaflow-stubs/client/filecache.pyi +8 -12
  7. metaflow-stubs/clone_util.pyi +6 -26
  8. metaflow-stubs/events.pyi +6 -5
  9. metaflow-stubs/exception.pyi +8 -6
  10. metaflow-stubs/flowspec.pyi +22 -106
  11. metaflow-stubs/generated_for.txt +1 -1
  12. metaflow-stubs/includefile.pyi +16 -564
  13. metaflow-stubs/info_file.pyi +6 -5
  14. metaflow-stubs/metadata_provider/__init__.pyi +16 -0
  15. metaflow-stubs/metadata_provider/heartbeat.pyi +34 -0
  16. metaflow-stubs/{metadata → metadata_provider}/metadata.pyi +10 -22
  17. metaflow-stubs/metadata_provider/util.pyi +19 -0
  18. metaflow-stubs/metaflow_config.pyi +8 -11
  19. metaflow-stubs/metaflow_current.pyi +10 -9
  20. metaflow-stubs/mflog/__init__.pyi +6 -0
  21. metaflow-stubs/mflog/mflog.pyi +52 -5
  22. metaflow-stubs/multicore_utils.pyi +6 -5
  23. metaflow-stubs/parameters.pyi +13 -23
  24. metaflow-stubs/plugins/__init__.pyi +51 -163
  25. metaflow-stubs/plugins/airflow/__init__.pyi +12 -5
  26. metaflow-stubs/plugins/airflow/airflow.pyi +19 -130
  27. metaflow-stubs/plugins/airflow/airflow_cli.pyi +17 -136
  28. metaflow-stubs/plugins/airflow/airflow_decorator.pyi +7 -26
  29. metaflow-stubs/plugins/airflow/airflow_utils.pyi +7 -6
  30. metaflow-stubs/plugins/airflow/exception.pyi +7 -11
  31. metaflow-stubs/plugins/airflow/sensors/__init__.pyi +10 -97
  32. metaflow-stubs/plugins/airflow/sensors/base_sensor.pyi +9 -30
  33. metaflow-stubs/plugins/airflow/sensors/external_task_sensor.pyi +9 -40
  34. metaflow-stubs/plugins/airflow/sensors/s3_sensor.pyi +9 -40
  35. metaflow-stubs/plugins/argo/__init__.pyi +12 -5
  36. metaflow-stubs/plugins/argo/argo_client.pyi +8 -26
  37. metaflow-stubs/plugins/argo/argo_events.pyi +7 -11
  38. metaflow-stubs/plugins/argo/argo_workflows.pyi +16 -120
  39. metaflow-stubs/plugins/argo/argo_workflows_cli.pyi +22 -460
  40. metaflow-stubs/plugins/argo/argo_workflows_decorator.pyi +12 -404
  41. metaflow-stubs/plugins/argo/argo_workflows_deployer.pyi +65 -322
  42. metaflow-stubs/plugins/argo/argo_workflows_deployer_objects.pyi +165 -0
  43. metaflow-stubs/plugins/aws/__init__.pyi +11 -5
  44. metaflow-stubs/plugins/aws/aws_client.pyi +6 -5
  45. metaflow-stubs/plugins/aws/aws_utils.pyi +6 -11
  46. metaflow-stubs/plugins/aws/batch/__init__.pyi +10 -5
  47. metaflow-stubs/plugins/aws/batch/batch.pyi +10 -55
  48. metaflow-stubs/plugins/aws/batch/batch_cli.pyi +10 -31
  49. metaflow-stubs/plugins/aws/batch/batch_client.pyi +7 -11
  50. metaflow-stubs/plugins/aws/batch/batch_decorator.pyi +15 -140
  51. metaflow-stubs/plugins/aws/secrets_manager/__init__.pyi +7 -5
  52. metaflow-stubs/plugins/aws/secrets_manager/aws_secrets_manager_secrets_provider.pyi +10 -21
  53. metaflow-stubs/plugins/aws/step_functions/__init__.pyi +15 -5
  54. metaflow-stubs/plugins/aws/step_functions/dynamo_db_client.pyi +6 -5
  55. metaflow-stubs/plugins/aws/step_functions/event_bridge_client.pyi +6 -5
  56. metaflow-stubs/plugins/aws/step_functions/production_token.pyi +6 -5
  57. metaflow-stubs/plugins/aws/step_functions/schedule_decorator.pyi +7 -5
  58. metaflow-stubs/plugins/aws/step_functions/step_functions.pyi +11 -65
  59. metaflow-stubs/plugins/aws/step_functions/step_functions_cli.pyi +19 -175
  60. metaflow-stubs/plugins/aws/step_functions/step_functions_client.pyi +6 -5
  61. metaflow-stubs/plugins/aws/step_functions/step_functions_decorator.pyi +8 -37
  62. metaflow-stubs/plugins/aws/step_functions/step_functions_deployer.pyi +53 -290
  63. metaflow-stubs/plugins/aws/step_functions/step_functions_deployer_objects.pyi +127 -0
  64. metaflow-stubs/plugins/azure/__init__.pyi +12 -7
  65. metaflow-stubs/plugins/azure/azure_credential.pyi +6 -5
  66. metaflow-stubs/plugins/azure/azure_exceptions.pyi +7 -11
  67. metaflow-stubs/plugins/azure/azure_secret_manager_secrets_provider.pyi +11 -24
  68. metaflow-stubs/plugins/azure/azure_utils.pyi +11 -29
  69. metaflow-stubs/plugins/azure/blob_service_client_factory.pyi +8 -23
  70. metaflow-stubs/plugins/azure/includefile_support.pyi +7 -17
  71. metaflow-stubs/plugins/cards/__init__.pyi +15 -5
  72. metaflow-stubs/plugins/cards/card_cli.pyi +22 -491
  73. metaflow-stubs/plugins/cards/card_client.pyi +13 -75
  74. metaflow-stubs/plugins/cards/card_creator.pyi +7 -10
  75. metaflow-stubs/plugins/cards/card_datastore.pyi +10 -18
  76. metaflow-stubs/plugins/cards/card_decorator.pyi +10 -126
  77. metaflow-stubs/plugins/cards/card_modules/__init__.pyi +14 -81
  78. metaflow-stubs/plugins/cards/card_modules/basic.pyi +14 -97
  79. metaflow-stubs/plugins/cards/card_modules/card.pyi +6 -5
  80. metaflow-stubs/plugins/cards/card_modules/chevron/__init__.pyi +12 -73
  81. metaflow-stubs/plugins/cards/card_modules/chevron/main.pyi +6 -61
  82. metaflow-stubs/plugins/cards/card_modules/chevron/metadata.pyi +6 -5
  83. metaflow-stubs/plugins/cards/card_modules/chevron/renderer.pyi +8 -45
  84. metaflow-stubs/plugins/cards/card_modules/chevron/tokenizer.pyi +7 -6
  85. metaflow-stubs/plugins/cards/card_modules/components.pyi +25 -108
  86. metaflow-stubs/plugins/cards/card_modules/convert_to_native_type.pyi +6 -5
  87. metaflow-stubs/plugins/cards/card_modules/renderer_tools.pyi +6 -12
  88. metaflow-stubs/plugins/cards/card_modules/test_cards.pyi +11 -88
  89. metaflow-stubs/plugins/cards/card_resolver.pyi +6 -49
  90. metaflow-stubs/plugins/cards/component_serializer.pyi +13 -63
  91. metaflow-stubs/plugins/cards/exception.pyi +7 -11
  92. metaflow-stubs/plugins/catch_decorator.pyi +10 -30
  93. metaflow-stubs/plugins/datatools/__init__.pyi +13 -392
  94. metaflow-stubs/plugins/datatools/local.pyi +7 -11
  95. metaflow-stubs/plugins/datatools/s3/__init__.pyi +19 -653
  96. metaflow-stubs/plugins/datatools/s3/s3.pyi +16 -264
  97. metaflow-stubs/plugins/datatools/s3/s3tail.pyi +7 -10
  98. metaflow-stubs/plugins/datatools/s3/s3util.pyi +6 -11
  99. metaflow-stubs/plugins/debug_logger.pyi +7 -5
  100. metaflow-stubs/plugins/debug_monitor.pyi +7 -5
  101. metaflow-stubs/plugins/environment_decorator.pyi +7 -5
  102. metaflow-stubs/plugins/events_decorator.pyi +8 -14
  103. metaflow-stubs/plugins/frameworks/__init__.pyi +7 -5
  104. metaflow-stubs/plugins/frameworks/pytorch.pyi +8 -45
  105. metaflow-stubs/plugins/gcp/__init__.pyi +11 -7
  106. metaflow-stubs/plugins/gcp/gcp_secret_manager_secrets_provider.pyi +11 -24
  107. metaflow-stubs/plugins/gcp/gs_exceptions.pyi +7 -11
  108. metaflow-stubs/plugins/gcp/gs_storage_client_factory.pyi +6 -5
  109. metaflow-stubs/plugins/gcp/gs_utils.pyi +8 -20
  110. metaflow-stubs/plugins/gcp/includefile_support.pyi +7 -17
  111. metaflow-stubs/plugins/kubernetes/__init__.pyi +13 -5
  112. metaflow-stubs/plugins/kubernetes/kube_utils.pyi +6 -10
  113. metaflow-stubs/plugins/kubernetes/kubernetes.pyi +9 -29
  114. metaflow-stubs/plugins/kubernetes/kubernetes_cli.pyi +16 -155
  115. metaflow-stubs/plugins/kubernetes/kubernetes_client.pyi +9 -72
  116. metaflow-stubs/plugins/kubernetes/kubernetes_decorator.pyi +19 -142
  117. metaflow-stubs/plugins/kubernetes/kubernetes_job.pyi +8 -41
  118. metaflow-stubs/plugins/kubernetes/kubernetes_jobsets.pyi +7 -11
  119. metaflow-stubs/plugins/logs_cli.pyi +11 -10
  120. metaflow-stubs/plugins/package_cli.pyi +7 -5
  121. metaflow-stubs/plugins/parallel_decorator.pyi +11 -59
  122. metaflow-stubs/plugins/project_decorator.pyi +8 -14
  123. metaflow-stubs/plugins/pypi/__init__.pyi +12 -11
  124. metaflow-stubs/plugins/pypi/conda_decorator.pyi +8 -27
  125. metaflow-stubs/plugins/pypi/conda_environment.pyi +14 -20
  126. metaflow-stubs/plugins/pypi/pypi_decorator.pyi +7 -5
  127. metaflow-stubs/plugins/pypi/pypi_environment.pyi +7 -39
  128. metaflow-stubs/plugins/pypi/utils.pyi +7 -11
  129. metaflow-stubs/plugins/resources_decorator.pyi +7 -5
  130. metaflow-stubs/plugins/retry_decorator.pyi +7 -11
  131. metaflow-stubs/plugins/secrets/__init__.pyi +9 -5
  132. metaflow-stubs/plugins/secrets/inline_secrets_provider.pyi +9 -14
  133. metaflow-stubs/plugins/secrets/secrets_decorator.pyi +7 -11
  134. metaflow-stubs/plugins/storage_executor.pyi +6 -11
  135. metaflow-stubs/plugins/tag_cli.pyi +14 -396
  136. metaflow-stubs/plugins/test_unbounded_foreach_decorator.pyi +9 -34
  137. metaflow-stubs/plugins/timeout_decorator.pyi +8 -12
  138. metaflow-stubs/procpoll.pyi +7 -5
  139. metaflow-stubs/pylint_wrapper.pyi +7 -11
  140. metaflow-stubs/runner/__init__.pyi +13 -5
  141. metaflow-stubs/runner/deployer.pyi +102 -210
  142. metaflow-stubs/runner/deployer_impl.pyi +87 -0
  143. metaflow-stubs/runner/metaflow_runner.pyi +24 -508
  144. metaflow-stubs/runner/nbdeploy.pyi +16 -60
  145. metaflow-stubs/runner/nbrun.pyi +11 -148
  146. metaflow-stubs/runner/subprocess_manager.pyi +9 -10
  147. metaflow-stubs/runner/utils.pyi +44 -9
  148. metaflow-stubs/system/__init__.pyi +9 -87
  149. metaflow-stubs/system/system_logger.pyi +7 -6
  150. metaflow-stubs/system/system_monitor.pyi +6 -5
  151. metaflow-stubs/tagging_util.pyi +6 -10
  152. metaflow-stubs/tuple_util.pyi +6 -5
  153. metaflow-stubs/version.pyi +6 -5
  154. {metaflow_stubs-2.12.28.dist-info → metaflow_stubs-2.12.29.dist-info}/METADATA +2 -2
  155. metaflow_stubs-2.12.29.dist-info/RECORD +158 -0
  156. metaflow-stubs/metadata/util.pyi +0 -18
  157. metaflow_stubs-2.12.28.dist-info/RECORD +0 -152
  158. {metaflow_stubs-2.12.28.dist-info → metaflow_stubs-2.12.29.dist-info}/WHEEL +0 -0
  159. {metaflow_stubs-2.12.28.dist-info → metaflow_stubs-2.12.29.dist-info}/top_level.txt +0 -0
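
Most of the churn above is mechanical: the stub generator now emits re-exports instead of inlined definitions in `metaflow-stubs/__init__.pyi` (hence the -3001 there), the `metaflow-stubs/metadata` package is renamed to `metadata_provider`, and new stub files describe the deployer objects (`runner/deployer_impl.pyi`, `plugins/argo/argo_workflows_deployer_objects.pyi`, `plugins/aws/step_functions/step_functions_deployer_objects.pyi`). As a rough, hedged sketch of the user-facing API those deployer stubs type-check (the flow file name and the exact call chain below are assumptions from the Deployer docs, not taken from this diff):

from metaflow import Deployer

# `Deployer` and `DeployedFlow` are re-exported from metaflow.runner.deployer
# in the new __init__.pyi (see the first hunk below). "hello.py" is a
# hypothetical flow file.
deployed = Deployer("hello.py").argo_workflows().create()
run = deployed.trigger()
print(run.status)
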
metaflow-stubs/__init__.pyi
@@ -1,785 +1,68 @@
-##################################################################################
-# Auto-generated Metaflow stub file #
-# MF version: 2.12.28 #
-# Generated on 2024-11-01T10:21:04.434546 #
-##################################################################################
+######################################################################################################
+# Auto-generated Metaflow stub file #
+# MF version: 2.12.29 #
+# Generated on 2024-11-07T22:19:34.657765 #
+######################################################################################################
 
 from __future__ import annotations
 
 import typing
 if typing.TYPE_CHECKING:
-    import metaflow.metaflow_current
-    import typing
     import datetime
-    import io
-    import metaflow.runner.metaflow_runner
-    import metaflow.client.core
-    import metaflow.events
-    import metaflow.datastore.inputs
-    import metaflow.flowspec
-    import metaflow._vendor.click.types
-    import metaflow.parameters
-    import metaflow.plugins.datatools.s3.s3
+    import typing
 
 FlowSpecDerived = typing.TypeVar("FlowSpecDerived", bound="FlowSpec", contravariant=False, covariant=False)
 StepFlag = typing.NewType("StepFlag", bool)
 
-EXT_PKG: str
-
-def parallel_imap_unordered(func: typing.Callable[[typing.Any], typing.Any], iterable: typing.Iterable[typing.Any], max_parallel: typing.Optional[int] = None, dir: typing.Optional[str] = None) -> typing.Iterator[typing.Any]:
-    """
-    Parallelizes execution of a function using multiprocessing. The result
-    order is not guaranteed.
-
-    Parameters
-    ----------
-    func : Callable[[Any], Any]
-        Function taking a single argument and returning a result
-    iterable : Iterable[Any]
-        Iterable over arguments to pass to fun
-    max_parallel int, optional, default None
-        Maximum parallelism. If not specified, uses the number of CPUs
-    dir : str, optional, default None
-        If specified, directory where temporary files are created
-
-    Yields
-    ------
-    Any
-        One result from calling func on one argument
-    """
-    ...
-
-def parallel_map(func: typing.Callable[[typing.Any], typing.Any], iterable: typing.Iterable[typing.Any], max_parallel: typing.Optional[int] = None, dir: typing.Optional[str] = None) -> typing.List[typing.Any]:
-    """
-    Parallelizes execution of a function using multiprocessing. The result
-    order is that of the arguments in `iterable`
-
-    Parameters
-    ----------
-    func : Callable[[Any], Any]
-        Function taking a single argument and returning a result
-    iterable : Iterable[Any]
-        Iterable over arguments to pass to fun
-    max_parallel int, optional, default None
-        Maximum parallelism. If not specified, uses the number of CPUs
-    dir : str, optional, default None
-        If specified, directory where temporary files are created
-
-    Returns
-    -------
-    List[Any]
-        Results. The items in the list are in the same order as the items
-        in `iterable`.
-    """
-    ...
-
-current: metaflow.metaflow_current.Current
-
-def metadata(ms: str) -> str:
-    """
-    Switch Metadata provider.
-
-    This call has a global effect. Selecting the local metadata will,
-    for example, not allow access to information stored in remote
-    metadata providers.
-
-    Note that you don't typically have to call this function directly. Usually
-    the metadata provider is set through the Metaflow configuration file. If you
-    need to switch between multiple providers, you can use the `METAFLOW_PROFILE`
-    environment variable to switch between configurations.
-
-    Parameters
-    ----------
-    ms : str
-        Can be a path (selects local metadata), a URL starting with http (selects
-        the service metadata) or an explicit specification <metadata_type>@<info>; as an
-        example, you can specify local@<path> or service@<url>.
-
-    Returns
-    -------
-    str
-        The description of the metadata selected (equivalent to the result of
-        get_metadata()).
-    """
-    ...
+from . import info_file as info_file
+from . import exception as exception
+from . import metaflow_config as metaflow_config
+from . import multicore_utils as multicore_utils
+from .multicore_utils import parallel_imap_unordered as parallel_imap_unordered
+from .multicore_utils import parallel_map as parallel_map
+from . import metaflow_current as metaflow_current
+from .metaflow_current import current as current
+from . import parameters as parameters
+from . import tagging_util as tagging_util
+from . import metadata_provider as metadata_provider
+from . import flowspec as flowspec
+from .flowspec import FlowSpec as FlowSpec
+from .parameters import Parameter as Parameter
+from .parameters import JSONTypeClass as JSONTypeClass
+from .parameters import JSONType as JSONType
+from . import events as events
+from . import tuple_util as tuple_util
+from . import runner as runner
+from . import plugins as plugins
+from .plugins.datatools.s3.s3 import S3 as S3
+from . import includefile as includefile
+from .includefile import IncludeFile as IncludeFile
+from . import cards as cards
+from . import client as client
+from .client.core import namespace as namespace
+from .client.core import get_namespace as get_namespace
+from .client.core import default_namespace as default_namespace
+from .client.core import metadata as metadata
+from .client.core import get_metadata as get_metadata
+from .client.core import default_metadata as default_metadata
+from .client.core import Metaflow as Metaflow
+from .client.core import Flow as Flow
+from .client.core import Run as Run
+from .client.core import Step as Step
+from .client.core import Task as Task
+from .client.core import DataArtifact as DataArtifact
+from .runner.metaflow_runner import Runner as Runner
+from .runner.nbrun import NBRunner as NBRunner
+from .runner.deployer import Deployer as Deployer
+from .runner.deployer import DeployedFlow as DeployedFlow
+from .runner.nbdeploy import NBDeployer as NBDeployer
+from . import version as version
+from . import system as system
+from . import pylint_wrapper as pylint_wrapper
+from . import procpoll as procpoll
+from . import clone_util as clone_util
+from . import cli as cli
 
-class FlowSpec(object, metaclass=metaflow.flowspec._FlowSpecMeta):
-    """
-    Main class from which all Flows should inherit.
-
-    Attributes
-    ----------
-    index
-    input
-    """
-    def __init__(self, use_cli = True):
-        """
-        Construct a FlowSpec
-
-        Parameters
-        ----------
-        use_cli : bool, default True
-            Set to True if the flow is invoked from __main__ or the command line
-        """
-        ...
-    @property
-    def script_name(self) -> str:
-        """
-        [Legacy function - do not use. Use `current` instead]
-
-        Returns the name of the script containing the flow
-
-        Returns
-        -------
-        str
-            A string containing the name of the script
-        """
-        ...
-    def __iter__(self):
-        """
-        [Legacy function - do not use]
-
-        Iterate over all steps in the Flow
-
-        Returns
-        -------
-        Iterator[graph.DAGNode]
-            Iterator over the steps in the flow
-        """
-        ...
-    def __getattr__(self, name: str):
-        ...
-    def cmd(self, cmdline, input = {}, output = []):
-        """
-        [Legacy function - do not use]
-        """
-        ...
-    @property
-    def index(self) -> typing.Optional[int]:
-        """
-        The index of this foreach branch.
-
-        In a foreach step, multiple instances of this step (tasks) will be executed,
-        one for each element in the foreach. This property returns the zero based index
-        of the current task. If this is not a foreach step, this returns None.
-
-        If you need to know the indices of the parent tasks in a nested foreach, use
-        `FlowSpec.foreach_stack`.
-
-        Returns
-        -------
-        int, optional
-            Index of the task in a foreach step.
-        """
-        ...
-    @property
-    def input(self) -> typing.Optional[typing.Any]:
-        """
-        The value of the foreach artifact in this foreach branch.
-
-        In a foreach step, multiple instances of this step (tasks) will be executed,
-        one for each element in the foreach. This property returns the element passed
-        to the current task. If this is not a foreach step, this returns None.
-
-        If you need to know the values of the parent tasks in a nested foreach, use
-        `FlowSpec.foreach_stack`.
-
-        Returns
-        -------
-        object, optional
-            Input passed to the foreach task.
-        """
-        ...
-    def foreach_stack(self) -> typing.Optional[typing.List[typing.Tuple[int, int, typing.Any]]]:
-        """
-        Returns the current stack of foreach indexes and values for the current step.
-
-        Use this information to understand what data is being processed in the current
-        foreach branch. For example, considering the following code:
-        ```
-        @step
-        def root(self):
-            self.split_1 = ['a', 'b', 'c']
-            self.next(self.nest_1, foreach='split_1')
-
-        @step
-        def nest_1(self):
-            self.split_2 = ['d', 'e', 'f', 'g']
-            self.next(self.nest_2, foreach='split_2'):
-
-        @step
-        def nest_2(self):
-            foo = self.foreach_stack()
-        ```
-
-        `foo` will take the following values in the various tasks for nest_2:
-        ```
-        [(0, 3, 'a'), (0, 4, 'd')]
-        [(0, 3, 'a'), (1, 4, 'e')]
-        ...
-        [(0, 3, 'a'), (3, 4, 'g')]
-        [(1, 3, 'b'), (0, 4, 'd')]
-        ...
-        ```
-        where each tuple corresponds to:
-
-        - The index of the task for that level of the loop.
-        - The number of splits for that level of the loop.
-        - The value for that level of the loop.
-
-        Note that the last tuple returned in a task corresponds to:
-
-        - 1st element: value returned by `self.index`.
-        - 3rd element: value returned by `self.input`.
-
-        Returns
-        -------
-        List[Tuple[int, int, Any]]
-            An array describing the current stack of foreach steps.
-        """
-        ...
-    def merge_artifacts(self, inputs: metaflow.datastore.inputs.Inputs, exclude: typing.Optional[typing.List[str]] = None, include: typing.Optional[typing.List[str]] = None):
-        """
-        Helper function for merging artifacts in a join step.
-
-        This function takes all the artifacts coming from the branches of a
-        join point and assigns them to self in the calling step. Only artifacts
-        not set in the current step are considered. If, for a given artifact, different
-        values are present on the incoming edges, an error will be thrown and the artifacts
-        that conflict will be reported.
-
-        As a few examples, in the simple graph: A splitting into B and C and joining in D:
-        ```
-        A:
-          self.x = 5
-          self.y = 6
-        B:
-          self.b_var = 1
-          self.x = from_b
-        C:
-          self.x = from_c
-
-        D:
-          merge_artifacts(inputs)
-        ```
-        In D, the following artifacts are set:
-        - `y` (value: 6), `b_var` (value: 1)
-        - if `from_b` and `from_c` are the same, `x` will be accessible and have value `from_b`
-        - if `from_b` and `from_c` are different, an error will be thrown. To prevent this error,
-          you need to manually set `self.x` in D to a merged value (for example the max) prior to
-          calling `merge_artifacts`.
-
-        Parameters
-        ----------
-        inputs : Inputs
-            Incoming steps to the join point.
-        exclude : List[str], optional, default None
-            If specified, do not consider merging artifacts with a name in `exclude`.
-            Cannot specify if `include` is also specified.
-        include : List[str], optional, default None
-            If specified, only merge artifacts specified. Cannot specify if `exclude` is
-            also specified.
-
-        Raises
-        ------
-        MetaflowException
-            This exception is thrown if this is not called in a join step.
-        UnhandledInMergeArtifactsException
-            This exception is thrown in case of unresolved conflicts.
-        MissingInMergeArtifactsException
-            This exception is thrown in case an artifact specified in `include` cannot
-            be found.
-        """
-        ...
-    def next(self, *dsts: typing.Callable[..., None], **kwargs):
-        """
-        Indicates the next step to execute after this step has completed.
-
-        This statement should appear as the last statement of each step, except
-        the end step.
-
-        There are several valid formats to specify the next step:
-
-        - Straight-line connection: `self.next(self.next_step)` where `next_step` is a method in
-          the current class decorated with the `@step` decorator.
-
-        - Static fan-out connection: `self.next(self.step1, self.step2, ...)` where `stepX` are
-          methods in the current class decorated with the `@step` decorator.
-
-        - Foreach branch:
-          ```
-          self.next(self.foreach_step, foreach='foreach_iterator')
-          ```
-          In this situation, `foreach_step` is a method in the current class decorated with the
-          `@step` decorator and `foreach_iterator` is a variable name in the current class that
-          evaluates to an iterator. A task will be launched for each value in the iterator and
-          each task will execute the code specified by the step `foreach_step`.
-
-        Parameters
-        ----------
-        dsts : Callable[..., None]
-            One or more methods annotated with `@step`.
-
-        Raises
-        ------
-        InvalidNextException
-            Raised if the format of the arguments does not match one of the ones given above.
-        """
-        ...
-    def __str__(self):
-        ...
-    def __getstate__(self):
-        ...
-    ...
-
-class Parameter(object, metaclass=type):
-    """
-    Defines a parameter for a flow.
-
-    Parameters must be instantiated as class variables in flow classes, e.g.
-    ```
-    class MyFlow(FlowSpec):
-        param = Parameter('myparam')
-    ```
-    in this case, the parameter is specified on the command line as
-    ```
-    python myflow.py run --myparam=5
-    ```
-    and its value is accessible through a read-only artifact like this:
-    ```
-    print(self.param == 5)
-    ```
-    Note that the user-visible parameter name, `myparam` above, can be
-    different from the artifact name, `param` above.
-
-    The parameter value is converted to a Python type based on the `type`
-    argument or to match the type of `default`, if it is set.
-
-    Parameters
-    ----------
-    name : str
-        User-visible parameter name.
-    default : str or float or int or bool or `JSONType` or a function.
-        Default value for the parameter. Use a special `JSONType` class to
-        indicate that the value must be a valid JSON object. A function
-        implies that the parameter corresponds to a *deploy-time parameter*.
-        The type of the default value is used as the parameter `type`.
-    type : Type, default None
-        If `default` is not specified, define the parameter type. Specify
-        one of `str`, `float`, `int`, `bool`, or `JSONType`. If None, defaults
-        to the type of `default` or `str` if none specified.
-    help : str, optional
-        Help text to show in `run --help`.
-    required : bool, default False
-        Require that the user specified a value for the parameter.
-        `required=True` implies that the `default` is not used.
-    show_default : bool, default True
-        If True, show the default value in the help text.
-    """
-    def __init__(self, name: str, default: typing.Union[str, float, int, bool, typing.Dict[str, typing.Any], typing.Callable[[], typing.Union[str, float, int, bool, typing.Dict[str, typing.Any]]], None] = None, type: typing.Union[typing.Type[str], typing.Type[float], typing.Type[int], typing.Type[bool], metaflow.parameters.JSONTypeClass, None] = None, help: typing.Optional[str] = None, required: bool = False, show_default: bool = True, **kwargs: typing.Dict[str, typing.Any]):
-        ...
-    def __repr__(self):
-        ...
-    def __str__(self):
-        ...
-    def option_kwargs(self, deploy_mode):
-        ...
-    def load_parameter(self, v):
-        ...
-    @property
-    def is_string_type(self):
-        ...
-    def __getitem__(self, x):
-        ...
-    ...
-
-class JSONTypeClass(metaflow._vendor.click.types.ParamType, metaclass=type):
-    def convert(self, value, param, ctx):
-        ...
-    def __str__(self):
-        ...
-    def __repr__(self):
-        ...
-    ...
-
-JSONType: metaflow.parameters.JSONTypeClass
-
-class S3(object, metaclass=type):
-    """
-    The Metaflow S3 client.
-
-    This object manages the connection to S3 and a temporary diretory that is used
-    to download objects. Note that in most cases when the data fits in memory, no local
-    disk IO is needed as operations are cached by the operating system, which makes
-    operations fast as long as there is enough memory available.
-
-    The easiest way is to use this object as a context manager:
-    ```
-    with S3() as s3:
-        data = [obj.blob for obj in s3.get_many(urls)]
-    print(data)
-    ```
-    The context manager takes care of creating and deleting a temporary directory
-    automatically. Without a context manager, you must call `.close()` to delete
-    the directory explicitly:
-    ```
-    s3 = S3()
-    data = [obj.blob for obj in s3.get_many(urls)]
-    s3.close()
-    ```
-    You can customize the location of the temporary directory with `tmproot`. It
-    defaults to the current working directory.
-
-    To make it easier to deal with object locations, the client can be initialized
-    with an S3 path prefix. There are three ways to handle locations:
-
-    1. Use a `metaflow.Run` object or `self`, e.g. `S3(run=self)` which
-       initializes the prefix with the global `DATATOOLS_S3ROOT` path, combined
-       with the current run ID. This mode makes it easy to version data based
-       on the run ID consistently. You can use the `bucket` and `prefix` to
-       override parts of `DATATOOLS_S3ROOT`.
-
-    2. Specify an S3 prefix explicitly with `s3root`,
-       e.g. `S3(s3root='s3://mybucket/some/path')`.
-
-    3. Specify nothing, i.e. `S3()`, in which case all operations require
-       a full S3 url prefixed with `s3://`.
-
-    Parameters
-    ----------
-    tmproot : str, default: '.'
-        Where to store the temporary directory.
-    bucket : str, optional
-        Override the bucket from `DATATOOLS_S3ROOT` when `run` is specified.
-    prefix : str, optional
-        Override the path from `DATATOOLS_S3ROOT` when `run` is specified.
-    run : FlowSpec or Run, optional
-        Derive path prefix from the current or a past run ID, e.g. S3(run=self).
-    s3root : str, optional
-        If `run` is not specified, use this as the S3 prefix.
-    """
-    @classmethod
-    def get_root_from_config(cls, echo, create_on_absent = True):
-        ...
-    def __enter__(self) -> metaflow.plugins.datatools.s3.s3.S3:
-        ...
-    def __exit__(self, *args):
-        ...
-    def close(self):
-        """
-        Delete all temporary files downloaded in this context.
-        """
-        ...
-    def list_paths(self, keys: typing.Optional[typing.Iterable[str]] = None) -> typing.List[metaflow.plugins.datatools.s3.s3.S3Object]:
-        """
-        List the next level of paths in S3.
-
-        If multiple keys are specified, listings are done in parallel. The returned
-        S3Objects have `.exists == False` if the path refers to a prefix, not an
-        existing S3 object.
-
-        For instance, if the directory hierarchy is
-        ```
-        a/0.txt
-        a/b/1.txt
-        a/c/2.txt
-        a/d/e/3.txt
-        f/4.txt
-        ```
-        The `list_paths(['a', 'f'])` call returns
-        ```
-        a/0.txt (exists == True)
-        a/b/ (exists == False)
-        a/c/ (exists == False)
-        a/d/ (exists == False)
-        f/4.txt (exists == True)
-        ```
-
-        Parameters
-        ----------
-        keys : Iterable[str], optional, default None
-            List of paths.
-
-        Returns
-        -------
-        List[S3Object]
-            S3Objects under the given paths, including prefixes (directories) that
-            do not correspond to leaf objects.
-        """
-        ...
-    def list_recursive(self, keys: typing.Optional[typing.Iterable[str]] = None) -> typing.List[metaflow.plugins.datatools.s3.s3.S3Object]:
-        """
-        List all objects recursively under the given prefixes.
-
-        If multiple keys are specified, listings are done in parallel. All objects
-        returned have `.exists == True` as this call always returns leaf objects.
-
-        For instance, if the directory hierarchy is
-        ```
-        a/0.txt
-        a/b/1.txt
-        a/c/2.txt
-        a/d/e/3.txt
-        f/4.txt
-        ```
-        The `list_paths(['a', 'f'])` call returns
-        ```
-        a/0.txt (exists == True)
-        a/b/1.txt (exists == True)
-        a/c/2.txt (exists == True)
-        a/d/e/3.txt (exists == True)
-        f/4.txt (exists == True)
-        ```
-
-        Parameters
-        ----------
-        keys : Iterable[str], optional, default None
-            List of paths.
-
-        Returns
-        -------
-        List[S3Object]
-            S3Objects under the given paths.
-        """
-        ...
-    def info(self, key: typing.Optional[str] = None, return_missing: bool = False) -> metaflow.plugins.datatools.s3.s3.S3Object:
-        """
-        Get metadata about a single object in S3.
-
-        This call makes a single `HEAD` request to S3 which can be
-        much faster than downloading all data with `get`.
-
-        Parameters
-        ----------
-        key : str, optional, default None
-            Object to query. It can be an S3 url or a path suffix.
-        return_missing : bool, default False
-            If set to True, do not raise an exception for a missing key but
-            return it as an `S3Object` with `.exists == False`.
-
-        Returns
-        -------
-        S3Object
-            An S3Object corresponding to the object requested. The object
-            will have `.downloaded == False`.
-        """
-        ...
-    def info_many(self, keys: typing.Iterable[str], return_missing: bool = False) -> typing.List[metaflow.plugins.datatools.s3.s3.S3Object]:
-        """
-        Get metadata about many objects in S3 in parallel.
-
-        This call makes a single `HEAD` request to S3 which can be
-        much faster than downloading all data with `get`.
-
-        Parameters
-        ----------
-        keys : Iterable[str]
-            Objects to query. Each key can be an S3 url or a path suffix.
-        return_missing : bool, default False
-            If set to True, do not raise an exception for a missing key but
-            return it as an `S3Object` with `.exists == False`.
-
-        Returns
-        -------
-        List[S3Object]
-            A list of S3Objects corresponding to the paths requested. The
-            objects will have `.downloaded == False`.
-        """
-        ...
-    def get(self, key: typing.Union[str, metaflow.plugins.datatools.s3.s3.S3GetObject, None] = None, return_missing: bool = False, return_info: bool = True) -> metaflow.plugins.datatools.s3.s3.S3Object:
-        """
-        Get a single object from S3.
-
-        Parameters
-        ----------
-        key : Union[str, S3GetObject], optional, default None
-            Object to download. It can be an S3 url, a path suffix, or
-            an S3GetObject that defines a range of data to download. If None, or
-            not provided, gets the S3 root.
-        return_missing : bool, default False
-            If set to True, do not raise an exception for a missing key but
-            return it as an `S3Object` with `.exists == False`.
-        return_info : bool, default True
-            If set to True, fetch the content-type and user metadata associated
-            with the object at no extra cost, included for symmetry with `get_many`
-
-        Returns
-        -------
-        S3Object
-            An S3Object corresponding to the object requested.
-        """
-        ...
-    def get_many(self, keys: typing.Iterable[typing.Union[str, metaflow.plugins.datatools.s3.s3.S3GetObject]], return_missing: bool = False, return_info: bool = True) -> typing.List[metaflow.plugins.datatools.s3.s3.S3Object]:
-        """
-        Get many objects from S3 in parallel.
-
-        Parameters
-        ----------
-        keys : Iterable[Union[str, S3GetObject]]
-            Objects to download. Each object can be an S3 url, a path suffix, or
-            an S3GetObject that defines a range of data to download.
-        return_missing : bool, default False
-            If set to True, do not raise an exception for a missing key but
-            return it as an `S3Object` with `.exists == False`.
-        return_info : bool, default True
-            If set to True, fetch the content-type and user metadata associated
-            with the object at no extra cost, included for symmetry with `get_many`.
-
-        Returns
-        -------
-        List[S3Object]
-            S3Objects corresponding to the objects requested.
-        """
-        ...
-    def get_recursive(self, keys: typing.Iterable[str], return_info: bool = False) -> typing.List[metaflow.plugins.datatools.s3.s3.S3Object]:
-        """
-        Get many objects from S3 recursively in parallel.
-
-        Parameters
-        ----------
-        keys : Iterable[str]
-            Prefixes to download recursively. Each prefix can be an S3 url or a path suffix
-            which define the root prefix under which all objects are downloaded.
-        return_info : bool, default False
-            If set to True, fetch the content-type and user metadata associated
-            with the object.
-
-        Returns
-        -------
-        List[S3Object]
-            S3Objects stored under the given prefixes.
-        """
-        ...
-    def get_all(self, return_info: bool = False) -> typing.List[metaflow.plugins.datatools.s3.s3.S3Object]:
-        """
-        Get all objects under the prefix set in the `S3` constructor.
-
-        This method requires that the `S3` object is initialized either with `run` or
-        `s3root`.
-
-        Parameters
-        ----------
-        return_info : bool, default False
-            If set to True, fetch the content-type and user metadata associated
-            with the object.
-
-        Returns
-        -------
-        Iterable[S3Object]
-            S3Objects stored under the main prefix.
-        """
-        ...
-    def put(self, key: typing.Union[str, metaflow.plugins.datatools.s3.s3.S3PutObject], obj: typing.Union[io.RawIOBase, io.BufferedIOBase, str, bytes], overwrite: bool = True, content_type: typing.Optional[str] = None, metadata: typing.Optional[typing.Dict[str, str]] = None) -> str:
-        """
-        Upload a single object to S3.
-
-        Parameters
-        ----------
-        key : Union[str, S3PutObject]
-            Object path. It can be an S3 url or a path suffix.
-        obj : PutValue
-            An object to store in S3. Strings are converted to UTF-8 encoding.
-        overwrite : bool, default True
-            Overwrite the object if it exists. If set to False, the operation
-            succeeds without uploading anything if the key already exists.
-        content_type : str, optional, default None
-            Optional MIME type for the object.
-        metadata : Dict[str, str], optional, default None
-            A JSON-encodable dictionary of additional headers to be stored
-            as metadata with the object.
-
-        Returns
-        -------
-        str
-            URL of the object stored.
-        """
-        ...
-    def put_many(self, key_objs: typing.List[typing.Union[typing.Tuple[str, typing.Union[io.RawIOBase, io.BufferedIOBase, str, bytes]], metaflow.plugins.datatools.s3.s3.S3PutObject]], overwrite: bool = True) -> typing.List[typing.Tuple[str, str]]:
-        """
-        Upload many objects to S3.
-
-        Each object to be uploaded can be specified in two ways:
-
-        1. As a `(key, obj)` tuple where `key` is a string specifying
-           the path and `obj` is a string or a bytes object.
-
-        2. As a `S3PutObject` which contains additional metadata to be
-           stored with the object.
-
-        Parameters
-        ----------
-        key_objs : List[Union[Tuple[str, PutValue], S3PutObject]]
-            List of key-object pairs to upload.
-        overwrite : bool, default True
-            Overwrite the object if it exists. If set to False, the operation
-            succeeds without uploading anything if the key already exists.
-
-        Returns
-        -------
-        List[Tuple[str, str]]
-            List of `(key, url)` pairs corresponding to the objects uploaded.
-        """
-        ...
-    def put_files(self, key_paths: typing.List[typing.Union[typing.Tuple[str, typing.Union[io.RawIOBase, io.BufferedIOBase, str, bytes]], metaflow.plugins.datatools.s3.s3.S3PutObject]], overwrite: bool = True) -> typing.List[typing.Tuple[str, str]]:
-        """
-        Upload many local files to S3.
-
-        Each file to be uploaded can be specified in two ways:
-
-        1. As a `(key, path)` tuple where `key` is a string specifying
-           the S3 path and `path` is the path to a local file.
-
-        2. As a `S3PutObject` which contains additional metadata to be
-           stored with the file.
-
-        Parameters
-        ----------
-        key_paths : List[Union[Tuple[str, PutValue], S3PutObject]]
-            List of files to upload.
-        overwrite : bool, default True
-            Overwrite the object if it exists. If set to False, the operation
-            succeeds without uploading anything if the key already exists.
-
-        Returns
-        -------
-        List[Tuple[str, str]]
-            List of `(key, url)` pairs corresponding to the files uploaded.
-        """
-        ...
-    ...
-
-class IncludeFile(metaflow.parameters.Parameter, metaclass=type):
-    """
-    Includes a local file as a parameter for the flow.
-
-    `IncludeFile` behaves like `Parameter` except that it reads its value from a file instead of
-    the command line. The user provides a path to a file on the command line. The file contents
-    are saved as a read-only artifact which is available in all steps of the flow.
-
-    Parameters
-    ----------
-    name : str
-        User-visible parameter name.
-    default : Union[str, Callable[ParameterContext, str]]
-        Default path to a local file. A function
-        implies that the parameter corresponds to a *deploy-time parameter*.
-    is_text : bool, default True
-        Convert the file contents to a string using the provided `encoding`.
-        If False, the artifact is stored in `bytes`.
-    encoding : str, optional, default 'utf-8'
-        Use this encoding to decode the file contexts if `is_text=True`.
-    required : bool, default False
-        Require that the user specified a value for the parameter.
-        `required=True` implies that the `default` is not used.
-    help : str, optional
-        Help text to show in `run --help`.
-    show_default : bool, default True
-        If True, show the default value in the help text.
-    """
-    def __init__(self, name: str, required: bool = False, is_text: bool = True, encoding: str = "utf-8", help: typing.Optional[str] = None, **kwargs: typing.Dict[str, str]):
-        ...
-    def load_parameter(self, v):
-        ...
-    ...
+EXT_PKG: str
 
 @typing.overload
 def step(f: typing.Callable[[FlowSpecDerived], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
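
The hunk above replaces the inlined copies of `FlowSpec`, `Parameter`, `JSONType`, `S3`, and `IncludeFile` with re-exports from their defining modules, so the top-level import surface is unchanged. A minimal sketch of a flow that exercises those re-exported names (the flow, parameter, and artifact names below are illustrative, not from this diff):

from metaflow import FlowSpec, Parameter, step

class HelloFlow(FlowSpec):
    # Exposed on the CLI as --alpha and read back as self.alpha.
    alpha = Parameter("alpha", default=0.5, help="illustrative parameter")

    @step
    def start(self):
        self.doubled = self.alpha * 2  # stored as an artifact
        self.next(self.end)

    @step
    def end(self):
        print(self.doubled)

if __name__ == "__main__":
    HelloFlow()

Whether these names come from inlined stubs (2.12.28) or re-exports (2.12.29), a type checker resolves them the same way.
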
@@ -855,189 +138,73 @@ def step(f: typing.Union[typing.Callable[[FlowSpecDerived], None], typing.Callab
     ...
 
 @typing.overload
-def resources(*, cpu: int = 1, gpu: typing.Optional[int] = None, disk: typing.Optional[int] = None, memory: int = 4096, shared_memory: typing.Optional[int] = None) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
+def conda(*, packages: typing.Dict[str, str] = {}, libraries: typing.Dict[str, str] = {}, python: typing.Optional[str] = None, disabled: bool = False) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
     """
-    Specifies the resources needed when executing this step.
-
-    Use `@resources` to specify the resource requirements
-    independently of the specific compute layer (`@batch`, `@kubernetes`).
-
-    You can choose the compute layer on the command line by executing e.g.
-    ```
-    python myflow.py run --with batch
-    ```
-    or
-    ```
-    python myflow.py run --with kubernetes
-    ```
-    which executes the flow on the desired system using the
-    requirements specified in `@resources`.
+    Specifies the Conda environment for the step.
 
-    Parameters
-    ----------
-    cpu : int, default 1
-        Number of CPUs required for this step.
-    gpu : int, optional, default None
-        Number of GPUs required for this step.
-    disk : int, optional, default None
-        Disk size (in MB) required for this step. Only applies on Kubernetes.
-    memory : int, default 4096
-        Memory size (in MB) required for this step.
-    shared_memory : int, optional, default None
-        The value for the size (in MiB) of the /dev/shm volume for this step.
-        This parameter maps to the `--shm-size` option in Docker.
+    Information in this decorator will augment any
+    attributes set in the `@conda_base` flow-level decorator. Hence,
+    you can use `@conda_base` to set packages required by all
+    steps and use `@conda` to specify step-specific overrides.
     """
     ...
 
 @typing.overload
-def resources(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
+def conda(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
     ...
 
 @typing.overload
-def resources(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
+def conda(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
     ...
 
-def resources(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None, *, cpu: int = 1, gpu: typing.Optional[int] = None, disk: typing.Optional[int] = None, memory: int = 4096, shared_memory: typing.Optional[int] = None):
+def conda(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None, *, packages: typing.Dict[str, str] = {}, libraries: typing.Dict[str, str] = {}, python: typing.Optional[str] = None, disabled: bool = False):
     """
-    Specifies the resources needed when executing this step.
-
-    Use `@resources` to specify the resource requirements
-    independently of the specific compute layer (`@batch`, `@kubernetes`).
-
-    You can choose the compute layer on the command line by executing e.g.
-    ```
-    python myflow.py run --with batch
-    ```
-    or
-    ```
-    python myflow.py run --with kubernetes
-    ```
-    which executes the flow on the desired system using the
-    requirements specified in `@resources`.
+    Specifies the Conda environment for the step.
 
-    Parameters
-    ----------
-    cpu : int, default 1
-        Number of CPUs required for this step.
-    gpu : int, optional, default None
-        Number of GPUs required for this step.
-    disk : int, optional, default None
-        Disk size (in MB) required for this step. Only applies on Kubernetes.
-    memory : int, default 4096
-        Memory size (in MB) required for this step.
-    shared_memory : int, optional, default None
-        The value for the size (in MiB) of the /dev/shm volume for this step.
-        This parameter maps to the `--shm-size` option in Docker.
+    Information in this decorator will augment any
+    attributes set in the `@conda_base` flow-level decorator. Hence,
+    you can use `@conda_base` to set packages required by all
+    steps and use `@conda` to specify step-specific overrides.
     """
     ...
 
 @typing.overload
-def retry(*, times: int = 3, minutes_between_retries: int = 2) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
+def environment(*, vars: typing.Dict[str, str] = {}) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
     """
-    Specifies the number of times the task corresponding
-    to a step needs to be retried.
-
-    This decorator is useful for handling transient errors, such as networking issues.
-    If your task contains operations that can't be retried safely, e.g. database updates,
-    it is advisable to annotate it with `@retry(times=0)`.
-
-    This can be used in conjunction with the `@catch` decorator. The `@catch`
-    decorator will execute a no-op task after all retries have been exhausted,
-    ensuring that the flow execution can continue.
-
-    Parameters
-    ----------
-    times : int, default 3
-        Number of times to retry this task.
-    minutes_between_retries : int, default 2
-        Number of minutes between retries.
+    Specifies environment variables to be set prior to the execution of a step.
     """
     ...
 
 @typing.overload
-def retry(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
+def environment(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
     ...
 
 @typing.overload
-def retry(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
+def environment(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
     ...
 
-def retry(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None, *, times: int = 3, minutes_between_retries: int = 2):
+def environment(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None, *, vars: typing.Dict[str, str] = {}):
     """
-    Specifies the number of times the task corresponding
-    to a step needs to be retried.
-
-    This decorator is useful for handling transient errors, such as networking issues.
-    If your task contains operations that can't be retried safely, e.g. database updates,
-    it is advisable to annotate it with `@retry(times=0)`.
-
-    This can be used in conjunction with the `@catch` decorator. The `@catch`
-    decorator will execute a no-op task after all retries have been exhausted,
-    ensuring that the flow execution can continue.
-
-    Parameters
-    ----------
-    times : int, default 3
-        Number of times to retry this task.
-    minutes_between_retries : int, default 2
-        Number of minutes between retries.
+    Specifies environment variables to be set prior to the execution of a step.
     """
     ...
 
 @typing.overload
-def timeout(*, seconds: int = 0, minutes: int = 0, hours: int = 0) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
+def parallel(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
     """
-    Specifies a timeout for your step.
-
-    This decorator is useful if this step may hang indefinitely.
-
-    This can be used in conjunction with the `@retry` decorator as well as the `@catch` decorator.
-    A timeout is considered to be an exception thrown by the step. It will cause the step to be
-    retried if needed and the exception will be caught by the `@catch` decorator, if present.
-
-    Note that all the values specified in parameters are added together so if you specify
-    60 seconds and 1 hour, the decorator will have an effective timeout of 1 hour and 1 minute.
-
-    Parameters
-    ----------
-    seconds : int, default 0
-        Number of seconds to wait prior to timing out.
-    minutes : int, default 0
-        Number of minutes to wait prior to timing out.
-    hours : int, default 0
-        Number of hours to wait prior to timing out.
+    Decorator prototype for all step decorators. This function gets specialized
+    and imported for all decorators types by _import_plugin_decorators().
     """
     ...
 
 @typing.overload
-def timeout(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
-    ...
-
-@typing.overload
-def timeout(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
+def parallel(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
     ...
 
-def timeout(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None, *, seconds: int = 0, minutes: int = 0, hours: int = 0):
+def parallel(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None):
     """
-    Specifies a timeout for your step.
-
-    This decorator is useful if this step may hang indefinitely.
-
-    This can be used in conjunction with the `@retry` decorator as well as the `@catch` decorator.
-    A timeout is considered to be an exception thrown by the step. It will cause the step to be
-    retried if needed and the exception will be caught by the `@catch` decorator, if present.
-
-    Note that all the values specified in parameters are added together so if you specify
-    60 seconds and 1 hour, the decorator will have an effective timeout of 1 hour and 1 minute.
-
-    Parameters
-    ----------
-    seconds : int, default 0
-        Number of seconds to wait prior to timing out.
-    minutes : int, default 0
-        Number of minutes to wait prior to timing out.
-    hours : int, default 0
-        Number of hours to wait prior to timing out.
+    Decorator prototype for all step decorators. This function gets specialized
+    and imported for all decorators types by _import_plugin_decorators().
    """
    ...
 
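Despite the minus lines, `@resources`, `@retry`, and `@timeout` do not appear to be removed in 2.12.29: note that `@parallel` is added here and deleted from its old position in a later hunk, which suggests the generator reordered the decorator stubs within the file (and trimmed the generated Parameters sections from the docstrings). A hedged sketch of the two decorators that now occupy this position; the package pin, Python version, and variable name are illustrative assumptions:

from metaflow import FlowSpec, conda, environment, step

class EnvFlow(FlowSpec):
    # Hypothetical pins; @conda builds an isolated Conda env for the step,
    # @environment injects MODE before the step body runs.
    @conda(packages={"pandas": "2.2.2"}, python="3.11.9")
    @environment(vars={"MODE": "demo"})
    @step
    def start(self):
        import os
        import pandas as pd
        print(pd.__version__, os.environ["MODE"])
        self.next(self.end)

    @step
    def end(self):
        pass

if __name__ == "__main__":
    EnvFlow()

The `@conda` pins only take effect when the flow is run with the Conda environment enabled, e.g. `python env_flow.py --environment=conda run` (file name hypothetical).
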
@@ -1050,15 +217,6 @@ def pypi(*, packages: typing.Dict[str, str] = {}, python: typing.Optional[str] =
     attributes set in the `@pyi_base` flow-level decorator. Hence,
     you can use `@pypi_base` to set packages required by all
     steps and use `@pypi` to specify step-specific overrides.
-
-    Parameters
-    ----------
-    packages : Dict[str, str], default: {}
-        Packages to use for this step. The key is the name of the package
-        and the value is the version to use.
-    python : str, optional, default: None
-        Version of Python to use, e.g. '3.7.4'. A default value of None implies
-        that the version used will correspond to the version of the Python interpreter used to start the run.
     """
     ...
 
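This hunk, and the next one for the `@pypi` dispatcher, only strip the generated Parameters block from the `@pypi` docstring; the `packages` and `python` keywords in the signature shown in the hunk header are unchanged. A short usage sketch with a hypothetical package pin:

from metaflow import FlowSpec, pypi, step

class PypiFlow(FlowSpec):
    # Hypothetical pin; @pypi installs the packages into an isolated
    # environment used by this step only.
    @pypi(packages={"requests": "2.32.3"}, python="3.11.9")
    @step
    def start(self):
        import requests
        print(requests.__version__)
        self.next(self.end)

    @step
    def end(self):
        pass

if __name__ == "__main__":
    PypiFlow()

As with `@conda`, the pins apply when the flow runs with the matching environment, e.g. `python pypi_flow.py --environment=pypi run` (file name hypothetical).
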
@@ -1078,181 +236,6 @@ def pypi(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typ
  attributes set in the `@pypi_base` flow-level decorator. Hence,
  you can use `@pypi_base` to set packages required by all
  steps and use `@pypi` to specify step-specific overrides.
-
- Parameters
- ----------
- packages : Dict[str, str], default: {}
- Packages to use for this step. The key is the name of the package
- and the value is the version to use.
- python : str, optional, default: None
- Version of Python to use, e.g. '3.7.4'. A default value of None implies
- that the version used will correspond to the version of the Python interpreter used to start the run.
- """
- ...
-
- @typing.overload
- def parallel(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
- """
- Decorator prototype for all step decorators. This function gets specialized
- and imported for all decorators types by _import_plugin_decorators().
- """
- ...
-
- @typing.overload
- def parallel(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
- ...
-
- def parallel(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None):
- """
- Decorator prototype for all step decorators. This function gets specialized
- and imported for all decorators types by _import_plugin_decorators().
- """
- ...
-
- @typing.overload
- def batch(*, cpu: int = 1, gpu: int = 0, memory: int = 4096, image: typing.Optional[str] = None, queue: str = "METAFLOW_BATCH_JOB_QUEUE", iam_role: str = "METAFLOW_ECS_S3_ACCESS_IAM_ROLE", execution_role: str = "METAFLOW_ECS_FARGATE_EXECUTION_ROLE", shared_memory: typing.Optional[int] = None, max_swap: typing.Optional[int] = None, swappiness: typing.Optional[int] = None, use_tmpfs: bool = False, tmpfs_tempdir: bool = True, tmpfs_size: typing.Optional[int] = None, tmpfs_path: typing.Optional[str] = None, inferentia: int = 0, trainium: int = None, efa: int = 0, ephemeral_storage: int = None, log_driver: typing.Optional[str] = None, log_options: typing.Optional[typing.List[str]] = None) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
- """
- Specifies that this step should execute on [AWS Batch](https://aws.amazon.com/batch/).
-
- Parameters
- ----------
- cpu : int, default 1
- Number of CPUs required for this step. If `@resources` is
- also present, the maximum value from all decorators is used.
- gpu : int, default 0
- Number of GPUs required for this step. If `@resources` is
- also present, the maximum value from all decorators is used.
- memory : int, default 4096
- Memory size (in MB) required for this step. If
- `@resources` is also present, the maximum value from all decorators is
- used.
- image : str, optional, default None
- Docker image to use when launching on AWS Batch. If not specified, and
- METAFLOW_BATCH_CONTAINER_IMAGE is specified, that image is used. If
- not, a default Docker image mapping to the current version of Python is used.
- queue : str, default METAFLOW_BATCH_JOB_QUEUE
- AWS Batch Job Queue to submit the job to.
- iam_role : str, default METAFLOW_ECS_S3_ACCESS_IAM_ROLE
- AWS IAM role that AWS Batch container uses to access AWS cloud resources.
- execution_role : str, default METAFLOW_ECS_FARGATE_EXECUTION_ROLE
- AWS IAM role that AWS Batch can use [to trigger AWS Fargate tasks]
- (https://docs.aws.amazon.com/batch/latest/userguide/execution-IAM-role.html).
- shared_memory : int, optional, default None
- The value for the size (in MiB) of the /dev/shm volume for this step.
- This parameter maps to the `--shm-size` option in Docker.
- max_swap : int, optional, default None
- The total amount of swap memory (in MiB) a container can use for this
- step. This parameter is translated to the `--memory-swap` option in
- Docker where the value is the sum of the container memory plus the
- `max_swap` value.
- swappiness : int, optional, default None
- This allows you to tune memory swappiness behavior for this step.
- A swappiness value of 0 causes swapping not to happen unless absolutely
- necessary. A swappiness value of 100 causes pages to be swapped very
- aggressively. Accepted values are whole numbers between 0 and 100.
- use_tmpfs : bool, default False
- This enables an explicit tmpfs mount for this step. Note that tmpfs is
- not available on Fargate compute environments
- tmpfs_tempdir : bool, default True
- sets METAFLOW_TEMPDIR to tmpfs_path if set for this step.
- tmpfs_size : int, optional, default None
- The value for the size (in MiB) of the tmpfs mount for this step.
- This parameter maps to the `--tmpfs` option in Docker. Defaults to 50% of the
- memory allocated for this step.
- tmpfs_path : str, optional, default None
- Path to tmpfs mount for this step. Defaults to /metaflow_temp.
- inferentia : int, default 0
- Number of Inferentia chips required for this step.
- trainium : int, default None
- Alias for inferentia. Use only one of the two.
- efa : int, default 0
- Number of elastic fabric adapter network devices to attach to container
- ephemeral_storage : int, default None
- The total amount, in GiB, of ephemeral storage to set for the task, 21-200GiB.
- This is only relevant for Fargate compute environments
- log_driver: str, optional, default None
- The log driver to use for the Amazon ECS container.
- log_options: List[str], optional, default None
- List of strings containing options for the chosen log driver. The configurable values
- depend on the `log driver` chosen. Validation of these options is not supported yet.
- Example: [`awslogs-group:aws/batch/job`]
- """
- ...
-
- @typing.overload
- def batch(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
- ...
-
- @typing.overload
- def batch(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
- ...
-
- def batch(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None, *, cpu: int = 1, gpu: int = 0, memory: int = 4096, image: typing.Optional[str] = None, queue: str = "METAFLOW_BATCH_JOB_QUEUE", iam_role: str = "METAFLOW_ECS_S3_ACCESS_IAM_ROLE", execution_role: str = "METAFLOW_ECS_FARGATE_EXECUTION_ROLE", shared_memory: typing.Optional[int] = None, max_swap: typing.Optional[int] = None, swappiness: typing.Optional[int] = None, use_tmpfs: bool = False, tmpfs_tempdir: bool = True, tmpfs_size: typing.Optional[int] = None, tmpfs_path: typing.Optional[str] = None, inferentia: int = 0, trainium: int = None, efa: int = 0, ephemeral_storage: int = None, log_driver: typing.Optional[str] = None, log_options: typing.Optional[typing.List[str]] = None):
- """
- Specifies that this step should execute on [AWS Batch](https://aws.amazon.com/batch/).
-
- Parameters
- ----------
- cpu : int, default 1
- Number of CPUs required for this step. If `@resources` is
- also present, the maximum value from all decorators is used.
- gpu : int, default 0
- Number of GPUs required for this step. If `@resources` is
- also present, the maximum value from all decorators is used.
- memory : int, default 4096
- Memory size (in MB) required for this step. If
- `@resources` is also present, the maximum value from all decorators is
- used.
- image : str, optional, default None
- Docker image to use when launching on AWS Batch. If not specified, and
- METAFLOW_BATCH_CONTAINER_IMAGE is specified, that image is used. If
- not, a default Docker image mapping to the current version of Python is used.
- queue : str, default METAFLOW_BATCH_JOB_QUEUE
- AWS Batch Job Queue to submit the job to.
- iam_role : str, default METAFLOW_ECS_S3_ACCESS_IAM_ROLE
- AWS IAM role that AWS Batch container uses to access AWS cloud resources.
- execution_role : str, default METAFLOW_ECS_FARGATE_EXECUTION_ROLE
- AWS IAM role that AWS Batch can use [to trigger AWS Fargate tasks]
- (https://docs.aws.amazon.com/batch/latest/userguide/execution-IAM-role.html).
- shared_memory : int, optional, default None
- The value for the size (in MiB) of the /dev/shm volume for this step.
- This parameter maps to the `--shm-size` option in Docker.
- max_swap : int, optional, default None
- The total amount of swap memory (in MiB) a container can use for this
- step. This parameter is translated to the `--memory-swap` option in
- Docker where the value is the sum of the container memory plus the
- `max_swap` value.
- swappiness : int, optional, default None
- This allows you to tune memory swappiness behavior for this step.
- A swappiness value of 0 causes swapping not to happen unless absolutely
- necessary. A swappiness value of 100 causes pages to be swapped very
- aggressively. Accepted values are whole numbers between 0 and 100.
- use_tmpfs : bool, default False
- This enables an explicit tmpfs mount for this step. Note that tmpfs is
- not available on Fargate compute environments
- tmpfs_tempdir : bool, default True
- sets METAFLOW_TEMPDIR to tmpfs_path if set for this step.
- tmpfs_size : int, optional, default None
- The value for the size (in MiB) of the tmpfs mount for this step.
- This parameter maps to the `--tmpfs` option in Docker. Defaults to 50% of the
- memory allocated for this step.
- tmpfs_path : str, optional, default None
- Path to tmpfs mount for this step. Defaults to /metaflow_temp.
- inferentia : int, default 0
- Number of Inferentia chips required for this step.
- trainium : int, default None
- Alias for inferentia. Use only one of the two.
- efa : int, default 0
- Number of elastic fabric adapter network devices to attach to container
- ephemeral_storage : int, default None
- The total amount, in GiB, of ephemeral storage to set for the task, 21-200GiB.
- This is only relevant for Fargate compute environments
- log_driver: str, optional, default None
- The log driver to use for the Amazon ECS container.
- log_options: List[str], optional, default None
- List of strings containing options for the chosen log driver. The configurable values
- depend on the `log driver` chosen. Validation of these options is not supported yet.
- Example: [`awslogs-group:aws/batch/job`]
  """
  ...
 
@@ -1265,15 +248,6 @@ def catch(*, var: typing.Optional[str] = None, print_exception: bool = True) ->
  contains the exception raised. You can use it to detect the presence
  of errors, indicating that all happy-path artifacts produced by the step
  are missing.
-
- Parameters
- ----------
- var : str, optional, default None
- Name of the artifact in which to store the caught exception.
- If not specified, the exception is not stored.
- print_exception : bool, default True
- Determines whether or not the exception is printed to
- stdout when caught.
  """
  ...
 
@@ -1293,510 +267,227 @@ def catch(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], ty
  contains the exception raised. You can use it to detect the presence
  of errors, indicating that all happy-path artifacts produced by the step
  are missing.
-
- Parameters
- ----------
- var : str, optional, default None
- Name of the artifact in which to store the caught exception.
- If not specified, the exception is not stored.
- print_exception : bool, default True
- Determines whether or not the exception is printed to
- stdout when caught.
  """
  ...
 
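The retained `@catch` docstring explains that the named artifact stores the raised exception, signalling that happy-path artifacts are missing. A minimal sketch of that pattern (the artifact name `compute_failed` is an arbitrary choice):

```
from metaflow import FlowSpec, step, catch

class CatchFlow(FlowSpec):
    @catch(var='compute_failed')
    @step
    def start(self):
        self.x = 1 / 0  # raises; @catch stores the exception and lets the flow continue
        self.next(self.end)

    @step
    def end(self):
        if self.compute_failed:
            # Happy-path artifacts such as self.x are missing here.
            print('start failed with:', self.compute_failed)

if __name__ == '__main__':
    CatchFlow()
```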
  @typing.overload
- def environment(*, vars: typing.Dict[str, str] = {}) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
+ def resources(*, cpu: int = 1, gpu: typing.Optional[int] = None, disk: typing.Optional[int] = None, memory: int = 4096, shared_memory: typing.Optional[int] = None) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
  """
- Specifies environment variables to be set prior to the execution of a step.
+ Specifies the resources needed when executing this step.
 
- Parameters
- ----------
- vars : Dict[str, str], default {}
- Dictionary of environment variables to set.
+ Use `@resources` to specify the resource requirements
+ independently of the specific compute layer (`@batch`, `@kubernetes`).
+
+ You can choose the compute layer on the command line by executing e.g.
+ ```
+ python myflow.py run --with batch
+ ```
+ or
+ ```
+ python myflow.py run --with kubernetes
+ ```
+ which executes the flow on the desired system using the
+ requirements specified in `@resources`.
  """
  ...
 
  @typing.overload
- def environment(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
+ def resources(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
  ...
 
  @typing.overload
- def environment(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
+ def resources(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
  ...
 
- def environment(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None, *, vars: typing.Dict[str, str] = {}):
+ def resources(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None, *, cpu: int = 1, gpu: typing.Optional[int] = None, disk: typing.Optional[int] = None, memory: int = 4096, shared_memory: typing.Optional[int] = None):
  """
- Specifies environment variables to be set prior to the execution of a step.
+ Specifies the resources needed when executing this step.
 
- Parameters
- ----------
- vars : Dict[str, str], default {}
- Dictionary of environment variables to set.
+ Use `@resources` to specify the resource requirements
+ independently of the specific compute layer (`@batch`, `@kubernetes`).
+
+ You can choose the compute layer on the command line by executing e.g.
+ ```
+ python myflow.py run --with batch
+ ```
+ or
+ ```
+ python myflow.py run --with kubernetes
+ ```
+ which executes the flow on the desired system using the
+ requirements specified in `@resources`.
  """
  ...
 
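The new `@resources` docstring is declarative: the requirements only bind when a compute layer is chosen at run time. A minimal sketch (the CPU and memory figures are illustrative):

```
from metaflow import FlowSpec, step, resources

class ResourcesFlow(FlowSpec):
    @resources(cpu=4, memory=16384)
    @step
    def start(self):
        # Runs locally as-is; launching with `run --with batch` or
        # `run --with kubernetes` applies these requirements remotely.
        self.next(self.end)

    @step
    def end(self):
        pass

if __name__ == '__main__':
    ResourcesFlow()
```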
  @typing.overload
- def card(*, type: str = "default", id: typing.Optional[str] = None, options: typing.Dict[str, typing.Any] = {}, timeout: int = 45) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
+ def batch(*, cpu: int = 1, gpu: int = 0, memory: int = 4096, image: typing.Optional[str] = None, queue: str = 'METAFLOW_BATCH_JOB_QUEUE', iam_role: str = 'METAFLOW_ECS_S3_ACCESS_IAM_ROLE', execution_role: str = 'METAFLOW_ECS_FARGATE_EXECUTION_ROLE', shared_memory: typing.Optional[int] = None, max_swap: typing.Optional[int] = None, swappiness: typing.Optional[int] = None, use_tmpfs: bool = False, tmpfs_tempdir: bool = True, tmpfs_size: typing.Optional[int] = None, tmpfs_path: typing.Optional[str] = None, inferentia: int = 0, trainium: int = None, efa: int = 0, ephemeral_storage: int = None, log_driver: typing.Optional[str] = None, log_options: typing.Optional[typing.List[str]] = None) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
  """
- Creates a human-readable report, a Metaflow Card, after this step completes.
-
- Note that you may add multiple `@card` decorators in a step with different parameters.
-
- Parameters
- ----------
- type : str, default 'default'
- Card type.
- id : str, optional, default None
- If multiple cards are present, use this id to identify this card.
- options : Dict[str, Any], default {}
- Options passed to the card. The contents depend on the card type.
- timeout : int, default 45
- Interrupt reporting if it takes more than this many seconds.
-
-
+ Specifies that this step should execute on [AWS Batch](https://aws.amazon.com/batch/).
  """
  ...
 
  @typing.overload
- def card(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
+ def batch(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
  ...
 
  @typing.overload
- def card(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
+ def batch(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
  ...
 
- def card(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None, *, type: str = "default", id: typing.Optional[str] = None, options: typing.Dict[str, typing.Any] = {}, timeout: int = 45):
+ def batch(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None, *, cpu: int = 1, gpu: int = 0, memory: int = 4096, image: typing.Optional[str] = None, queue: str = 'METAFLOW_BATCH_JOB_QUEUE', iam_role: str = 'METAFLOW_ECS_S3_ACCESS_IAM_ROLE', execution_role: str = 'METAFLOW_ECS_FARGATE_EXECUTION_ROLE', shared_memory: typing.Optional[int] = None, max_swap: typing.Optional[int] = None, swappiness: typing.Optional[int] = None, use_tmpfs: bool = False, tmpfs_tempdir: bool = True, tmpfs_size: typing.Optional[int] = None, tmpfs_path: typing.Optional[str] = None, inferentia: int = 0, trainium: int = None, efa: int = 0, ephemeral_storage: int = None, log_driver: typing.Optional[str] = None, log_options: typing.Optional[typing.List[str]] = None):
  """
- Creates a human-readable report, a Metaflow Card, after this step completes.
-
- Note that you may add multiple `@card` decorators in a step with different parameters.
-
- Parameters
- ----------
- type : str, default 'default'
- Card type.
- id : str, optional, default None
- If multiple cards are present, use this id to identify this card.
- options : Dict[str, Any], default {}
- Options passed to the card. The contents depend on the card type.
- timeout : int, default 45
- Interrupt reporting if it takes more than this many seconds.
-
-
+ Specifies that this step should execute on [AWS Batch](https://aws.amazon.com/batch/).
  """
  ...
 
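With the parameter table collapsed in the new stub, a short sketch of `@batch` with its most common arguments may help; the values are illustrative and assume an AWS Batch job queue has been configured for Metaflow:

```
from metaflow import FlowSpec, step, batch

class BatchFlow(FlowSpec):
    @batch(cpu=2, memory=8192)
    @step
    def start(self):
        print('running on AWS Batch')
        self.next(self.end)

    @step
    def end(self):
        pass

if __name__ == '__main__':
    BatchFlow()
```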
  @typing.overload
- def secrets(*, sources: typing.List[typing.Union[str, typing.Dict[str, typing.Any]]] = []) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
+ def card(*, type: str = 'default', id: typing.Optional[str] = None, options: typing.Dict[str, typing.Any] = {}, timeout: int = 45) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
  """
- Specifies secrets to be retrieved and injected as environment variables prior to
- the execution of a step.
+ Creates a human-readable report, a Metaflow Card, after this step completes.
 
- Parameters
- ----------
- sources : List[Union[str, Dict[str, Any]]], default: []
- List of secret specs, defining how the secrets are to be retrieved
+ Note that you may add multiple `@card` decorators in a step with different parameters.
  """
  ...
 
  @typing.overload
- def secrets(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
+ def card(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
  ...
 
  @typing.overload
- def secrets(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
+ def card(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
  ...
 
- def secrets(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None, *, sources: typing.List[typing.Union[str, typing.Dict[str, typing.Any]]] = []):
+ def card(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None, *, type: str = 'default', id: typing.Optional[str] = None, options: typing.Dict[str, typing.Any] = {}, timeout: int = 45):
  """
- Specifies secrets to be retrieved and injected as environment variables prior to
- the execution of a step.
+ Creates a human-readable report, a Metaflow Card, after this step completes.
 
- Parameters
- ----------
- sources : List[Union[str, Dict[str, Any]]], default: []
- List of secret specs, defining how the secrets are to be retrieved
+ Note that you may add multiple `@card` decorators in a step with different parameters.
  """
  ...
 
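As the overloads above show, `@card` can be attached bare or with arguments, and may appear multiple times on one step. A minimal sketch:

```
from metaflow import FlowSpec, step, card

class CardFlow(FlowSpec):
    @card  # renders the step's artifacts with the default card type
    @step
    def start(self):
        self.answer = 42
        self.next(self.end)

    @step
    def end(self):
        pass

if __name__ == '__main__':
    CardFlow()
```

After a run, the generated report can be inspected with the card CLI, e.g. `python card_flow.py card view start`.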
  @typing.overload
- def conda(*, packages: typing.Dict[str, str] = {}, libraries: typing.Dict[str, str] = {}, python: typing.Optional[str] = None, disabled: bool = False) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
+ def timeout(*, seconds: int = 0, minutes: int = 0, hours: int = 0) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
  """
- Specifies the Conda environment for the step.
+ Specifies a timeout for your step.
 
- Information in this decorator will augment any
- attributes set in the `@conda_base` flow-level decorator. Hence,
- you can use `@conda_base` to set packages required by all
- steps and use `@conda` to specify step-specific overrides.
+ This decorator is useful if this step may hang indefinitely.
 
- Parameters
- ----------
- packages : Dict[str, str], default {}
- Packages to use for this step. The key is the name of the package
- and the value is the version to use.
- libraries : Dict[str, str], default {}
- Supported for backward compatibility. When used with packages, packages will take precedence.
- python : str, optional, default None
- Version of Python to use, e.g. '3.7.4'. A default value of None implies
- that the version used will correspond to the version of the Python interpreter used to start the run.
- disabled : bool, default False
- If set to True, disables @conda.
+ This can be used in conjunction with the `@retry` decorator as well as the `@catch` decorator.
+ A timeout is considered to be an exception thrown by the step. It will cause the step to be
+ retried if needed and the exception will be caught by the `@catch` decorator, if present.
+
+ Note that all the values specified in parameters are added together so if you specify
+ 60 seconds and 1 hour, the decorator will have an effective timeout of 1 hour and 1 minute.
  """
  ...
 
  @typing.overload
- def conda(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
+ def timeout(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
  ...
 
  @typing.overload
- def conda(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
+ def timeout(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
  ...
 
- def conda(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None, *, packages: typing.Dict[str, str] = {}, libraries: typing.Dict[str, str] = {}, python: typing.Optional[str] = None, disabled: bool = False):
+ def timeout(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None, *, seconds: int = 0, minutes: int = 0, hours: int = 0):
  """
- Specifies the Conda environment for the step.
+ Specifies a timeout for your step.
 
- Information in this decorator will augment any
- attributes set in the `@conda_base` flow-level decorator. Hence,
- you can use `@conda_base` to set packages required by all
- steps and use `@conda` to specify step-specific overrides.
+ This decorator is useful if this step may hang indefinitely.
 
- Parameters
- ----------
- packages : Dict[str, str], default {}
- Packages to use for this step. The key is the name of the package
- and the value is the version to use.
- libraries : Dict[str, str], default {}
- Supported for backward compatibility. When used with packages, packages will take precedence.
- python : str, optional, default None
- Version of Python to use, e.g. '3.7.4'. A default value of None implies
- that the version used will correspond to the version of the Python interpreter used to start the run.
- disabled : bool, default False
- If set to True, disables @conda.
+ This can be used in conjunction with the `@retry` decorator as well as the `@catch` decorator.
+ A timeout is considered to be an exception thrown by the step. It will cause the step to be
+ retried if needed and the exception will be caught by the `@catch` decorator, if present.
+
+ Note that all the values specified in parameters are added together so if you specify
+ 60 seconds and 1 hour, the decorator will have an effective timeout of 1 hour and 1 minute.
  """
  ...
 
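The docstring's additive rule (60 seconds plus 1 hour yields an effective limit of 1:01:00) reads directly in code:

```
import time
from metaflow import FlowSpec, step, timeout

class TimeoutFlow(FlowSpec):
    @timeout(hours=1, seconds=60)  # values add up: effective limit is 1 hour 1 minute
    @step
    def start(self):
        time.sleep(5)  # comfortably under the limit
        self.next(self.end)

    @step
    def end(self):
        pass

if __name__ == '__main__':
    TimeoutFlow()
```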
- def kubernetes(*, cpu: int = 1, memory: int = 4096, disk: int = 10240, image: typing.Optional[str] = None, image_pull_policy: str = "KUBERNETES_IMAGE_PULL_POLICY", service_account: str = "METAFLOW_KUBERNETES_SERVICE_ACCOUNT", secrets: typing.Optional[typing.List[str]] = None, node_selector: typing.Union[typing.Dict[str, str], str, None] = None, namespace: str = "METAFLOW_KUBERNETES_NAMESPACE", gpu: typing.Optional[int] = None, gpu_vendor: str = "KUBERNETES_GPU_VENDOR", tolerations: typing.List[str] = [], use_tmpfs: bool = False, tmpfs_tempdir: bool = True, tmpfs_size: typing.Optional[int] = None, tmpfs_path: typing.Optional[str] = "/metaflow_temp", persistent_volume_claims: typing.Optional[typing.Dict[str, str]] = None, shared_memory: typing.Optional[int] = None, port: typing.Optional[int] = None, compute_pool: typing.Optional[str] = None, hostname_resolution_timeout: int = 600) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
+ @typing.overload
+ def secrets(*, sources: typing.List[typing.Union[str, typing.Dict[str, typing.Any]]] = []) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
  """
- Specifies that this step should execute on Kubernetes.
-
- Parameters
- ----------
- cpu : int, default 1
- Number of CPUs required for this step. If `@resources` is
- also present, the maximum value from all decorators is used.
- memory : int, default 4096
- Memory size (in MB) required for this step. If
- `@resources` is also present, the maximum value from all decorators is
- used.
- disk : int, default 10240
- Disk size (in MB) required for this step. If
- `@resources` is also present, the maximum value from all decorators is
- used.
- image : str, optional, default None
- Docker image to use when launching on Kubernetes. If not specified, and
- METAFLOW_KUBERNETES_CONTAINER_IMAGE is specified, that image is used. If
- not, a default Docker image mapping to the current version of Python is used.
- image_pull_policy: str, default KUBERNETES_IMAGE_PULL_POLICY
- If given, the imagePullPolicy to be applied to the Docker image of the step.
- service_account : str, default METAFLOW_KUBERNETES_SERVICE_ACCOUNT
- Kubernetes service account to use when launching pod in Kubernetes.
- secrets : List[str], optional, default None
- Kubernetes secrets to use when launching pod in Kubernetes. These
- secrets are in addition to the ones defined in `METAFLOW_KUBERNETES_SECRETS`
- in Metaflow configuration.
- node_selector: Union[Dict[str,str], str], optional, default None
- Kubernetes node selector(s) to apply to the pod running the task.
- Can be passed in as a comma separated string of values e.g. "kubernetes.io/os=linux,kubernetes.io/arch=amd64"
- or as a dictionary {"kubernetes.io/os": "linux", "kubernetes.io/arch": "amd64"}
- namespace : str, default METAFLOW_KUBERNETES_NAMESPACE
- Kubernetes namespace to use when launching pod in Kubernetes.
- gpu : int, optional, default None
- Number of GPUs required for this step. A value of zero implies that
- the scheduled node should not have GPUs.
- gpu_vendor : str, default KUBERNETES_GPU_VENDOR
- The vendor of the GPUs to be used for this step.
- tolerations : List[str], default []
- The default is extracted from METAFLOW_KUBERNETES_TOLERATIONS.
- Kubernetes tolerations to use when launching pod in Kubernetes.
- use_tmpfs : bool, default False
- This enables an explicit tmpfs mount for this step.
- tmpfs_tempdir : bool, default True
- sets METAFLOW_TEMPDIR to tmpfs_path if set for this step.
- tmpfs_size : int, optional, default: None
- The value for the size (in MiB) of the tmpfs mount for this step.
- This parameter maps to the `--tmpfs` option in Docker. Defaults to 50% of the
- memory allocated for this step.
- tmpfs_path : str, optional, default /metaflow_temp
- Path to tmpfs mount for this step.
- persistent_volume_claims : Dict[str, str], optional, default None
- A map (dictionary) of persistent volumes to be mounted to the pod for this step. The map is from persistent
- volumes to the path to which the volume is to be mounted, e.g., `{'pvc-name': '/path/to/mount/on'}`.
- shared_memory: int, optional
- Shared memory size (in MiB) required for this step
- port: int, optional
- Port number to specify in the Kubernetes job object
- compute_pool : str, optional, default None
- Compute pool to be used for for this step.
- If not specified, any accessible compute pool within the perimeter is used.
- hostname_resolution_timeout: int, default 10 * 60
- Timeout in seconds for the workers tasks in the gang scheduled cluster to resolve the hostname of control task.
- Only applicable when @parallel is used.
+ Specifies secrets to be retrieved and injected as environment variables prior to
+ the execution of a step.
  """
  ...
 
- def airflow_s3_key_sensor(*, timeout: int, poke_interval: int, mode: str, exponential_backoff: bool, pool: str, soft_fail: bool, name: str, description: str, bucket_key: typing.Union[str, typing.List[str]], bucket_name: str, wildcard_match: bool, aws_conn_id: str, verify: bool) -> typing.Callable[[typing.Type[FlowSpecDerived]], typing.Type[FlowSpecDerived]]:
+ @typing.overload
+ def secrets(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
+ ...
+
+ @typing.overload
+ def secrets(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
+ ...
+
+ def secrets(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None, *, sources: typing.List[typing.Union[str, typing.Dict[str, typing.Any]]] = []):
  """
- The `@airflow_s3_key_sensor` decorator attaches a Airflow [S3KeySensor](https://airflow.apache.org/docs/apache-airflow-providers-amazon/stable/_api/airflow/providers/amazon/aws/sensors/s3/index.html#airflow.providers.amazon.aws.sensors.s3.S3KeySensor)
- before the start step of the flow. This decorator only works when a flow is scheduled on Airflow
- and is compiled using `airflow create`. More than one `@airflow_s3_key_sensor` can be
- added as a flow decorators. Adding more than one decorator will ensure that `start` step
- starts only after all sensors finish.
-
- Parameters
- ----------
- timeout : int
- Time, in seconds before the task times out and fails. (Default: 3600)
- poke_interval : int
- Time in seconds that the job should wait in between each try. (Default: 60)
- mode : str
- How the sensor operates. Options are: { poke | reschedule }. (Default: "poke")
- exponential_backoff : bool
- allow progressive longer waits between pokes by using exponential backoff algorithm. (Default: True)
- pool : str
- the slot pool this task should run in,
- slot pools are a way to limit concurrency for certain tasks. (Default:None)
- soft_fail : bool
- Set to true to mark the task as SKIPPED on failure. (Default: False)
- name : str
- Name of the sensor on Airflow
- description : str
- Description of sensor in the Airflow UI
- bucket_key : Union[str, List[str]]
- The key(s) being waited on. Supports full s3:// style url or relative path from root level.
- When it's specified as a full s3:// url, please leave `bucket_name` as None
- bucket_name : str
- Name of the S3 bucket. Only needed when bucket_key is not provided as a full s3:// url.
- When specified, all the keys passed to bucket_key refers to this bucket. (Default:None)
- wildcard_match : bool
- whether the bucket_key should be interpreted as a Unix wildcard pattern. (Default: False)
- aws_conn_id : str
- a reference to the s3 connection on Airflow. (Default: None)
- verify : bool
- Whether or not to verify SSL certificates for S3 connection. (Default: None)
+ Specifies secrets to be retrieved and injected as environment variables prior to
+ the execution of a step.
  """
  ...
 
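A sketch of `@secrets` usage follows; the source id `db-credentials` and the `DB_USER` key are hypothetical and depend entirely on the configured secrets backend:

```
import os
from metaflow import FlowSpec, step, secrets

class SecretsFlow(FlowSpec):
    @secrets(sources=['db-credentials'])  # hypothetical secret id
    @step
    def start(self):
        # Keys of the retrieved secret are injected as environment variables.
        print('DB_USER present:', 'DB_USER' in os.environ)
        self.next(self.end)

    @step
    def end(self):
        pass

if __name__ == '__main__':
    SecretsFlow()
```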
- def project(*, name: str) -> typing.Callable[[typing.Type[FlowSpecDerived]], typing.Type[FlowSpecDerived]]:
+ def kubernetes(*, cpu: int = 1, memory: int = 4096, disk: int = 10240, image: typing.Optional[str] = None, image_pull_policy: str = 'KUBERNETES_IMAGE_PULL_POLICY', service_account: str = 'METAFLOW_KUBERNETES_SERVICE_ACCOUNT', secrets: typing.Optional[typing.List[str]] = None, node_selector: typing.Union[typing.Dict[str, str], str, None] = None, namespace: str = 'METAFLOW_KUBERNETES_NAMESPACE', gpu: typing.Optional[int] = None, gpu_vendor: str = 'KUBERNETES_GPU_VENDOR', tolerations: typing.List[str] = [], use_tmpfs: bool = False, tmpfs_tempdir: bool = True, tmpfs_size: typing.Optional[int] = None, tmpfs_path: typing.Optional[str] = '/metaflow_temp', persistent_volume_claims: typing.Optional[typing.Dict[str, str]] = None, shared_memory: typing.Optional[int] = None, port: typing.Optional[int] = None, compute_pool: typing.Optional[str] = None, hostname_resolution_timeout: int = 600) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
  """
- Specifies what flows belong to the same project.
-
- A project-specific namespace is created for all flows that
- use the same `@project(name)`.
-
- Parameters
- ----------
- name : str
- Project name. Make sure that the name is unique amongst all
- projects that use the same production scheduler. The name may
- contain only lowercase alphanumeric characters and underscores.
-
-
+ Specifies that this step should execute on Kubernetes.
  """
  ...
 
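`@kubernetes` mirrors `@batch` for cluster execution; a sketch with a few of the signature's arguments (values illustrative; assumes a Metaflow deployment configured for Kubernetes):

```
from metaflow import FlowSpec, step, kubernetes

class K8sFlow(FlowSpec):
    @kubernetes(cpu=2, memory=8192, disk=10240)
    @step
    def start(self):
        print('running in a Kubernetes pod')
        self.next(self.end)

    @step
    def end(self):
        pass

if __name__ == '__main__':
    K8sFlow()
```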
  @typing.overload
- def conda_base(*, packages: typing.Dict[str, str] = {}, libraries: typing.Dict[str, str] = {}, python: typing.Optional[str] = None, disabled: bool = False) -> typing.Callable[[typing.Type[FlowSpecDerived]], typing.Type[FlowSpecDerived]]:
+ def retry(*, times: int = 3, minutes_between_retries: int = 2) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
  """
- Specifies the Conda environment for all steps of the flow.
+ Specifies the number of times the task corresponding
+ to a step needs to be retried.
 
- Use `@conda_base` to set common libraries required by all
- steps and use `@conda` to specify step-specific additions.
+ This decorator is useful for handling transient errors, such as networking issues.
+ If your task contains operations that can't be retried safely, e.g. database updates,
+ it is advisable to annotate it with `@retry(times=0)`.
 
- Parameters
- ----------
- packages : Dict[str, str], default {}
- Packages to use for this flow. The key is the name of the package
- and the value is the version to use.
- libraries : Dict[str, str], default {}
- Supported for backward compatibility. When used with packages, packages will take precedence.
- python : str, optional, default None
- Version of Python to use, e.g. '3.7.4'. A default value of None implies
- that the version used will correspond to the version of the Python interpreter used to start the run.
- disabled : bool, default False
- If set to True, disables Conda.
+ This can be used in conjunction with the `@catch` decorator. The `@catch`
+ decorator will execute a no-op task after all retries have been exhausted,
+ ensuring that the flow execution can continue.
  """
  ...
 
  @typing.overload
- def conda_base(f: typing.Type[FlowSpecDerived]) -> typing.Type[FlowSpecDerived]:
+ def retry(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
  ...
 
- def conda_base(f: typing.Optional[typing.Type[FlowSpecDerived]] = None, *, packages: typing.Dict[str, str] = {}, libraries: typing.Dict[str, str] = {}, python: typing.Optional[str] = None, disabled: bool = False):
- """
- Specifies the Conda environment for all steps of the flow.
-
- Use `@conda_base` to set common libraries required by all
- steps and use `@conda` to specify step-specific additions.
-
- Parameters
- ----------
- packages : Dict[str, str], default {}
- Packages to use for this flow. The key is the name of the package
- and the value is the version to use.
- libraries : Dict[str, str], default {}
- Supported for backward compatibility. When used with packages, packages will take precedence.
- python : str, optional, default None
- Version of Python to use, e.g. '3.7.4'. A default value of None implies
- that the version used will correspond to the version of the Python interpreter used to start the run.
- disabled : bool, default False
- If set to True, disables Conda.
- """
+ @typing.overload
+ def retry(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
  ...
 
- def airflow_external_task_sensor(*, timeout: int, poke_interval: int, mode: str, exponential_backoff: bool, pool: str, soft_fail: bool, name: str, description: str, external_dag_id: str, external_task_ids: typing.List[str], allowed_states: typing.List[str], failed_states: typing.List[str], execution_delta: "datetime.timedelta", check_existence: bool) -> typing.Callable[[typing.Type[FlowSpecDerived]], typing.Type[FlowSpecDerived]]:
+ def retry(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None, *, times: int = 3, minutes_between_retries: int = 2):
  """
- The `@airflow_external_task_sensor` decorator attaches a Airflow [ExternalTaskSensor](https://airflow.apache.org/docs/apache-airflow/stable/_api/airflow/sensors/external_task/index.html#airflow.sensors.external_task.ExternalTaskSensor) before the start step of the flow.
- This decorator only works when a flow is scheduled on Airflow and is compiled using `airflow create`. More than one `@airflow_external_task_sensor` can be added as a flow decorators. Adding more than one decorator will ensure that `start` step starts only after all sensors finish.
+ Specifies the number of times the task corresponding
+ to a step needs to be retried.
 
- Parameters
- ----------
- timeout : int
- Time, in seconds before the task times out and fails. (Default: 3600)
- poke_interval : int
- Time in seconds that the job should wait in between each try. (Default: 60)
- mode : str
- How the sensor operates. Options are: { poke | reschedule }. (Default: "poke")
- exponential_backoff : bool
- allow progressive longer waits between pokes by using exponential backoff algorithm. (Default: True)
- pool : str
- the slot pool this task should run in,
- slot pools are a way to limit concurrency for certain tasks. (Default:None)
- soft_fail : bool
- Set to true to mark the task as SKIPPED on failure. (Default: False)
- name : str
- Name of the sensor on Airflow
- description : str
- Description of sensor in the Airflow UI
- external_dag_id : str
- The dag_id that contains the task you want to wait for.
- external_task_ids : List[str]
- The list of task_ids that you want to wait for.
- If None (default value) the sensor waits for the DAG. (Default: None)
- allowed_states : List[str]
- Iterable of allowed states, (Default: ['success'])
- failed_states : List[str]
- Iterable of failed or dis-allowed states. (Default: None)
- execution_delta : datetime.timedelta
- time difference with the previous execution to look at,
- the default is the same logical date as the current task or DAG. (Default: None)
- check_existence: bool
- Set to True to check if the external task exists or check if
- the DAG to wait for exists. (Default: True)
+ This decorator is useful for handling transient errors, such as networking issues.
+ If your task contains operations that can't be retried safely, e.g. database updates,
+ it is advisable to annotate it with `@retry(times=0)`.
+
+ This can be used in conjunction with the `@catch` decorator. The `@catch`
+ decorator will execute a no-op task after all retries have been exhausted,
+ ensuring that the flow execution can continue.
  """
  ...
 
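`@retry` composes with `@catch` exactly as the docstring describes: retries run first, then the catch takes over. A sketch with a simulated transient failure:

```
import random
from metaflow import FlowSpec, step, retry, catch

class RetryFlow(FlowSpec):
    @catch(var='failed')                          # runs a no-op once retries are exhausted
    @retry(times=3, minutes_between_retries=0)
    @step
    def start(self):
        if random.random() < 0.5:                 # stand-in for a flaky external call
            raise ConnectionError('transient network error')
        self.next(self.end)

    @step
    def end(self):
        pass

if __name__ == '__main__':
    RetryFlow()
```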
- @typing.overload
- def trigger_on_finish(*, flow: typing.Union[typing.Dict[str, str], str, None] = None, flows: typing.List[typing.Union[str, typing.Dict[str, str]]] = [], options: typing.Dict[str, typing.Any] = {}) -> typing.Callable[[typing.Type[FlowSpecDerived]], typing.Type[FlowSpecDerived]]:
+ def airflow_s3_key_sensor(*, timeout: int, poke_interval: int, mode: str, exponential_backoff: bool, pool: str, soft_fail: bool, name: str, description: str, bucket_key: typing.Union[str, typing.List[str]], bucket_name: str, wildcard_match: bool, aws_conn_id: str, verify: bool) -> typing.Callable[[typing.Type[FlowSpecDerived]], typing.Type[FlowSpecDerived]]:
  """
- Specifies the flow(s) that this flow depends on.
-
- ```
- @trigger_on_finish(flow='FooFlow')
- ```
- or
- ```
- @trigger_on_finish(flows=['FooFlow', 'BarFlow'])
- ```
- This decorator respects the @project decorator and triggers the flow
- when upstream runs within the same namespace complete successfully
-
- Additionally, you can specify project aware upstream flow dependencies
- by specifying the fully qualified project_flow_name.
- ```
- @trigger_on_finish(flow='my_project.branch.my_branch.FooFlow')
- ```
- or
- ```
- @trigger_on_finish(flows=['my_project.branch.my_branch.FooFlow', 'BarFlow'])
- ```
-
- You can also specify just the project or project branch (other values will be
- inferred from the current project or project branch):
- ```
- @trigger_on_finish(flow={"name": "FooFlow", "project": "my_project", "project_branch": "branch"})
- ```
-
- Note that `branch` is typically one of:
- - `prod`
- - `user.bob`
- - `test.my_experiment`
- - `prod.staging`
-
- Parameters
- ----------
- flow : Union[str, Dict[str, str]], optional, default None
- Upstream flow dependency for this flow.
- flows : List[Union[str, Dict[str, str]]], default []
- Upstream flow dependencies for this flow.
- options : Dict[str, Any], default {}
- Backend-specific configuration for tuning eventing behavior.
-
-
+ The `@airflow_s3_key_sensor` decorator attaches an Airflow [S3KeySensor](https://airflow.apache.org/docs/apache-airflow-providers-amazon/stable/_api/airflow/providers/amazon/aws/sensors/s3/index.html#airflow.providers.amazon.aws.sensors.s3.S3KeySensor)
+ before the start step of the flow. This decorator only works when a flow is scheduled on Airflow
+ and is compiled using `airflow create`. More than one `@airflow_s3_key_sensor` can be
+ added as flow decorators. Adding more than one decorator will ensure that the `start` step
+ starts only after all sensors finish.
  """
  ...
 
- @typing.overload
- def trigger_on_finish(f: typing.Type[FlowSpecDerived]) -> typing.Type[FlowSpecDerived]:
- ...
-
- def trigger_on_finish(f: typing.Optional[typing.Type[FlowSpecDerived]] = None, *, flow: typing.Union[typing.Dict[str, str], str, None] = None, flows: typing.List[typing.Union[str, typing.Dict[str, str]]] = [], options: typing.Dict[str, typing.Any] = {}):
+ def project(*, name: str) -> typing.Callable[[typing.Type[FlowSpecDerived]], typing.Type[FlowSpecDerived]]:
  """
- Specifies the flow(s) that this flow depends on.
-
- ```
- @trigger_on_finish(flow='FooFlow')
- ```
- or
- ```
- @trigger_on_finish(flows=['FooFlow', 'BarFlow'])
- ```
- This decorator respects the @project decorator and triggers the flow
- when upstream runs within the same namespace complete successfully
-
- Additionally, you can specify project aware upstream flow dependencies
- by specifying the fully qualified project_flow_name.
- ```
- @trigger_on_finish(flow='my_project.branch.my_branch.FooFlow')
- ```
- or
- ```
- @trigger_on_finish(flows=['my_project.branch.my_branch.FooFlow', 'BarFlow'])
- ```
-
- You can also specify just the project or project branch (other values will be
- inferred from the current project or project branch):
- ```
- @trigger_on_finish(flow={"name": "FooFlow", "project": "my_project", "project_branch": "branch"})
- ```
-
- Note that `branch` is typically one of:
- - `prod`
- - `user.bob`
- - `test.my_experiment`
- - `prod.staging`
-
- Parameters
- ----------
- flow : Union[str, Dict[str, str]], optional, default None
- Upstream flow dependency for this flow.
- flows : List[Union[str, Dict[str, str]]], default []
- Upstream flow dependencies for this flow.
- options : Dict[str, Any], default {}
- Backend-specific configuration for tuning eventing behavior.
-
+ Specifies what flows belong to the same project.
 
+ A project-specific namespace is created for all flows that
+ use the same `@project(name)`.
  """
  ...
 
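`@project` is a flow-level decorator; per the removed parameter text, the name should be unique per production scheduler and use only lowercase alphanumerics and underscores. A minimal sketch (the project name is illustrative):

```
from metaflow import FlowSpec, project, step

@project(name='demo_project')  # illustrative project name
class ProjectFlow(FlowSpec):
    @step
    def start(self):
        self.next(self.end)

    @step
    def end(self):
        pass

if __name__ == '__main__':
    ProjectFlow()
```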
@@ -1832,17 +523,6 @@ def trigger(*, event: typing.Union[str, typing.Dict[str, typing.Any], None] = No
  ```
  @trigger(event={'name':'foo', 'parameters':{'common_name': 'common_name', 'flow_param': 'event_field'}})
  ```
-
- Parameters
- ----------
- event : Union[str, Dict[str, Any]], optional, default None
- Event dependency for this flow.
- events : List[Union[str, Dict[str, Any]]], default []
- Events dependency for this flow.
- options : Dict[str, Any], default {}
- Backend-specific configuration for tuning eventing behavior.
-
-
  """
  ...
 
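The `@trigger` example retained in the hunk above pairs naturally with a runnable skeleton; the event name below is hypothetical, and the trigger only fires once the flow is deployed to a production orchestrator that supports eventing:

```
from metaflow import FlowSpec, step, trigger

@trigger(event='data_updated')  # hypothetical event name
class TriggeredFlow(FlowSpec):
    @step
    def start(self):
        self.next(self.end)

    @step
    def end(self):
        pass

if __name__ == '__main__':
    TriggeredFlow()
```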
@@ -1881,56 +561,6 @@ def trigger(f: typing.Optional[typing.Type[FlowSpecDerived]] = None, *, event: t
  ```
  @trigger(event={'name':'foo', 'parameters':{'common_name': 'common_name', 'flow_param': 'event_field'}})
  ```
-
- Parameters
- ----------
- event : Union[str, Dict[str, Any]], optional, default None
- Event dependency for this flow.
- events : List[Union[str, Dict[str, Any]]], default []
- Events dependency for this flow.
- options : Dict[str, Any], default {}
- Backend-specific configuration for tuning eventing behavior.
-
-
- """
- ...
-
- @typing.overload
- def pypi_base(*, packages: typing.Dict[str, str] = {}, python: typing.Optional[str] = None) -> typing.Callable[[typing.Type[FlowSpecDerived]], typing.Type[FlowSpecDerived]]:
- """
- Specifies the PyPI packages for all steps of the flow.
-
- Use `@pypi_base` to set common packages required by all
- steps and use `@pypi` to specify step-specific overrides.
- Parameters
- ----------
- packages : Dict[str, str], default: {}
- Packages to use for this flow. The key is the name of the package
- and the value is the version to use.
- python : str, optional, default: None
- Version of Python to use, e.g. '3.7.4'. A default value of None implies
- that the version used will correspond to the version of the Python interpreter used to start the run.
- """
- ...
-
- @typing.overload
- def pypi_base(f: typing.Type[FlowSpecDerived]) -> typing.Type[FlowSpecDerived]:
- ...
-
- def pypi_base(f: typing.Optional[typing.Type[FlowSpecDerived]] = None, *, packages: typing.Dict[str, str] = {}, python: typing.Optional[str] = None):
- """
- Specifies the PyPI packages for all steps of the flow.
-
- Use `@pypi_base` to set common packages required by all
- steps and use `@pypi` to specify step-specific overrides.
- Parameters
- ----------
- packages : Dict[str, str], default: {}
- Packages to use for this flow. The key is the name of the package
- and the value is the version to use.
- python : str, optional, default: None
- Version of Python to use, e.g. '3.7.4'. A default value of None implies
- that the version used will correspond to the version of the Python interpreter used to start the run.
  """
  ...
 
@@ -1939,21 +569,6 @@ def schedule(*, hourly: bool = False, daily: bool = True, weekly: bool = False,
  """
  Specifies the times when the flow should be run when running on a
  production scheduler.
-
- Parameters
- ----------
- hourly : bool, default False
- Run the workflow hourly.
- daily : bool, default True
- Run the workflow daily.
- weekly : bool, default False
- Run the workflow weekly.
- cron : str, optional, default None
- Run the workflow at [a custom Cron schedule](https://docs.aws.amazon.com/eventbridge/latest/userguide/scheduled-events.html#cron-expressions)
- specified by this expression.
- timezone : str, optional, default None
- Timezone on which the schedule runs (default: None). Currently supported only for Argo workflows,
- which accepts timezones in [IANA format](https://nodatime.org/TimeZones).
  """
  ...
 
@@ -1965,1459 +580,140 @@ def schedule(f: typing.Optional[typing.Type[FlowSpecDerived]] = None, *, hourly:
  """
  Specifies the times when the flow should be run when running on a
  production scheduler.
-
- Parameters
- ----------
- hourly : bool, default False
- Run the workflow hourly.
- daily : bool, default True
- Run the workflow daily.
- weekly : bool, default False
- Run the workflow weekly.
- cron : str, optional, default None
- Run the workflow at [a custom Cron schedule](https://docs.aws.amazon.com/eventbridge/latest/userguide/scheduled-events.html#cron-expressions)
- specified by this expression.
- timezone : str, optional, default None
- Timezone on which the schedule runs (default: None). Currently supported only for Argo workflows,
- which accepts timezones in [IANA format](https://nodatime.org/TimeZones).
  """
  ...
 
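`@schedule` likewise only takes effect on a production scheduler; a sketch using the daily default:

```
from metaflow import FlowSpec, schedule, step

@schedule(daily=True)  # a cron expression may be passed instead, in the scheduler's expected format
class NightlyFlow(FlowSpec):
    @step
    def start(self):
        self.next(self.end)

    @step
    def end(self):
        pass

if __name__ == '__main__':
    NightlyFlow()
```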
1986
- def namespace(ns: typing.Optional[str]) -> typing.Optional[str]:
+ def airflow_external_task_sensor(*, timeout: int, poke_interval: int, mode: str, exponential_backoff: bool, pool: str, soft_fail: bool, name: str, description: str, external_dag_id: str, external_task_ids: typing.List[str], allowed_states: typing.List[str], failed_states: typing.List[str], execution_delta: "datetime.timedelta", check_existence: bool) -> typing.Callable[[typing.Type[FlowSpecDerived]], typing.Type[FlowSpecDerived]]:
  """
- Switch namespace to the one provided.
-
- This call has a global effect. No objects outside this namespace
- will be accessible. To access all objects regardless of namespaces,
- pass None to this call.
-
- Parameters
- ----------
- ns : str, optional
- Namespace to switch to or None to ignore namespaces.
-
- Returns
- -------
- str, optional
- Namespace set (result of get_namespace()).
+ The `@airflow_external_task_sensor` decorator attaches an Airflow [ExternalTaskSensor](https://airflow.apache.org/docs/apache-airflow/stable/_api/airflow/sensors/external_task/index.html#airflow.sensors.external_task.ExternalTaskSensor) before the start step of the flow.
+ This decorator only works when a flow is scheduled on Airflow and is compiled using `airflow create`. More than one `@airflow_external_task_sensor` can be added as a flow decorator. Adding more than one ensures that the `start` step begins only after all sensors finish.
  """
  ...
 
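A sketch of attaching the sensor described above (the DAG and task IDs are hypothetical; per the stub signature, all arguments are keyword-only):

```python
import datetime
from metaflow import FlowSpec, airflow_external_task_sensor, step

@airflow_external_task_sensor(
    timeout=3600,
    poke_interval=60,
    mode="reschedule",
    exponential_backoff=False,
    pool="default_pool",
    soft_fail=False,
    name="wait_for_upstream",
    description="Block start until the upstream DAG task finishes",
    external_dag_id="upstream_dag",       # hypothetical DAG id
    external_task_ids=["publish_table"],  # hypothetical task id
    allowed_states=["success"],
    failed_states=["failed"],
    execution_delta=datetime.timedelta(hours=1),
    check_existence=True,
)
class DownstreamFlow(FlowSpec):
    @step
    def start(self):
        self.next(self.end)

    @step
    def end(self):
        pass
```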
- def get_namespace() -> typing.Optional[str]:
+ @typing.overload
+ def pypi_base(*, packages: typing.Dict[str, str] = {}, python: typing.Optional[str] = None) -> typing.Callable[[typing.Type[FlowSpecDerived]], typing.Type[FlowSpecDerived]]:
  """
- Return the namespace that is currently being used to filter objects.
-
- The namespace is a tag associated with all objects in Metaflow.
+ Specifies the PyPI packages for all steps of the flow.
 
- Returns
- -------
- str, optional
- The current namespace used to filter objects.
+ Use `@pypi_base` to set common packages required by all
+ steps and use `@pypi` to specify step-specific overrides.
  """
  ...
 
- def default_namespace() -> str:
+ @typing.overload
+ def pypi_base(f: typing.Type[FlowSpecDerived]) -> typing.Type[FlowSpecDerived]:
+ ...
+
+ def pypi_base(f: typing.Optional[typing.Type[FlowSpecDerived]] = None, *, packages: typing.Dict[str, str] = {}, python: typing.Optional[str] = None):
  """
- Resets the namespace used to filter objects to the default one, i.e. the one that was
- used prior to any `namespace` calls.
+ Specifies the PyPI packages for all steps of the flow.
 
- Returns
- -------
- str
- The result of get_namespace() after the namespace has been reset.
+ Use `@pypi_base` to set common packages required by all
+ steps and use `@pypi` to specify step-specific overrides.
  """
  ...
 
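A usage sketch for the `@pypi_base` decorator whose stub is added above (the package pins are illustrative):

```python
from metaflow import FlowSpec, pypi_base, step

@pypi_base(python="3.10.4", packages={"pandas": "2.1.0"})  # versions are illustrative
class PandasFlow(FlowSpec):
    @step
    def start(self):
        import pandas as pd  # resolved from the @pypi_base environment
        print(pd.__version__)
        self.next(self.end)

    @step
    def end(self):
        pass
```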
- def get_metadata() -> str:
+ @typing.overload
+ def trigger_on_finish(*, flow: typing.Union[typing.Dict[str, str], str, None] = None, flows: typing.List[typing.Union[str, typing.Dict[str, str]]] = [], options: typing.Dict[str, typing.Any] = {}) -> typing.Callable[[typing.Type[FlowSpecDerived]], typing.Type[FlowSpecDerived]]:
  """
- Returns the current Metadata provider.
+ Specifies the flow(s) that this flow depends on.
 
- If this is not set explicitly using `metadata`, the default value is
- determined through the Metaflow configuration. You can use this call to
- check that your configuration is set up properly.
+ ```
+ @trigger_on_finish(flow='FooFlow')
+ ```
+ or
+ ```
+ @trigger_on_finish(flows=['FooFlow', 'BarFlow'])
+ ```
+ This decorator respects the @project decorator and triggers the flow
+ when upstream runs within the same namespace complete successfully.
+
+ Additionally, you can specify project-aware upstream flow dependencies
+ by specifying the fully qualified project_flow_name.
+ ```
+ @trigger_on_finish(flow='my_project.branch.my_branch.FooFlow')
+ ```
+ or
+ ```
+ @trigger_on_finish(flows=['my_project.branch.my_branch.FooFlow', 'BarFlow'])
+ ```
 
- If multiple configuration profiles are present, this call returns the one
- selected through the `METAFLOW_PROFILE` environment variable.
+ You can also specify just the project or project branch (other values will be
+ inferred from the current project or project branch):
+ ```
+ @trigger_on_finish(flow={"name": "FooFlow", "project": "my_project", "project_branch": "branch"})
+ ```
 
- Returns
- -------
- str
- Information about the Metadata provider currently selected. This information typically
- returns provider-specific information (like URL for remote providers or local paths for
- local providers).
+ Note that `branch` is typically one of:
+ - `prod`
+ - `user.bob`
+ - `test.my_experiment`
+ - `prod.staging`
  """
  ...
 
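A minimal sketch of the `@trigger_on_finish` usage documented above (`FooFlow` is hypothetical; the dependency only takes effect when the flow is deployed to a production orchestrator):

```python
from metaflow import FlowSpec, step, trigger_on_finish

@trigger_on_finish(flow="FooFlow")  # hypothetical upstream flow
class ReportFlow(FlowSpec):
    @step
    def start(self):
        # Once deployed (e.g. to Argo Workflows), this runs after FooFlow
        # succeeds; the triggering run is typically inspected via current.trigger.
        self.next(self.end)

    @step
    def end(self):
        pass
```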
- def default_metadata() -> str:
- """
- Resets the Metadata provider to the default value, that is, to the value
- that was used prior to any `metadata` calls.
-
- Returns
- -------
- str
- The result of get_metadata() after resetting the provider.
- """
+ @typing.overload
+ def trigger_on_finish(f: typing.Type[FlowSpecDerived]) -> typing.Type[FlowSpecDerived]:
  ...
 
- class Metaflow(object, metaclass=type):
+ def trigger_on_finish(f: typing.Optional[typing.Type[FlowSpecDerived]] = None, *, flow: typing.Union[typing.Dict[str, str], str, None] = None, flows: typing.List[typing.Union[str, typing.Dict[str, str]]] = [], options: typing.Dict[str, typing.Any] = {}):
  """
- Entry point to all objects in the Metaflow universe.
-
- This object can be used to list all the flows present either through the explicit property
- or by iterating over this object.
-
- Attributes
- ----------
- flows : List[Flow]
- Returns the list of all `Flow` objects known to this metadata provider. Note that only
- flows present in the current namespace will be returned. A `Flow` is present in a namespace
- if it has at least one run in the namespace.
- """
- def __init__(self, _current_metadata: typing.Optional[str] = None):
- ...
- @property
- def flows(self) -> typing.List[metaflow.client.core.Flow]:
- """
- Returns a list of all the flows present.
-
- Only flows present in the set namespace are returned. A flow is present in a namespace if
- it has at least one run that is in the namespace.
-
- Returns
- -------
- List[Flow]
- List of all flows present.
- """
- ...
- def __iter__(self) -> typing.Iterator[metaflow.client.core.Flow]:
- """
- Iterator over all flows present.
-
- Only flows present in the set namespace are returned. A flow is present in a
- namespace if it has at least one run that is in the namespace.
-
- Yields
- -------
- Flow
- A Flow present in the Metaflow universe.
- """
- ...
- def __str__(self) -> str:
- ...
- def __getitem__(self, name: str) -> metaflow.client.core.Flow:
- """
- Returns a specific flow by name.
-
- The flow will only be returned if it is present in the current namespace.
-
- Parameters
- ----------
- name : str
- Name of the Flow
-
- Returns
- -------
- Flow
- Flow with the given name.
- """
- ...
- ...
-
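Although this release removes the `Metaflow` client stub from `__init__.pyi`, the class itself remains importable from `metaflow` at runtime; a small sketch of the entry point described above:

```python
from metaflow import Metaflow, namespace

namespace(None)  # optional: inspect objects across all namespaces
mf = Metaflow()
print(mf.flows)  # every Flow known to the metadata provider
for flow in mf:  # iterating is equivalent to using .flows
    print(flow)
```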
- class Flow(metaflow.client.core.MetaflowObject, metaclass=type):
- """
- A Flow represents all existing flows with a certain name, in other words,
- classes derived from `FlowSpec`. A container of `Run` objects.
-
- Attributes
- ----------
- latest_run : Run
- Latest `Run` (in progress or completed, successfully or not) of this flow.
- latest_successful_run : Run
- Latest successfully completed `Run` of this flow.
- """
- def __init__(self, *args, **kwargs):
- ...
- @property
- def latest_run(self) -> typing.Optional[metaflow.client.core.Run]:
- """
- Returns the latest run (either in progress or completed) of this flow.
-
- Note that an in-progress run may be returned by this call. Use latest_successful_run
- to get an object representing a completed successful run.
-
- Returns
- -------
- Run, optional
- Latest run of this flow
- """
- ...
- @property
- def latest_successful_run(self) -> typing.Optional[metaflow.client.core.Run]:
- """
- Returns the latest successful run of this flow.
-
- Returns
- -------
- Run, optional
- Latest successful run of this flow
- """
- ...
- def runs(self, *tags: str) -> typing.Iterator[metaflow.client.core.Run]:
- """
- Returns an iterator over all `Run`s of this flow.
-
- An optional filter is available that allows you to filter on tags.
- If multiple tags are specified, only runs that have all the
- specified tags are returned.
-
- Parameters
- ----------
- tags : str
- Tags to match.
-
- Yields
- ------
- Run
- `Run` objects in this flow.
- """
- ...
- def __iter__(self) -> typing.Iterator[metaflow.client.core.Task]:
- """
- Iterate over all children Run of this Flow.
-
- Note that only runs in the current namespace are returned unless
- _namespace_check is False.
-
- Yields
- ------
- Run
- A Run in this Flow
- """
- ...
- def __getitem__(self, run_id: str) -> metaflow.client.core.Run:
- """
- Returns the Run object with the run ID 'run_id'
-
- Parameters
- ----------
- run_id : str
- Run ID
-
- Returns
- -------
- Run
- Run for this run ID in this Flow
-
- Raises
- ------
- KeyError
- If the run_id does not identify a valid Run object
- """
- ...
- def __getstate__(self):
- ...
- def __setstate__(self, state):
- ...
- ...
-
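A usage sketch of the `Flow` container documented above (the flow name and tag are hypothetical):

```python
from metaflow import Flow

flow = Flow("HelloFlow")                  # hypothetical flow name
run = flow.latest_successful_run          # latest completed, successful Run
for tagged_run in flow.runs("deployed"):  # only runs carrying the "deployed" tag
    print(tagged_run)
```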
- class Run(metaflow.client.core.MetaflowObject, metaclass=type):
- """
- A `Run` represents an execution of a `Flow`. It is a container of `Step`s.
-
- Attributes
- ----------
- data : MetaflowData
- a shortcut to run['end'].task.data, i.e. data produced by this run.
- successful : bool
- True if the run completed successfully.
- finished : bool
- True if the run completed.
- finished_at : datetime
- Time this run finished.
- code : MetaflowCode
- Code package for this run (if present). See `MetaflowCode`.
- trigger : MetaflowTrigger
- Information about event(s) that triggered this run (if present). See `MetaflowTrigger`.
- end_task : Task
- `Task` for the end step (if it is present already).
- """
- def steps(self, *tags: str) -> typing.Iterator[metaflow.client.core.Step]:
- """
- [Legacy function - do not use]
-
- Returns an iterator over all `Step` objects in the run. This is an alias
- to iterating the object itself, i.e.
- ```
- list(Run(...)) == list(Run(...).steps())
- ```
-
- Parameters
- ----------
- tags : str
- No op (legacy functionality)
-
- Yields
- ------
- Step
- `Step` objects in this run.
- """
- ...
- @property
- def code(self) -> typing.Optional[metaflow.client.core.MetaflowCode]:
- """
- Returns the MetaflowCode object for this run, if present.
- Code is packed if at least one `Step` runs remotely, else None is returned.
-
- Returns
- -------
- MetaflowCode, optional
- Code package for this run
- """
- ...
- @property
- def data(self) -> typing.Optional[metaflow.client.core.MetaflowData]:
- """
- Returns a container of data artifacts produced by this run.
-
- You can access data produced by this run as follows:
- ```
- print(run.data.my_var)
- ```
- This is a shorthand for `run['end'].task.data`. If the 'end' step has not yet
- executed, returns None.
-
- Returns
- -------
- MetaflowData, optional
- Container of all artifacts produced by this task
- """
- ...
- @property
- def successful(self) -> bool:
- """
- Indicates whether or not the run completed successfully.
-
- A run is successful if its 'end' step is successful.
-
- Returns
- -------
- bool
- True if the run completed successfully and False otherwise
- """
- ...
- @property
- def finished(self) -> bool:
- """
- Indicates whether or not the run completed.
-
- A run completed if its 'end' step completed.
-
- Returns
- -------
- bool
- True if the run completed and False otherwise
- """
- ...
- @property
- def finished_at(self) -> typing.Optional[datetime.datetime]:
- """
- Returns the datetime object of when the run finished (successfully or not).
-
- The completion time of a run is the same as the completion time of its 'end' step.
- If the 'end' step has not completed, returns None.
-
- Returns
- -------
- datetime, optional
- Datetime of when the run finished
- """
- ...
- @property
- def end_task(self) -> typing.Optional[metaflow.client.core.Task]:
- """
- Returns the Task corresponding to the 'end' step.
-
- This returns None if the end step does not yet exist.
-
- Returns
- -------
- Task, optional
- The 'end' task
- """
- ...
- def add_tag(self, tag: str):
- """
- Add a tag to this `Run`.
-
- Note that if the tag is already a system tag, it is not added as a user tag,
- and no error is thrown.
-
- Parameters
- ----------
- tag : str
- Tag to add.
- """
- ...
- def add_tags(self, tags: typing.Iterable[str]):
- """
- Add one or more tags to this `Run`.
-
- Note that if any tag is already a system tag, it is not added as a user tag
- and no error is thrown.
-
- Parameters
- ----------
- tags : Iterable[str]
- Tags to add.
- """
- ...
- def remove_tag(self, tag: str):
- """
- Remove one tag from this `Run`.
-
- Removing a system tag is an error. Removing a non-existent
- user tag is a no-op.
-
- Parameters
- ----------
- tag : str
- Tag to remove.
- """
- ...
- def remove_tags(self, tags: typing.Iterable[str]):
- """
- Remove one or more tags from this `Run`.
-
- Removing a system tag will result in an error. Removing a non-existent
- user tag is a no-op.
-
- Parameters
- ----------
- tags : Iterable[str]
- Tags to remove.
- """
- ...
- def replace_tag(self, tag_to_remove: str, tag_to_add: str):
- """
- Remove a tag and add a tag atomically. Removal is done first.
- The rules for `Run.add_tag` and `Run.remove_tag` also apply here.
-
- Parameters
- ----------
- tag_to_remove : str
- Tag to remove.
- tag_to_add : str
- Tag to add.
- """
- ...
- def replace_tags(self, tags_to_remove: typing.Iterable[str], tags_to_add: typing.Iterable[str]):
- """
- Remove and add tags atomically; the removal is done first.
- The rules for `Run.add_tag` and `Run.remove_tag` also apply here.
-
- Parameters
- ----------
- tags_to_remove : Iterable[str]
- Tags to remove.
- tags_to_add : Iterable[str]
- Tags to add.
- """
- ...
- def __iter__(self) -> typing.Iterator[metaflow.client.core.Step]:
- """
- Iterate over all children Step of this Run
-
- Yields
- ------
- Step
- A Step in this Run
- """
- ...
- def __getitem__(self, name: str) -> metaflow.client.core.Step:
- """
- Returns the Step object with the step name 'name'
-
- Parameters
- ----------
- name : str
- Step name
-
- Returns
- -------
- Step
- Step for this step name in this Run
-
- Raises
- ------
- KeyError
- If the name does not identify a valid Step object
- """
- ...
- def __getstate__(self):
- ...
- def __setstate__(self, state):
- ...
- @property
- def trigger(self) -> typing.Optional[metaflow.events.Trigger]:
- """
- Returns a container of events that triggered this run.
-
- This returns None if the run was not triggered by any events.
-
- Returns
- -------
- Trigger, optional
- Container of triggering events
- """
- ...
- ...
-
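A short sketch of the `Run` API described above (the pathspec, artifact, and tag names are hypothetical):

```python
from metaflow import Run

run = Run("HelloFlow/1234")  # hypothetical pathspec
if run.successful:
    # Shorthand for run['end'].task.data.my_var; my_var is hypothetical.
    print(run.data.my_var)
run.replace_tag("candidate", "reviewed")  # removal happens first, atomically
```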
- class Step(metaflow.client.core.MetaflowObject, metaclass=type):
- """
- A `Step` represents a user-defined step, that is, a method annotated with the `@step` decorator.
-
- It contains `Task` objects associated with the step, that is, all executions of the
- `Step`. The step may contain multiple `Task`s in the case of a foreach step.
-
- Attributes
- ----------
- task : Task
- The first `Task` object in this step. This is a shortcut for retrieving the only
- task contained in a non-foreach step.
- finished_at : datetime
- Time when the latest `Task` of this step finished. Note that in the case of foreaches,
- this time may change during execution of the step.
- environment_info : Dict[str, Any]
- Information about the execution environment.
- """
- @property
- def task(self) -> typing.Optional[metaflow.client.core.Task]:
- """
- Returns a Task object belonging to this step.
-
- This is useful when the step only contains one task (a linear step for example).
-
- Returns
- -------
- Task
- A task in the step
- """
- ...
- def tasks(self, *tags: str) -> typing.Iterable[metaflow.client.core.Task]:
- """
- [Legacy function - do not use]
-
- Returns an iterator over all `Task` objects in the step. This is an alias
- to iterating the object itself, i.e.
- ```
- list(Step(...)) == list(Step(...).tasks())
- ```
-
- Parameters
- ----------
- tags : str
- No op (legacy functionality)
-
- Yields
- ------
- Task
- `Task` objects in this step.
- """
- ...
- @property
- def control_task(self) -> typing.Optional[metaflow.client.core.Task]:
- """
- [Unpublished API - use with caution!]
-
- Returns a Control Task object belonging to this step.
- This is useful when the step only contains one control task.
-
- Returns
- -------
- Task
- A control task in the step
- """
- ...
- def control_tasks(self, *tags: str) -> typing.Iterator[metaflow.client.core.Task]:
- """
- [Unpublished API - use with caution!]
-
- Returns an iterator over all the control tasks in the step.
- An optional filter is available that allows you to filter on tags. If the
- filter is specified, the control tasks returned will contain all the
- specified tags.
-
- Parameters
- ----------
- tags : str
- Tags to match
-
- Yields
- ------
- Task
- Control Task objects for this step
- """
- ...
- def __iter__(self) -> typing.Iterator[metaflow.client.core.Task]:
- """
- Iterate over all children Task of this Step
-
- Yields
- ------
- Task
- A Task in this Step
- """
- ...
- def __getitem__(self, task_id: str) -> metaflow.client.core.Task:
- """
- Returns the Task object with the task ID 'task_id'
-
- Parameters
- ----------
- task_id : str
- Task ID
-
- Returns
- -------
- Task
- Task for this task ID in this Step
-
- Raises
- ------
- KeyError
- If the task_id does not identify a valid Task object
- """
- ...
- def __getstate__(self):
- ...
- def __setstate__(self, state):
- ...
- @property
- def finished_at(self) -> typing.Optional[datetime.datetime]:
- """
- Returns the datetime object of when the step finished (successfully or not).
-
- A step is considered finished when all the tasks that belong to it have
- finished. This call will return None if the step has not finished.
-
- Returns
- -------
- datetime
- Datetime of when the step finished
- """
- ...
- @property
- def environment_info(self) -> typing.Optional[typing.Dict[str, typing.Any]]:
- """
- Returns information about the environment that was used to execute this step. As an
- example, if the Conda environment is selected, this will return information about the
- dependencies that were used in the environment.
-
- This environment information is only available for steps that have tasks
- for which the code package has been saved.
-
- Returns
- -------
- Dict[str, Any], optional
- Dictionary describing the environment
- """
- ...
- ...
-
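A sketch of the `Step` API described above (the pathspec is hypothetical):

```python
from metaflow import Step

train = Step("HelloFlow/1234/train")  # hypothetical pathspec
print(train.task.finished_at)          # the single task of a non-foreach step
for task in train:                     # a foreach step contains many tasks
    print(task.index, task.successful)
```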
- class Task(metaflow.client.core.MetaflowObject, metaclass=type):
- """
- A `Task` represents an execution of a `Step`.
-
- It contains all `DataArtifact` objects produced by the task as
- well as metadata related to execution.
-
- Note that the `@retry` decorator may cause multiple attempts of
- the task to be present. Usually you want the latest attempt, which
- is what instantiating a `Task` object returns by default. If
- you need to e.g. retrieve logs from a failed attempt, you can
- explicitly get information about a specific attempt by using the
- following syntax when creating a task:
-
- `Task('flow/run/step/task', attempt=<attempt>)`
-
- where `attempt=0` corresponds to the first attempt etc.
-
- Attributes
- ----------
- metadata : List[Metadata]
- List of all metadata events associated with the task.
- metadata_dict : Dict[str, str]
- A condensed version of `metadata`: A dictionary where keys
- are names of metadata events and values the latest corresponding event.
- data : MetaflowData
- Container of all data artifacts produced by this task. Note that this
- call downloads all data locally, so it can be slower than accessing
- artifacts individually. See `MetaflowData` for more information.
- artifacts : MetaflowArtifacts
- Container of `DataArtifact` objects produced by this task.
- successful : bool
- True if the task completed successfully.
- finished : bool
- True if the task completed.
- exception : object
- Exception raised by this task if there was one.
- finished_at : datetime
- Time this task finished.
- runtime_name : str
- Runtime this task was executed on.
- stdout : str
- Standard output for the task execution.
- stderr : str
- Standard error output for the task execution.
- code : MetaflowCode
- Code package for this task (if present). See `MetaflowCode`.
- environment_info : Dict[str, str]
- Information about the execution environment.
- """
- def __init__(self, *args, **kwargs):
- ...
- @property
- def metadata(self) -> typing.List[metaflow.client.core.Metadata]:
- """
- Metadata events produced by this task across all attempts of the task
- *except* if you selected a specific task attempt.
-
- Note that Metadata is different from tags.
-
- Returns
- -------
- List[Metadata]
- Metadata produced by this task
- """
- ...
- @property
- def metadata_dict(self) -> typing.Dict[str, str]:
- """
- Dictionary mapping metadata names (keys) and their associated values.
-
- Note that unlike the metadata() method, this call will only return the latest
- metadata for a given name. For example, if a task executes multiple times (retries),
- the same metadata name will be generated multiple times (one for each execution of the
- task). The metadata() method returns all those metadata elements whereas this call will
- return the metadata associated with the latest execution of the task.
-
- Returns
- -------
- Dict[str, str]
- Dictionary mapping metadata name with value
- """
- ...
- @property
- def index(self) -> typing.Optional[int]:
- """
- Returns the index of the innermost foreach loop if this task is run inside at least
- one foreach.
-
- The index is what distinguishes the various tasks inside a given step.
- This call returns None if this task was not run in a foreach loop.
-
- Returns
- -------
- int, optional
- Index in the innermost loop for this task
- """
- ...
- @property
- def data(self) -> metaflow.client.core.MetaflowData:
- """
- Returns a container of data artifacts produced by this task.
-
- You can access data produced by this task as follows:
- ```
- print(task.data.my_var)
- ```
-
- Returns
- -------
- MetaflowData
- Container of all artifacts produced by this task
- """
- ...
- @property
- def artifacts(self) -> typing.NamedTuple:
- """
- Returns a container of DataArtifacts produced by this task.
-
- You can access each DataArtifact by name like so:
- ```
- print(task.artifacts.my_var)
- ```
- This method differs from data() because it returns DataArtifact objects
- (which contain additional metadata) as opposed to just the data.
-
- Returns
- -------
- MetaflowArtifacts
- Container of all DataArtifacts produced by this task
- """
- ...
- @property
- def successful(self) -> bool:
- """
- Indicates whether or not the task completed successfully.
-
- This information is always about the latest task to have completed (in case
- of retries).
-
- Returns
- -------
- bool
- True if the task completed successfully and False otherwise
- """
- ...
- @property
- def finished(self) -> bool:
- """
- Indicates whether or not the task completed.
-
- This information is always about the latest task to have completed (in case
- of retries).
-
- Returns
- -------
- bool
- True if the task completed and False otherwise
- """
- ...
- @property
- def exception(self) -> typing.Optional[typing.Any]:
- """
- Returns the exception that caused the task to fail, if any.
-
- This information is always about the latest task to have completed (in case
- of retries). If successful() returns False and finished() returns True,
- this method can help determine what went wrong.
-
- Returns
- -------
- object
- Exception raised by the task or None if not applicable
- """
- ...
- @property
- def finished_at(self) -> typing.Optional[datetime.datetime]:
- """
- Returns the datetime object of when the task finished (successfully or not).
-
- This information is always about the latest task to have completed (in case
- of retries). This call will return None if the task is not finished.
-
- Returns
- -------
- datetime
- Datetime of when the task finished
- """
- ...
- @property
- def runtime_name(self) -> typing.Optional[str]:
- """
- Returns the name of the runtime this task executed on.
-
- Returns
- -------
- str
- Name of the runtime this task executed on
- """
- ...
- @property
- def stdout(self) -> str:
- """
- Returns the full standard out of this task.
-
- If you specify a specific attempt for this task, it will return the
- standard out for that attempt. If you do not specify an attempt,
- this will return the current standard out for the latest *started*
- attempt of the task. In both cases, multiple calls to this
- method will return the most up-to-date log (so if an attempt is not
- done, each call will fetch the latest log).
-
- Returns
- -------
- str
- Standard output of this task
- """
- ...
- @property
- def stdout_size(self) -> int:
- """
- Returns the size of the stdout log of this task.
-
- Similar to `stdout`, the size returned is the latest size of the log
- (so for a running attempt, this value will increase as the task produces
- more output).
-
- Returns
- -------
- int
- Size of the stdout log content (in bytes)
- """
- ...
- @property
- def stderr(self) -> str:
- """
- Returns the full standard error of this task.
-
- If you specify a specific attempt for this task, it will return the
- standard error for that attempt. If you do not specify an attempt,
- this will return the current standard error for the latest *started*
- attempt. In both cases, multiple calls to this
- method will return the most up-to-date log (so if an attempt is not
- done, each call will fetch the latest log).
-
- Returns
- -------
- str
- Standard error of this task
- """
- ...
- @property
- def stderr_size(self) -> int:
- """
- Returns the size of the stderr log of this task.
-
- Similar to `stderr`, the size returned is the latest size of the log
- (so for a running attempt, this value will increase as the task produces
- more output).
-
- Returns
- -------
- int
- Size of the stderr log content (in bytes)
- """
- ...
- @property
- def current_attempt(self) -> int:
- """
- Get the relevant attempt for this Task.
-
- Returns the specific attempt used when
- initializing the instance, or the latest *started* attempt for the Task.
-
- Returns
- -------
- int
- Attempt ID for this task object
- """
- ...
- @property
- def code(self) -> typing.Optional[metaflow.client.core.MetaflowCode]:
- """
- Returns the MetaflowCode object for this task, if present.
-
- Not all tasks save their code so this call may return None in those cases.
-
- Returns
- -------
- MetaflowCode
- Code package for this task
- """
- ...
- @property
- def environment_info(self) -> typing.Dict[str, typing.Any]:
- """
- Returns information about the environment that was used to execute this task. As an
- example, if the Conda environment is selected, this will return information about the
- dependencies that were used in the environment.
-
- This environment information is only available for tasks that have a code package.
-
- Returns
- -------
- Dict
- Dictionary describing the environment
- """
- ...
- def loglines(self, stream: str, as_unicode: bool = True, meta_dict: typing.Optional[typing.Dict[str, typing.Any]] = None) -> typing.Iterator[typing.Tuple[datetime.datetime, str]]:
- """
- Return an iterator over (utc_timestamp, logline) tuples.
-
- Parameters
- ----------
- stream : str
- Either 'stdout' or 'stderr'.
- as_unicode : bool, default: True
- If as_unicode=False, each logline is returned as a byte object. Otherwise,
- it is returned as a (unicode) string.
-
- Yields
- ------
- Tuple[datetime, str]
- Tuple of timestamp, logline pairs.
- """
- ...
- def __iter__(self) -> typing.Iterator[metaflow.client.core.DataArtifact]:
- """
- Iterate over all children DataArtifact of this Task
-
- Yields
- ------
- DataArtifact
- A DataArtifact in this Task
- """
- ...
- def __getitem__(self, name: str) -> metaflow.client.core.DataArtifact:
- """
- Returns the DataArtifact object with the artifact name 'name'
-
- Parameters
- ----------
- name : str
- Data artifact name
-
- Returns
- -------
- DataArtifact
- DataArtifact for this artifact name in this task
-
- Raises
- ------
- KeyError
- If the name does not identify a valid DataArtifact object
- """
- ...
- def __getstate__(self):
- ...
- def __setstate__(self, state):
- ...
- ...
-
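A sketch of the `Task` API described above, including the attempt-selection syntax from the docstring (the pathspec and artifact name are hypothetical):

```python
from metaflow import Task

task = Task("HelloFlow/1234/train/5678", attempt=0)  # attempt=0 is the first try
print(task.stdout)                 # logs for the selected attempt
print(task.metadata_dict)          # latest metadata value per name
print(task.artifacts.my_var.data)  # my_var is a hypothetical artifact name
```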
- class DataArtifact(metaflow.client.core.MetaflowObject, metaclass=type):
- """
- A single data artifact and associated metadata. Note that this object does
- not contain other objects as it is the leaf object in the hierarchy.
-
- Attributes
- ----------
- data : object
- The data contained in this artifact, that is, the object produced during
- execution of this run.
- sha : string
- A unique ID of this artifact.
- finished_at : datetime
- Corresponds roughly to the `Task.finished_at` time of the parent `Task`.
- An alias for `DataArtifact.created_at`.
- """
- @property
- def data(self) -> typing.Any:
- """
- Unpickled representation of the data contained in this artifact.
-
- Returns
- -------
- object
- Object contained in this artifact
- """
- ...
- @property
- def size(self) -> int:
- """
- Returns the size (in bytes) of the pickled object representing this
- DataArtifact
-
- Returns
- -------
- int
- Size of the pickled representation of data artifact (in bytes)
- """
- ...
- @property
- def sha(self) -> str:
- """
- Unique identifier for this artifact.
-
- This is a unique hash of the artifact (historically a SHA1 hash).
-
- Returns
- -------
- str
- Hash of this artifact
- """
- ...
- @property
- def finished_at(self) -> datetime.datetime:
- """
- Creation time for this artifact.
-
- Alias for created_at.
-
- Returns
- -------
- datetime
- Creation time
- """
- ...
- def __getstate__(self):
- ...
- def __setstate__(self, state):
- ...
- ...
-
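A sketch of accessing a `DataArtifact` as described above (the pathspec and artifact name are hypothetical):

```python
from metaflow import Task

artifact = Task("HelloFlow/1234/train/5678")["my_var"]  # hypothetical names
print(artifact.sha, artifact.size)  # stable hash and pickled size in bytes
print(artifact.data)                # the unpickled value itself
```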
- class Runner(object, metaclass=type):
- """
- Metaflow's Runner API that presents a programmatic interface
- to run flows and perform other operations either synchronously or asynchronously.
- The class expects a path to the flow file along with optional arguments
- that match top-level options on the command-line.
-
- This class works as a context manager, calling `cleanup()` to remove
- temporary files at exit.
+ Specifies the flow(s) that this flow depends on.
 
- Example:
- ```python
- with Runner('slowflow.py', pylint=False) as runner:
- result = runner.run(alpha=5, tags=["abc", "def"], max_workers=5)
- print(result.run.finished)
  ```
-
- Parameters
- ----------
- flow_file : str
- Path to the flow file to run
- show_output : bool, default True
- Show the 'stdout' and 'stderr' to the console by default.
- Only applicable for synchronous 'run' and 'resume' functions.
- profile : Optional[str], default None
- Metaflow profile to use to run this run. If not specified, the default
- profile is used (or the one already set using `METAFLOW_PROFILE`)
- env : Optional[Dict], default None
- Additional environment variables to set for the Run. This overrides the
- environment set for this process.
- cwd : Optional[str], default None
- The directory to run the subprocess in; if not specified, the current
- directory is used.
- file_read_timeout : int, default 3600
- The timeout until which we try to read the runner attribute file.
- **kwargs : Any
- Additional arguments that you would pass to `python myflow.py` before
- the `run` command.
- """
- def __init__(self, flow_file: str, show_output: bool = True, profile: typing.Optional[str] = None, env: typing.Optional[typing.Dict] = None, cwd: typing.Optional[str] = None, file_read_timeout: int = 3600, **kwargs):
- ...
- def __enter__(self) -> metaflow.runner.metaflow_runner.Runner:
- ...
- def __aenter__(self) -> metaflow.runner.metaflow_runner.Runner:
- ...
- def _Runner__get_executing_run(self, tfp_runner_attribute, command_obj):
- ...
- def run(self, **kwargs) -> metaflow.runner.metaflow_runner.ExecutingRun:
- """
- Blocking execution of the run. This method will wait until
- the run has completed execution.
-
- Parameters
- ----------
- **kwargs : Any
- Additional arguments that you would pass to `python myflow.py` after
- the `run` command, in particular, any parameters accepted by the flow.
-
- Returns
- -------
- ExecutingRun
- ExecutingRun containing the results of the run.
- """
- ...
- def resume(self, **kwargs):
- """
- Blocking resume execution of the run.
- This method will wait until the resumed run has completed execution.
-
- Parameters
- ----------
- **kwargs : Any
- Additional arguments that you would pass to `python ./myflow.py` after
- the `resume` command.
-
- Returns
- -------
- ExecutingRun
- ExecutingRun containing the results of the resumed run.
- """
- ...
- def async_run(self, **kwargs) -> metaflow.runner.metaflow_runner.ExecutingRun:
- """
- Non-blocking execution of the run. This method will return as soon as the
- run has launched.
-
- Note that this method is asynchronous and needs to be `await`ed.
-
- Parameters
- ----------
- **kwargs : Any
- Additional arguments that you would pass to `python myflow.py` after
- the `run` command, in particular, any parameters accepted by the flow.
-
- Returns
- -------
- ExecutingRun
- ExecutingRun representing the run that was started.
- """
- ...
- def async_resume(self, **kwargs):
- """
- Non-blocking resume execution of the run.
- This method will return as soon as the resume has launched.
-
- Note that this method is asynchronous and needs to be `await`ed.
-
- Parameters
- ----------
- **kwargs : Any
- Additional arguments that you would pass to `python myflow.py` after
- the `resume` command.
-
- Returns
- -------
- ExecutingRun
- ExecutingRun representing the resumed run that was started.
- """
- ...
- def __exit__(self, exc_type, exc_value, traceback):
- ...
- def __aexit__(self, exc_type, exc_value, traceback):
- ...
- def cleanup(self):
- """
- Delete any temporary files created during execution.
- """
- ...
- ...
-
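For reference, the `Runner` usage shown in the docstring above, made self-contained (`slowflow.py` and the `alpha` parameter are hypothetical):

```python
from metaflow import Runner

with Runner("slowflow.py", pylint=False) as runner:
    result = runner.run(alpha=5, tags=["abc", "def"], max_workers=5)
    print(result.run.finished)  # result.run is a metaflow.Run client object
```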
- class NBRunner(object, metaclass=type):
- """
- A wrapper over `Runner` for executing flows defined in a Jupyter
- notebook cell.
-
- Instantiate this class on the last line of a notebook cell where
- a `flow` is defined. In contrast to `Runner`, this class is not
- meant to be used in a context manager. Instead, use a blocking helper
- function like `nbrun` (which calls `cleanup()` internally) or call
- `cleanup()` explicitly when using non-blocking APIs.
-
- ```python
- run = NBRunner(FlowName).nbrun()
+ @trigger_on_finish(flow='FooFlow')
+ ```
+ or
  ```
+ @trigger_on_finish(flows=['FooFlow', 'BarFlow'])
+ ```
+ This decorator respects the @project decorator and triggers the flow
+ when upstream runs within the same namespace complete successfully.
 
- Parameters
- ----------
- flow : FlowSpec
- Flow defined in the same cell
- show_output : bool, default True
- Show the 'stdout' and 'stderr' to the console by default.
- Only applicable for synchronous 'run' and 'resume' functions.
- profile : Optional[str], default None
- Metaflow profile to use to run this run. If not specified, the default
- profile is used (or the one already set using `METAFLOW_PROFILE`)
- env : Optional[Dict], default None
- Additional environment variables to set for the Run. This overrides the
- environment set for this process.
- base_dir : Optional[str], default None
- The directory to run the subprocess in; if not specified, the current
- working directory is used.
- file_read_timeout : int, default 3600
- The timeout until which we try to read the runner attribute file.
- **kwargs : Any
- Additional arguments that you would pass to `python myflow.py` before
- the `run` command.
- """
- def __init__(self, flow, show_output: bool = True, profile: typing.Optional[str] = None, env: typing.Optional[typing.Dict] = None, base_dir: typing.Optional[str] = None, file_read_timeout: int = 3600, **kwargs):
- ...
- def nbrun(self, **kwargs):
- """
- Blocking execution of the run. This method will wait until
- the run has completed execution.
-
- Note that in contrast to `run`, this method returns a
- `metaflow.Run` object directly and calls `cleanup()` internally
- to support a common notebook pattern of executing a flow and
- retrieving its results immediately.
-
- Parameters
- ----------
- **kwargs : Any
- Additional arguments that you would pass to `python myflow.py` after
- the `run` command, in particular, any parameters accepted by the flow.
-
- Returns
- -------
- Run
- A `metaflow.Run` object representing the finished run.
- """
- ...
- def nbresume(self, **kwargs):
- """
- Blocking resuming of a run. This method will wait until
- the resumed run has completed execution.
-
- Note that in contrast to `resume`, this method returns a
- `metaflow.Run` object directly and calls `cleanup()` internally
- to support a common notebook pattern of executing a flow and
- retrieving its results immediately.
-
- Parameters
- ----------
- **kwargs : Any
- Additional arguments that you would pass to `python myflow.py` after
- the `resume` command.
-
- Returns
- -------
- Run
- A `metaflow.Run` object representing the resumed run.
- """
- ...
- def run(self, **kwargs):
- """
- Runs the flow.
- """
- ...
- def resume(self, **kwargs):
- """
- Resumes the flow.
- """
- ...
- def async_run(self, **kwargs):
- """
- Non-blocking execution of the run. This method will return as soon as the
- run has launched. This method is equivalent to `Runner.async_run`.
-
- Note that this method is asynchronous and needs to be `await`ed.
-
- Parameters
- ----------
- **kwargs : Any
- Additional arguments that you would pass to `python myflow.py` after
- the `run` command, in particular, any parameters accepted by the flow.
-
- Returns
- -------
- ExecutingRun
- ExecutingRun representing the run that was started.
- """
- ...
- def async_resume(self, **kwargs):
- """
- Non-blocking resuming of the run. This method will return as soon as the
- resume has launched. This method is equivalent to `Runner.async_resume`.
-
- Note that this method is asynchronous and needs to be `await`ed.
-
- Parameters
- ----------
- **kwargs : Any
- Additional arguments that you would pass to `python myflow.py` after
- the `resume` command.
-
- Returns
- -------
- ExecutingRun
- ExecutingRun representing the resumed run that was started.
- """
- ...
- def cleanup(self):
- """
- Delete any temporary files created during execution.
-
- Call this method after using `async_run` or `async_resume`. You don't
- have to call this after `nbrun` or `nbresume`.
- """
- ...
- ...
-
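A notebook-style sketch of `NBRunner` per the docstring above (`MyFlow` and `alpha` are hypothetical and assumed to be defined in the same cell):

```python
from metaflow import NBRunner

# Last statement of a notebook cell that defines MyFlow:
run = NBRunner(MyFlow).nbrun(alpha=5)  # blocks, cleans up, returns a metaflow.Run
print(run.successful)
```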
- class Deployer(object, metaclass=type):
- """
- Use the `Deployer` class to configure and access one of the production
- orchestrators supported by Metaflow.
+ Additionally, you can specify project-aware upstream flow dependencies
+ by specifying the fully qualified project_flow_name.
+ ```
+ @trigger_on_finish(flow='my_project.branch.my_branch.FooFlow')
+ ```
+ or
+ ```
+ @trigger_on_finish(flows=['my_project.branch.my_branch.FooFlow', 'BarFlow'])
+ ```
 
- Parameters
- ----------
- flow_file : str
- Path to the flow file to deploy.
- show_output : bool, default True
- Show the 'stdout' and 'stderr' to the console by default.
- profile : Optional[str], default None
- Metaflow profile to use for the deployment. If not specified, the default
- profile is used.
- env : Optional[Dict[str, str]], default None
- Additional environment variables to set for the deployment.
- cwd : Optional[str], default None
- The directory to run the subprocess in; if not specified, the current
- directory is used.
- file_read_timeout : int, default 3600
- The timeout until which we try to read the deployer attribute file.
- **kwargs : Any
- Additional arguments that you would pass to `python myflow.py` before
- the deployment command.
- """
- def __init__(self, flow_file: str, show_output: bool = True, profile: typing.Optional[str] = None, env: typing.Optional[typing.Dict] = None, cwd: typing.Optional[str] = None, file_read_timeout: int = 3600, **kwargs):
- ...
- def _Deployer__make_function(self, deployer_class):
- """
- Create a function for the given deployer class.
-
- Parameters
- ----------
- deployer_class : Type[DeployerImpl]
- Deployer implementation class.
-
- Returns
- -------
- Callable
- Function that initializes and returns an instance of the deployer class.
- """
- ...
- ...
-
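A sketch of the `Deployer` workflow, mirroring the example in the `NBDeployer` docstring below (file name, deployment name, and the `alpha` parameter are hypothetical):

```python
from metaflow import Deployer

deployed = Deployer("myflow.py").argo_workflows(name="demo").create()
triggered = deployed.trigger(alpha=300)
print(triggered.status)
triggered.terminate()
```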
- class NBDeployer(object, metaclass=type):
- """
- A wrapper over `Deployer` for deploying flows defined in a Jupyter
- notebook cell.
+ You can also specify just the project or project branch (other values will be
+ inferred from the current project or project branch):
+ ```
+ @trigger_on_finish(flow={"name": "FooFlow", "project": "my_project", "project_branch": "branch"})
+ ```
 
- Instantiate this class on the last line of a notebook cell where
- a `flow` is defined. In contrast to `Deployer`, this class is not
- meant to be used in a context manager.
+ Note that `branch` is typically one of:
+ - `prod`
+ - `user.bob`
+ - `test.my_experiment`
+ - `prod.staging`
+ """
+ ...
+
+ @typing.overload
+ def conda_base(*, packages: typing.Dict[str, str] = {}, libraries: typing.Dict[str, str] = {}, python: typing.Optional[str] = None, disabled: bool = False) -> typing.Callable[[typing.Type[FlowSpecDerived]], typing.Type[FlowSpecDerived]]:
+ """
+ Specifies the Conda environment for all steps of the flow.
 
- ```python
- deployer = NBDeployer(FlowName)
- ar = deployer.argo_workflows(name="madhur")
- ar_obj = ar.create()
- result = ar_obj.trigger(alpha=300)
- print(result.status)
- print(result.run)
- result.terminate()
- ```
+ Use `@conda_base` to set common libraries required by all
+ steps and use `@conda` to specify step-specific additions.
+ """
+ ...
+
+ @typing.overload
+ def conda_base(f: typing.Type[FlowSpecDerived]) -> typing.Type[FlowSpecDerived]:
+ ...
+
+ def conda_base(f: typing.Optional[typing.Type[FlowSpecDerived]] = None, *, packages: typing.Dict[str, str] = {}, libraries: typing.Dict[str, str] = {}, python: typing.Optional[str] = None, disabled: bool = False):
+ """
+ Specifies the Conda environment for all steps of the flow.
 
- Parameters
- ----------
- flow : FlowSpec
- Flow defined in the same cell
- show_output : bool, default True
- Show the 'stdout' and 'stderr' to the console by default.
- profile : Optional[str], default None
- Metaflow profile to use to deploy this run. If not specified, the default
- profile is used (or the one already set using `METAFLOW_PROFILE`)
- env : Optional[Dict[str, str]], default None
- Additional environment variables to set. This overrides the
- environment set for this process.
- base_dir : Optional[str], default None
- The directory to run the subprocess in; if not specified, the current
- working directory is used.
- **kwargs : Any
- Additional arguments that you would pass to `python myflow.py`, i.e. options
- listed in `python myflow.py --help`
- """
- def __init__(self, flow, show_output: bool = True, profile: typing.Optional[str] = None, env: typing.Optional[typing.Dict] = None, base_dir: typing.Optional[str] = None, file_read_timeout: int = 3600, **kwargs):
- ...
- def cleanup(self):
- """
- Delete any temporary files created during execution.
- """
- ...
+ Use `@conda_base` to set common libraries required by all
+ steps and use `@conda` to specify step-specific additions.
+ """
  ...
 