metaflow-stubs 2.12.28__py2.py3-none-any.whl → 2.12.29__py2.py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- metaflow-stubs/__init__.pyi +297 -3001
- metaflow-stubs/cards.pyi +19 -473
- metaflow-stubs/cli.pyi +17 -81
- metaflow-stubs/client/__init__.pyi +19 -1113
- metaflow-stubs/client/core.pyi +18 -158
- metaflow-stubs/client/filecache.pyi +8 -12
- metaflow-stubs/clone_util.pyi +6 -26
- metaflow-stubs/events.pyi +6 -5
- metaflow-stubs/exception.pyi +8 -6
- metaflow-stubs/flowspec.pyi +22 -106
- metaflow-stubs/generated_for.txt +1 -1
- metaflow-stubs/includefile.pyi +16 -564
- metaflow-stubs/info_file.pyi +6 -5
- metaflow-stubs/metadata_provider/__init__.pyi +16 -0
- metaflow-stubs/metadata_provider/heartbeat.pyi +34 -0
- metaflow-stubs/{metadata → metadata_provider}/metadata.pyi +10 -22
- metaflow-stubs/metadata_provider/util.pyi +19 -0
- metaflow-stubs/metaflow_config.pyi +8 -11
- metaflow-stubs/metaflow_current.pyi +10 -9
- metaflow-stubs/mflog/__init__.pyi +6 -0
- metaflow-stubs/mflog/mflog.pyi +52 -5
- metaflow-stubs/multicore_utils.pyi +6 -5
- metaflow-stubs/parameters.pyi +13 -23
- metaflow-stubs/plugins/__init__.pyi +51 -163
- metaflow-stubs/plugins/airflow/__init__.pyi +12 -5
- metaflow-stubs/plugins/airflow/airflow.pyi +19 -130
- metaflow-stubs/plugins/airflow/airflow_cli.pyi +17 -136
- metaflow-stubs/plugins/airflow/airflow_decorator.pyi +7 -26
- metaflow-stubs/plugins/airflow/airflow_utils.pyi +7 -6
- metaflow-stubs/plugins/airflow/exception.pyi +7 -11
- metaflow-stubs/plugins/airflow/sensors/__init__.pyi +10 -97
- metaflow-stubs/plugins/airflow/sensors/base_sensor.pyi +9 -30
- metaflow-stubs/plugins/airflow/sensors/external_task_sensor.pyi +9 -40
- metaflow-stubs/plugins/airflow/sensors/s3_sensor.pyi +9 -40
- metaflow-stubs/plugins/argo/__init__.pyi +12 -5
- metaflow-stubs/plugins/argo/argo_client.pyi +8 -26
- metaflow-stubs/plugins/argo/argo_events.pyi +7 -11
- metaflow-stubs/plugins/argo/argo_workflows.pyi +16 -120
- metaflow-stubs/plugins/argo/argo_workflows_cli.pyi +22 -460
- metaflow-stubs/plugins/argo/argo_workflows_decorator.pyi +12 -404
- metaflow-stubs/plugins/argo/argo_workflows_deployer.pyi +65 -322
- metaflow-stubs/plugins/argo/argo_workflows_deployer_objects.pyi +165 -0
- metaflow-stubs/plugins/aws/__init__.pyi +11 -5
- metaflow-stubs/plugins/aws/aws_client.pyi +6 -5
- metaflow-stubs/plugins/aws/aws_utils.pyi +6 -11
- metaflow-stubs/plugins/aws/batch/__init__.pyi +10 -5
- metaflow-stubs/plugins/aws/batch/batch.pyi +10 -55
- metaflow-stubs/plugins/aws/batch/batch_cli.pyi +10 -31
- metaflow-stubs/plugins/aws/batch/batch_client.pyi +7 -11
- metaflow-stubs/plugins/aws/batch/batch_decorator.pyi +15 -140
- metaflow-stubs/plugins/aws/secrets_manager/__init__.pyi +7 -5
- metaflow-stubs/plugins/aws/secrets_manager/aws_secrets_manager_secrets_provider.pyi +10 -21
- metaflow-stubs/plugins/aws/step_functions/__init__.pyi +15 -5
- metaflow-stubs/plugins/aws/step_functions/dynamo_db_client.pyi +6 -5
- metaflow-stubs/plugins/aws/step_functions/event_bridge_client.pyi +6 -5
- metaflow-stubs/plugins/aws/step_functions/production_token.pyi +6 -5
- metaflow-stubs/plugins/aws/step_functions/schedule_decorator.pyi +7 -5
- metaflow-stubs/plugins/aws/step_functions/step_functions.pyi +11 -65
- metaflow-stubs/plugins/aws/step_functions/step_functions_cli.pyi +19 -175
- metaflow-stubs/plugins/aws/step_functions/step_functions_client.pyi +6 -5
- metaflow-stubs/plugins/aws/step_functions/step_functions_decorator.pyi +8 -37
- metaflow-stubs/plugins/aws/step_functions/step_functions_deployer.pyi +53 -290
- metaflow-stubs/plugins/aws/step_functions/step_functions_deployer_objects.pyi +127 -0
- metaflow-stubs/plugins/azure/__init__.pyi +12 -7
- metaflow-stubs/plugins/azure/azure_credential.pyi +6 -5
- metaflow-stubs/plugins/azure/azure_exceptions.pyi +7 -11
- metaflow-stubs/plugins/azure/azure_secret_manager_secrets_provider.pyi +11 -24
- metaflow-stubs/plugins/azure/azure_utils.pyi +11 -29
- metaflow-stubs/plugins/azure/blob_service_client_factory.pyi +8 -23
- metaflow-stubs/plugins/azure/includefile_support.pyi +7 -17
- metaflow-stubs/plugins/cards/__init__.pyi +15 -5
- metaflow-stubs/plugins/cards/card_cli.pyi +22 -491
- metaflow-stubs/plugins/cards/card_client.pyi +13 -75
- metaflow-stubs/plugins/cards/card_creator.pyi +7 -10
- metaflow-stubs/plugins/cards/card_datastore.pyi +10 -18
- metaflow-stubs/plugins/cards/card_decorator.pyi +10 -126
- metaflow-stubs/plugins/cards/card_modules/__init__.pyi +14 -81
- metaflow-stubs/plugins/cards/card_modules/basic.pyi +14 -97
- metaflow-stubs/plugins/cards/card_modules/card.pyi +6 -5
- metaflow-stubs/plugins/cards/card_modules/chevron/__init__.pyi +12 -73
- metaflow-stubs/plugins/cards/card_modules/chevron/main.pyi +6 -61
- metaflow-stubs/plugins/cards/card_modules/chevron/metadata.pyi +6 -5
- metaflow-stubs/plugins/cards/card_modules/chevron/renderer.pyi +8 -45
- metaflow-stubs/plugins/cards/card_modules/chevron/tokenizer.pyi +7 -6
- metaflow-stubs/plugins/cards/card_modules/components.pyi +25 -108
- metaflow-stubs/plugins/cards/card_modules/convert_to_native_type.pyi +6 -5
- metaflow-stubs/plugins/cards/card_modules/renderer_tools.pyi +6 -12
- metaflow-stubs/plugins/cards/card_modules/test_cards.pyi +11 -88
- metaflow-stubs/plugins/cards/card_resolver.pyi +6 -49
- metaflow-stubs/plugins/cards/component_serializer.pyi +13 -63
- metaflow-stubs/plugins/cards/exception.pyi +7 -11
- metaflow-stubs/plugins/catch_decorator.pyi +10 -30
- metaflow-stubs/plugins/datatools/__init__.pyi +13 -392
- metaflow-stubs/plugins/datatools/local.pyi +7 -11
- metaflow-stubs/plugins/datatools/s3/__init__.pyi +19 -653
- metaflow-stubs/plugins/datatools/s3/s3.pyi +16 -264
- metaflow-stubs/plugins/datatools/s3/s3tail.pyi +7 -10
- metaflow-stubs/plugins/datatools/s3/s3util.pyi +6 -11
- metaflow-stubs/plugins/debug_logger.pyi +7 -5
- metaflow-stubs/plugins/debug_monitor.pyi +7 -5
- metaflow-stubs/plugins/environment_decorator.pyi +7 -5
- metaflow-stubs/plugins/events_decorator.pyi +8 -14
- metaflow-stubs/plugins/frameworks/__init__.pyi +7 -5
- metaflow-stubs/plugins/frameworks/pytorch.pyi +8 -45
- metaflow-stubs/plugins/gcp/__init__.pyi +11 -7
- metaflow-stubs/plugins/gcp/gcp_secret_manager_secrets_provider.pyi +11 -24
- metaflow-stubs/plugins/gcp/gs_exceptions.pyi +7 -11
- metaflow-stubs/plugins/gcp/gs_storage_client_factory.pyi +6 -5
- metaflow-stubs/plugins/gcp/gs_utils.pyi +8 -20
- metaflow-stubs/plugins/gcp/includefile_support.pyi +7 -17
- metaflow-stubs/plugins/kubernetes/__init__.pyi +13 -5
- metaflow-stubs/plugins/kubernetes/kube_utils.pyi +6 -10
- metaflow-stubs/plugins/kubernetes/kubernetes.pyi +9 -29
- metaflow-stubs/plugins/kubernetes/kubernetes_cli.pyi +16 -155
- metaflow-stubs/plugins/kubernetes/kubernetes_client.pyi +9 -72
- metaflow-stubs/plugins/kubernetes/kubernetes_decorator.pyi +19 -142
- metaflow-stubs/plugins/kubernetes/kubernetes_job.pyi +8 -41
- metaflow-stubs/plugins/kubernetes/kubernetes_jobsets.pyi +7 -11
- metaflow-stubs/plugins/logs_cli.pyi +11 -10
- metaflow-stubs/plugins/package_cli.pyi +7 -5
- metaflow-stubs/plugins/parallel_decorator.pyi +11 -59
- metaflow-stubs/plugins/project_decorator.pyi +8 -14
- metaflow-stubs/plugins/pypi/__init__.pyi +12 -11
- metaflow-stubs/plugins/pypi/conda_decorator.pyi +8 -27
- metaflow-stubs/plugins/pypi/conda_environment.pyi +14 -20
- metaflow-stubs/plugins/pypi/pypi_decorator.pyi +7 -5
- metaflow-stubs/plugins/pypi/pypi_environment.pyi +7 -39
- metaflow-stubs/plugins/pypi/utils.pyi +7 -11
- metaflow-stubs/plugins/resources_decorator.pyi +7 -5
- metaflow-stubs/plugins/retry_decorator.pyi +7 -11
- metaflow-stubs/plugins/secrets/__init__.pyi +9 -5
- metaflow-stubs/plugins/secrets/inline_secrets_provider.pyi +9 -14
- metaflow-stubs/plugins/secrets/secrets_decorator.pyi +7 -11
- metaflow-stubs/plugins/storage_executor.pyi +6 -11
- metaflow-stubs/plugins/tag_cli.pyi +14 -396
- metaflow-stubs/plugins/test_unbounded_foreach_decorator.pyi +9 -34
- metaflow-stubs/plugins/timeout_decorator.pyi +8 -12
- metaflow-stubs/procpoll.pyi +7 -5
- metaflow-stubs/pylint_wrapper.pyi +7 -11
- metaflow-stubs/runner/__init__.pyi +13 -5
- metaflow-stubs/runner/deployer.pyi +102 -210
- metaflow-stubs/runner/deployer_impl.pyi +87 -0
- metaflow-stubs/runner/metaflow_runner.pyi +24 -508
- metaflow-stubs/runner/nbdeploy.pyi +16 -60
- metaflow-stubs/runner/nbrun.pyi +11 -148
- metaflow-stubs/runner/subprocess_manager.pyi +9 -10
- metaflow-stubs/runner/utils.pyi +44 -9
- metaflow-stubs/system/__init__.pyi +9 -87
- metaflow-stubs/system/system_logger.pyi +7 -6
- metaflow-stubs/system/system_monitor.pyi +6 -5
- metaflow-stubs/tagging_util.pyi +6 -10
- metaflow-stubs/tuple_util.pyi +6 -5
- metaflow-stubs/version.pyi +6 -5
- {metaflow_stubs-2.12.28.dist-info → metaflow_stubs-2.12.29.dist-info}/METADATA +2 -2
- metaflow_stubs-2.12.29.dist-info/RECORD +158 -0
- metaflow-stubs/metadata/util.pyi +0 -18
- metaflow_stubs-2.12.28.dist-info/RECORD +0 -152
- {metaflow_stubs-2.12.28.dist-info → metaflow_stubs-2.12.29.dist-info}/WHEEL +0 -0
- {metaflow_stubs-2.12.28.dist-info → metaflow_stubs-2.12.29.dist-info}/top_level.txt +0 -0
metaflow-stubs/__init__.pyi
CHANGED
@@ -1,785 +1,68 @@
|
|
1
|
-
|
2
|
-
#
|
3
|
-
# MF version: 2.12.
|
4
|
-
# Generated on 2024-11-
|
5
|
-
|
1
|
+
######################################################################################################
|
2
|
+
# Auto-generated Metaflow stub file #
|
3
|
+
# MF version: 2.12.29 #
|
4
|
+
# Generated on 2024-11-07T22:19:34.657765 #
|
5
|
+
######################################################################################################
|
6
6
|
|
7
7
|
from __future__ import annotations
|
8
8
|
|
9
9
|
import typing
|
10
10
|
if typing.TYPE_CHECKING:
|
11
|
-
import metaflow.metaflow_current
|
12
|
-
import typing
|
13
11
|
import datetime
|
14
|
-
import
|
15
|
-
import metaflow.runner.metaflow_runner
|
16
|
-
import metaflow.client.core
|
17
|
-
import metaflow.events
|
18
|
-
import metaflow.datastore.inputs
|
19
|
-
import metaflow.flowspec
|
20
|
-
import metaflow._vendor.click.types
|
21
|
-
import metaflow.parameters
|
22
|
-
import metaflow.plugins.datatools.s3.s3
|
12
|
+
import typing
|
23
13
|
FlowSpecDerived = typing.TypeVar("FlowSpecDerived", bound="FlowSpec", contravariant=False, covariant=False)
|
24
14
|
StepFlag = typing.NewType("StepFlag", bool)
|
25
15
|
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
current: metaflow.metaflow_current.Current
|
76
|
-
|
77
|
-
def metadata(ms: str) -> str:
|
78
|
-
"""
|
79
|
-
Switch Metadata provider.
|
80
|
-
|
81
|
-
This call has a global effect. Selecting the local metadata will,
|
82
|
-
for example, not allow access to information stored in remote
|
83
|
-
metadata providers.
|
84
|
-
|
85
|
-
Note that you don't typically have to call this function directly. Usually
|
86
|
-
the metadata provider is set through the Metaflow configuration file. If you
|
87
|
-
need to switch between multiple providers, you can use the `METAFLOW_PROFILE`
|
88
|
-
environment variable to switch between configurations.
|
89
|
-
|
90
|
-
Parameters
|
91
|
-
----------
|
92
|
-
ms : str
|
93
|
-
Can be a path (selects local metadata), a URL starting with http (selects
|
94
|
-
the service metadata) or an explicit specification <metadata_type>@<info>; as an
|
95
|
-
example, you can specify local@<path> or service@<url>.
|
96
|
-
|
97
|
-
Returns
|
98
|
-
-------
|
99
|
-
str
|
100
|
-
The description of the metadata selected (equivalent to the result of
|
101
|
-
get_metadata()).
|
102
|
-
"""
|
103
|
-
...
|
16
|
+
from . import info_file as info_file
|
17
|
+
from . import exception as exception
|
18
|
+
from . import metaflow_config as metaflow_config
|
19
|
+
from . import multicore_utils as multicore_utils
|
20
|
+
from .multicore_utils import parallel_imap_unordered as parallel_imap_unordered
|
21
|
+
from .multicore_utils import parallel_map as parallel_map
|
22
|
+
from . import metaflow_current as metaflow_current
|
23
|
+
from .metaflow_current import current as current
|
24
|
+
from . import parameters as parameters
|
25
|
+
from . import tagging_util as tagging_util
|
26
|
+
from . import metadata_provider as metadata_provider
|
27
|
+
from . import flowspec as flowspec
|
28
|
+
from .flowspec import FlowSpec as FlowSpec
|
29
|
+
from .parameters import Parameter as Parameter
|
30
|
+
from .parameters import JSONTypeClass as JSONTypeClass
|
31
|
+
from .parameters import JSONType as JSONType
|
32
|
+
from . import events as events
|
33
|
+
from . import tuple_util as tuple_util
|
34
|
+
from . import runner as runner
|
35
|
+
from . import plugins as plugins
|
36
|
+
from .plugins.datatools.s3.s3 import S3 as S3
|
37
|
+
from . import includefile as includefile
|
38
|
+
from .includefile import IncludeFile as IncludeFile
|
39
|
+
from . import cards as cards
|
40
|
+
from . import client as client
|
41
|
+
from .client.core import namespace as namespace
|
42
|
+
from .client.core import get_namespace as get_namespace
|
43
|
+
from .client.core import default_namespace as default_namespace
|
44
|
+
from .client.core import metadata as metadata
|
45
|
+
from .client.core import get_metadata as get_metadata
|
46
|
+
from .client.core import default_metadata as default_metadata
|
47
|
+
from .client.core import Metaflow as Metaflow
|
48
|
+
from .client.core import Flow as Flow
|
49
|
+
from .client.core import Run as Run
|
50
|
+
from .client.core import Step as Step
|
51
|
+
from .client.core import Task as Task
|
52
|
+
from .client.core import DataArtifact as DataArtifact
|
53
|
+
from .runner.metaflow_runner import Runner as Runner
|
54
|
+
from .runner.nbrun import NBRunner as NBRunner
|
55
|
+
from .runner.deployer import Deployer as Deployer
|
56
|
+
from .runner.deployer import DeployedFlow as DeployedFlow
|
57
|
+
from .runner.nbdeploy import NBDeployer as NBDeployer
|
58
|
+
from . import version as version
|
59
|
+
from . import system as system
|
60
|
+
from . import pylint_wrapper as pylint_wrapper
|
61
|
+
from . import procpoll as procpoll
|
62
|
+
from . import clone_util as clone_util
|
63
|
+
from . import cli as cli
|
104
64
|
|
105
|
-
|
106
|
-
"""
|
107
|
-
Main class from which all Flows should inherit.
|
108
|
-
|
109
|
-
Attributes
|
110
|
-
----------
|
111
|
-
index
|
112
|
-
input
|
113
|
-
"""
|
114
|
-
def __init__(self, use_cli = True):
|
115
|
-
"""
|
116
|
-
Construct a FlowSpec
|
117
|
-
|
118
|
-
Parameters
|
119
|
-
----------
|
120
|
-
use_cli : bool, default True
|
121
|
-
Set to True if the flow is invoked from __main__ or the command line
|
122
|
-
"""
|
123
|
-
...
|
124
|
-
@property
|
125
|
-
def script_name(self) -> str:
|
126
|
-
"""
|
127
|
-
[Legacy function - do not use. Use `current` instead]
|
128
|
-
|
129
|
-
Returns the name of the script containing the flow
|
130
|
-
|
131
|
-
Returns
|
132
|
-
-------
|
133
|
-
str
|
134
|
-
A string containing the name of the script
|
135
|
-
"""
|
136
|
-
...
|
137
|
-
def __iter__(self):
|
138
|
-
"""
|
139
|
-
[Legacy function - do not use]
|
140
|
-
|
141
|
-
Iterate over all steps in the Flow
|
142
|
-
|
143
|
-
Returns
|
144
|
-
-------
|
145
|
-
Iterator[graph.DAGNode]
|
146
|
-
Iterator over the steps in the flow
|
147
|
-
"""
|
148
|
-
...
|
149
|
-
def __getattr__(self, name: str):
|
150
|
-
...
|
151
|
-
def cmd(self, cmdline, input = {}, output = []):
|
152
|
-
"""
|
153
|
-
[Legacy function - do not use]
|
154
|
-
"""
|
155
|
-
...
|
156
|
-
@property
|
157
|
-
def index(self) -> typing.Optional[int]:
|
158
|
-
"""
|
159
|
-
The index of this foreach branch.
|
160
|
-
|
161
|
-
In a foreach step, multiple instances of this step (tasks) will be executed,
|
162
|
-
one for each element in the foreach. This property returns the zero based index
|
163
|
-
of the current task. If this is not a foreach step, this returns None.
|
164
|
-
|
165
|
-
If you need to know the indices of the parent tasks in a nested foreach, use
|
166
|
-
`FlowSpec.foreach_stack`.
|
167
|
-
|
168
|
-
Returns
|
169
|
-
-------
|
170
|
-
int, optional
|
171
|
-
Index of the task in a foreach step.
|
172
|
-
"""
|
173
|
-
...
|
174
|
-
@property
|
175
|
-
def input(self) -> typing.Optional[typing.Any]:
|
176
|
-
"""
|
177
|
-
The value of the foreach artifact in this foreach branch.
|
178
|
-
|
179
|
-
In a foreach step, multiple instances of this step (tasks) will be executed,
|
180
|
-
one for each element in the foreach. This property returns the element passed
|
181
|
-
to the current task. If this is not a foreach step, this returns None.
|
182
|
-
|
183
|
-
If you need to know the values of the parent tasks in a nested foreach, use
|
184
|
-
`FlowSpec.foreach_stack`.
|
185
|
-
|
186
|
-
Returns
|
187
|
-
-------
|
188
|
-
object, optional
|
189
|
-
Input passed to the foreach task.
|
190
|
-
"""
|
191
|
-
...
|
192
|
-
def foreach_stack(self) -> typing.Optional[typing.List[typing.Tuple[int, int, typing.Any]]]:
|
193
|
-
"""
|
194
|
-
Returns the current stack of foreach indexes and values for the current step.
|
195
|
-
|
196
|
-
Use this information to understand what data is being processed in the current
|
197
|
-
foreach branch. For example, considering the following code:
|
198
|
-
```
|
199
|
-
@step
|
200
|
-
def root(self):
|
201
|
-
self.split_1 = ['a', 'b', 'c']
|
202
|
-
self.next(self.nest_1, foreach='split_1')
|
203
|
-
|
204
|
-
@step
|
205
|
-
def nest_1(self):
|
206
|
-
self.split_2 = ['d', 'e', 'f', 'g']
|
207
|
-
self.next(self.nest_2, foreach='split_2'):
|
208
|
-
|
209
|
-
@step
|
210
|
-
def nest_2(self):
|
211
|
-
foo = self.foreach_stack()
|
212
|
-
```
|
213
|
-
|
214
|
-
`foo` will take the following values in the various tasks for nest_2:
|
215
|
-
```
|
216
|
-
[(0, 3, 'a'), (0, 4, 'd')]
|
217
|
-
[(0, 3, 'a'), (1, 4, 'e')]
|
218
|
-
...
|
219
|
-
[(0, 3, 'a'), (3, 4, 'g')]
|
220
|
-
[(1, 3, 'b'), (0, 4, 'd')]
|
221
|
-
...
|
222
|
-
```
|
223
|
-
where each tuple corresponds to:
|
224
|
-
|
225
|
-
- The index of the task for that level of the loop.
|
226
|
-
- The number of splits for that level of the loop.
|
227
|
-
- The value for that level of the loop.
|
228
|
-
|
229
|
-
Note that the last tuple returned in a task corresponds to:
|
230
|
-
|
231
|
-
- 1st element: value returned by `self.index`.
|
232
|
-
- 3rd element: value returned by `self.input`.
|
233
|
-
|
234
|
-
Returns
|
235
|
-
-------
|
236
|
-
List[Tuple[int, int, Any]]
|
237
|
-
An array describing the current stack of foreach steps.
|
238
|
-
"""
|
239
|
-
...
|
240
|
-
def merge_artifacts(self, inputs: metaflow.datastore.inputs.Inputs, exclude: typing.Optional[typing.List[str]] = None, include: typing.Optional[typing.List[str]] = None):
|
241
|
-
"""
|
242
|
-
Helper function for merging artifacts in a join step.
|
243
|
-
|
244
|
-
This function takes all the artifacts coming from the branches of a
|
245
|
-
join point and assigns them to self in the calling step. Only artifacts
|
246
|
-
not set in the current step are considered. If, for a given artifact, different
|
247
|
-
values are present on the incoming edges, an error will be thrown and the artifacts
|
248
|
-
that conflict will be reported.
|
249
|
-
|
250
|
-
As a few examples, in the simple graph: A splitting into B and C and joining in D:
|
251
|
-
```
|
252
|
-
A:
|
253
|
-
self.x = 5
|
254
|
-
self.y = 6
|
255
|
-
B:
|
256
|
-
self.b_var = 1
|
257
|
-
self.x = from_b
|
258
|
-
C:
|
259
|
-
self.x = from_c
|
260
|
-
|
261
|
-
D:
|
262
|
-
merge_artifacts(inputs)
|
263
|
-
```
|
264
|
-
In D, the following artifacts are set:
|
265
|
-
- `y` (value: 6), `b_var` (value: 1)
|
266
|
-
- if `from_b` and `from_c` are the same, `x` will be accessible and have value `from_b`
|
267
|
-
- if `from_b` and `from_c` are different, an error will be thrown. To prevent this error,
|
268
|
-
you need to manually set `self.x` in D to a merged value (for example the max) prior to
|
269
|
-
calling `merge_artifacts`.
|
270
|
-
|
271
|
-
Parameters
|
272
|
-
----------
|
273
|
-
inputs : Inputs
|
274
|
-
Incoming steps to the join point.
|
275
|
-
exclude : List[str], optional, default None
|
276
|
-
If specified, do not consider merging artifacts with a name in `exclude`.
|
277
|
-
Cannot specify if `include` is also specified.
|
278
|
-
include : List[str], optional, default None
|
279
|
-
If specified, only merge artifacts specified. Cannot specify if `exclude` is
|
280
|
-
also specified.
|
281
|
-
|
282
|
-
Raises
|
283
|
-
------
|
284
|
-
MetaflowException
|
285
|
-
This exception is thrown if this is not called in a join step.
|
286
|
-
UnhandledInMergeArtifactsException
|
287
|
-
This exception is thrown in case of unresolved conflicts.
|
288
|
-
MissingInMergeArtifactsException
|
289
|
-
This exception is thrown in case an artifact specified in `include` cannot
|
290
|
-
be found.
|
291
|
-
"""
|
292
|
-
...
|
293
|
-
def next(self, *dsts: typing.Callable[..., None], **kwargs):
|
294
|
-
"""
|
295
|
-
Indicates the next step to execute after this step has completed.
|
296
|
-
|
297
|
-
This statement should appear as the last statement of each step, except
|
298
|
-
the end step.
|
299
|
-
|
300
|
-
There are several valid formats to specify the next step:
|
301
|
-
|
302
|
-
- Straight-line connection: `self.next(self.next_step)` where `next_step` is a method in
|
303
|
-
the current class decorated with the `@step` decorator.
|
304
|
-
|
305
|
-
- Static fan-out connection: `self.next(self.step1, self.step2, ...)` where `stepX` are
|
306
|
-
methods in the current class decorated with the `@step` decorator.
|
307
|
-
|
308
|
-
- Foreach branch:
|
309
|
-
```
|
310
|
-
self.next(self.foreach_step, foreach='foreach_iterator')
|
311
|
-
```
|
312
|
-
In this situation, `foreach_step` is a method in the current class decorated with the
|
313
|
-
`@step` decorator and `foreach_iterator` is a variable name in the current class that
|
314
|
-
evaluates to an iterator. A task will be launched for each value in the iterator and
|
315
|
-
each task will execute the code specified by the step `foreach_step`.
|
316
|
-
|
317
|
-
Parameters
|
318
|
-
----------
|
319
|
-
dsts : Callable[..., None]
|
320
|
-
One or more methods annotated with `@step`.
|
321
|
-
|
322
|
-
Raises
|
323
|
-
------
|
324
|
-
InvalidNextException
|
325
|
-
Raised if the format of the arguments does not match one of the ones given above.
|
326
|
-
"""
|
327
|
-
...
|
328
|
-
def __str__(self):
|
329
|
-
...
|
330
|
-
def __getstate__(self):
|
331
|
-
...
|
332
|
-
...
|
333
|
-
|
334
|
-
class Parameter(object, metaclass=type):
|
335
|
-
"""
|
336
|
-
Defines a parameter for a flow.
|
337
|
-
|
338
|
-
Parameters must be instantiated as class variables in flow classes, e.g.
|
339
|
-
```
|
340
|
-
class MyFlow(FlowSpec):
|
341
|
-
param = Parameter('myparam')
|
342
|
-
```
|
343
|
-
in this case, the parameter is specified on the command line as
|
344
|
-
```
|
345
|
-
python myflow.py run --myparam=5
|
346
|
-
```
|
347
|
-
and its value is accessible through a read-only artifact like this:
|
348
|
-
```
|
349
|
-
print(self.param == 5)
|
350
|
-
```
|
351
|
-
Note that the user-visible parameter name, `myparam` above, can be
|
352
|
-
different from the artifact name, `param` above.
|
353
|
-
|
354
|
-
The parameter value is converted to a Python type based on the `type`
|
355
|
-
argument or to match the type of `default`, if it is set.
|
356
|
-
|
357
|
-
Parameters
|
358
|
-
----------
|
359
|
-
name : str
|
360
|
-
User-visible parameter name.
|
361
|
-
default : str or float or int or bool or `JSONType` or a function.
|
362
|
-
Default value for the parameter. Use a special `JSONType` class to
|
363
|
-
indicate that the value must be a valid JSON object. A function
|
364
|
-
implies that the parameter corresponds to a *deploy-time parameter*.
|
365
|
-
The type of the default value is used as the parameter `type`.
|
366
|
-
type : Type, default None
|
367
|
-
If `default` is not specified, define the parameter type. Specify
|
368
|
-
one of `str`, `float`, `int`, `bool`, or `JSONType`. If None, defaults
|
369
|
-
to the type of `default` or `str` if none specified.
|
370
|
-
help : str, optional
|
371
|
-
Help text to show in `run --help`.
|
372
|
-
required : bool, default False
|
373
|
-
Require that the user specified a value for the parameter.
|
374
|
-
`required=True` implies that the `default` is not used.
|
375
|
-
show_default : bool, default True
|
376
|
-
If True, show the default value in the help text.
|
377
|
-
"""
|
378
|
-
def __init__(self, name: str, default: typing.Union[str, float, int, bool, typing.Dict[str, typing.Any], typing.Callable[[], typing.Union[str, float, int, bool, typing.Dict[str, typing.Any]]], None] = None, type: typing.Union[typing.Type[str], typing.Type[float], typing.Type[int], typing.Type[bool], metaflow.parameters.JSONTypeClass, None] = None, help: typing.Optional[str] = None, required: bool = False, show_default: bool = True, **kwargs: typing.Dict[str, typing.Any]):
|
379
|
-
...
|
380
|
-
def __repr__(self):
|
381
|
-
...
|
382
|
-
def __str__(self):
|
383
|
-
...
|
384
|
-
def option_kwargs(self, deploy_mode):
|
385
|
-
...
|
386
|
-
def load_parameter(self, v):
|
387
|
-
...
|
388
|
-
@property
|
389
|
-
def is_string_type(self):
|
390
|
-
...
|
391
|
-
def __getitem__(self, x):
|
392
|
-
...
|
393
|
-
...
|
394
|
-
|
395
|
-
class JSONTypeClass(metaflow._vendor.click.types.ParamType, metaclass=type):
|
396
|
-
def convert(self, value, param, ctx):
|
397
|
-
...
|
398
|
-
def __str__(self):
|
399
|
-
...
|
400
|
-
def __repr__(self):
|
401
|
-
...
|
402
|
-
...
|
403
|
-
|
404
|
-
JSONType: metaflow.parameters.JSONTypeClass
|
405
|
-
|
406
|
-
class S3(object, metaclass=type):
|
407
|
-
"""
|
408
|
-
The Metaflow S3 client.
|
409
|
-
|
410
|
-
This object manages the connection to S3 and a temporary diretory that is used
|
411
|
-
to download objects. Note that in most cases when the data fits in memory, no local
|
412
|
-
disk IO is needed as operations are cached by the operating system, which makes
|
413
|
-
operations fast as long as there is enough memory available.
|
414
|
-
|
415
|
-
The easiest way is to use this object as a context manager:
|
416
|
-
```
|
417
|
-
with S3() as s3:
|
418
|
-
data = [obj.blob for obj in s3.get_many(urls)]
|
419
|
-
print(data)
|
420
|
-
```
|
421
|
-
The context manager takes care of creating and deleting a temporary directory
|
422
|
-
automatically. Without a context manager, you must call `.close()` to delete
|
423
|
-
the directory explicitly:
|
424
|
-
```
|
425
|
-
s3 = S3()
|
426
|
-
data = [obj.blob for obj in s3.get_many(urls)]
|
427
|
-
s3.close()
|
428
|
-
```
|
429
|
-
You can customize the location of the temporary directory with `tmproot`. It
|
430
|
-
defaults to the current working directory.
|
431
|
-
|
432
|
-
To make it easier to deal with object locations, the client can be initialized
|
433
|
-
with an S3 path prefix. There are three ways to handle locations:
|
434
|
-
|
435
|
-
1. Use a `metaflow.Run` object or `self`, e.g. `S3(run=self)` which
|
436
|
-
initializes the prefix with the global `DATATOOLS_S3ROOT` path, combined
|
437
|
-
with the current run ID. This mode makes it easy to version data based
|
438
|
-
on the run ID consistently. You can use the `bucket` and `prefix` to
|
439
|
-
override parts of `DATATOOLS_S3ROOT`.
|
440
|
-
|
441
|
-
2. Specify an S3 prefix explicitly with `s3root`,
|
442
|
-
e.g. `S3(s3root='s3://mybucket/some/path')`.
|
443
|
-
|
444
|
-
3. Specify nothing, i.e. `S3()`, in which case all operations require
|
445
|
-
a full S3 url prefixed with `s3://`.
|
446
|
-
|
447
|
-
Parameters
|
448
|
-
----------
|
449
|
-
tmproot : str, default: '.'
|
450
|
-
Where to store the temporary directory.
|
451
|
-
bucket : str, optional
|
452
|
-
Override the bucket from `DATATOOLS_S3ROOT` when `run` is specified.
|
453
|
-
prefix : str, optional
|
454
|
-
Override the path from `DATATOOLS_S3ROOT` when `run` is specified.
|
455
|
-
run : FlowSpec or Run, optional
|
456
|
-
Derive path prefix from the current or a past run ID, e.g. S3(run=self).
|
457
|
-
s3root : str, optional
|
458
|
-
If `run` is not specified, use this as the S3 prefix.
|
459
|
-
"""
|
460
|
-
@classmethod
|
461
|
-
def get_root_from_config(cls, echo, create_on_absent = True):
|
462
|
-
...
|
463
|
-
def __enter__(self) -> metaflow.plugins.datatools.s3.s3.S3:
|
464
|
-
...
|
465
|
-
def __exit__(self, *args):
|
466
|
-
...
|
467
|
-
def close(self):
|
468
|
-
"""
|
469
|
-
Delete all temporary files downloaded in this context.
|
470
|
-
"""
|
471
|
-
...
|
472
|
-
def list_paths(self, keys: typing.Optional[typing.Iterable[str]] = None) -> typing.List[metaflow.plugins.datatools.s3.s3.S3Object]:
|
473
|
-
"""
|
474
|
-
List the next level of paths in S3.
|
475
|
-
|
476
|
-
If multiple keys are specified, listings are done in parallel. The returned
|
477
|
-
S3Objects have `.exists == False` if the path refers to a prefix, not an
|
478
|
-
existing S3 object.
|
479
|
-
|
480
|
-
For instance, if the directory hierarchy is
|
481
|
-
```
|
482
|
-
a/0.txt
|
483
|
-
a/b/1.txt
|
484
|
-
a/c/2.txt
|
485
|
-
a/d/e/3.txt
|
486
|
-
f/4.txt
|
487
|
-
```
|
488
|
-
The `list_paths(['a', 'f'])` call returns
|
489
|
-
```
|
490
|
-
a/0.txt (exists == True)
|
491
|
-
a/b/ (exists == False)
|
492
|
-
a/c/ (exists == False)
|
493
|
-
a/d/ (exists == False)
|
494
|
-
f/4.txt (exists == True)
|
495
|
-
```
|
496
|
-
|
497
|
-
Parameters
|
498
|
-
----------
|
499
|
-
keys : Iterable[str], optional, default None
|
500
|
-
List of paths.
|
501
|
-
|
502
|
-
Returns
|
503
|
-
-------
|
504
|
-
List[S3Object]
|
505
|
-
S3Objects under the given paths, including prefixes (directories) that
|
506
|
-
do not correspond to leaf objects.
|
507
|
-
"""
|
508
|
-
...
|
509
|
-
def list_recursive(self, keys: typing.Optional[typing.Iterable[str]] = None) -> typing.List[metaflow.plugins.datatools.s3.s3.S3Object]:
|
510
|
-
"""
|
511
|
-
List all objects recursively under the given prefixes.
|
512
|
-
|
513
|
-
If multiple keys are specified, listings are done in parallel. All objects
|
514
|
-
returned have `.exists == True` as this call always returns leaf objects.
|
515
|
-
|
516
|
-
For instance, if the directory hierarchy is
|
517
|
-
```
|
518
|
-
a/0.txt
|
519
|
-
a/b/1.txt
|
520
|
-
a/c/2.txt
|
521
|
-
a/d/e/3.txt
|
522
|
-
f/4.txt
|
523
|
-
```
|
524
|
-
The `list_paths(['a', 'f'])` call returns
|
525
|
-
```
|
526
|
-
a/0.txt (exists == True)
|
527
|
-
a/b/1.txt (exists == True)
|
528
|
-
a/c/2.txt (exists == True)
|
529
|
-
a/d/e/3.txt (exists == True)
|
530
|
-
f/4.txt (exists == True)
|
531
|
-
```
|
532
|
-
|
533
|
-
Parameters
|
534
|
-
----------
|
535
|
-
keys : Iterable[str], optional, default None
|
536
|
-
List of paths.
|
537
|
-
|
538
|
-
Returns
|
539
|
-
-------
|
540
|
-
List[S3Object]
|
541
|
-
S3Objects under the given paths.
|
542
|
-
"""
|
543
|
-
...
|
544
|
-
def info(self, key: typing.Optional[str] = None, return_missing: bool = False) -> metaflow.plugins.datatools.s3.s3.S3Object:
|
545
|
-
"""
|
546
|
-
Get metadata about a single object in S3.
|
547
|
-
|
548
|
-
This call makes a single `HEAD` request to S3 which can be
|
549
|
-
much faster than downloading all data with `get`.
|
550
|
-
|
551
|
-
Parameters
|
552
|
-
----------
|
553
|
-
key : str, optional, default None
|
554
|
-
Object to query. It can be an S3 url or a path suffix.
|
555
|
-
return_missing : bool, default False
|
556
|
-
If set to True, do not raise an exception for a missing key but
|
557
|
-
return it as an `S3Object` with `.exists == False`.
|
558
|
-
|
559
|
-
Returns
|
560
|
-
-------
|
561
|
-
S3Object
|
562
|
-
An S3Object corresponding to the object requested. The object
|
563
|
-
will have `.downloaded == False`.
|
564
|
-
"""
|
565
|
-
...
|
566
|
-
def info_many(self, keys: typing.Iterable[str], return_missing: bool = False) -> typing.List[metaflow.plugins.datatools.s3.s3.S3Object]:
|
567
|
-
"""
|
568
|
-
Get metadata about many objects in S3 in parallel.
|
569
|
-
|
570
|
-
This call makes a single `HEAD` request to S3 which can be
|
571
|
-
much faster than downloading all data with `get`.
|
572
|
-
|
573
|
-
Parameters
|
574
|
-
----------
|
575
|
-
keys : Iterable[str]
|
576
|
-
Objects to query. Each key can be an S3 url or a path suffix.
|
577
|
-
return_missing : bool, default False
|
578
|
-
If set to True, do not raise an exception for a missing key but
|
579
|
-
return it as an `S3Object` with `.exists == False`.
|
580
|
-
|
581
|
-
Returns
|
582
|
-
-------
|
583
|
-
List[S3Object]
|
584
|
-
A list of S3Objects corresponding to the paths requested. The
|
585
|
-
objects will have `.downloaded == False`.
|
586
|
-
"""
|
587
|
-
...
|
588
|
-
def get(self, key: typing.Union[str, metaflow.plugins.datatools.s3.s3.S3GetObject, None] = None, return_missing: bool = False, return_info: bool = True) -> metaflow.plugins.datatools.s3.s3.S3Object:
|
589
|
-
"""
|
590
|
-
Get a single object from S3.
|
591
|
-
|
592
|
-
Parameters
|
593
|
-
----------
|
594
|
-
key : Union[str, S3GetObject], optional, default None
|
595
|
-
Object to download. It can be an S3 url, a path suffix, or
|
596
|
-
an S3GetObject that defines a range of data to download. If None, or
|
597
|
-
not provided, gets the S3 root.
|
598
|
-
return_missing : bool, default False
|
599
|
-
If set to True, do not raise an exception for a missing key but
|
600
|
-
return it as an `S3Object` with `.exists == False`.
|
601
|
-
return_info : bool, default True
|
602
|
-
If set to True, fetch the content-type and user metadata associated
|
603
|
-
with the object at no extra cost, included for symmetry with `get_many`
|
604
|
-
|
605
|
-
Returns
|
606
|
-
-------
|
607
|
-
S3Object
|
608
|
-
An S3Object corresponding to the object requested.
|
609
|
-
"""
|
610
|
-
...
|
611
|
-
def get_many(self, keys: typing.Iterable[typing.Union[str, metaflow.plugins.datatools.s3.s3.S3GetObject]], return_missing: bool = False, return_info: bool = True) -> typing.List[metaflow.plugins.datatools.s3.s3.S3Object]:
|
612
|
-
"""
|
613
|
-
Get many objects from S3 in parallel.
|
614
|
-
|
615
|
-
Parameters
|
616
|
-
----------
|
617
|
-
keys : Iterable[Union[str, S3GetObject]]
|
618
|
-
Objects to download. Each object can be an S3 url, a path suffix, or
|
619
|
-
an S3GetObject that defines a range of data to download.
|
620
|
-
return_missing : bool, default False
|
621
|
-
If set to True, do not raise an exception for a missing key but
|
622
|
-
return it as an `S3Object` with `.exists == False`.
|
623
|
-
return_info : bool, default True
|
624
|
-
If set to True, fetch the content-type and user metadata associated
|
625
|
-
with the object at no extra cost, included for symmetry with `get_many`.
|
626
|
-
|
627
|
-
Returns
|
628
|
-
-------
|
629
|
-
List[S3Object]
|
630
|
-
S3Objects corresponding to the objects requested.
|
631
|
-
"""
|
632
|
-
...
|
633
|
-
def get_recursive(self, keys: typing.Iterable[str], return_info: bool = False) -> typing.List[metaflow.plugins.datatools.s3.s3.S3Object]:
|
634
|
-
"""
|
635
|
-
Get many objects from S3 recursively in parallel.
|
636
|
-
|
637
|
-
Parameters
|
638
|
-
----------
|
639
|
-
keys : Iterable[str]
|
640
|
-
Prefixes to download recursively. Each prefix can be an S3 url or a path suffix
|
641
|
-
which define the root prefix under which all objects are downloaded.
|
642
|
-
return_info : bool, default False
|
643
|
-
If set to True, fetch the content-type and user metadata associated
|
644
|
-
with the object.
|
645
|
-
|
646
|
-
Returns
|
647
|
-
-------
|
648
|
-
List[S3Object]
|
649
|
-
S3Objects stored under the given prefixes.
|
650
|
-
"""
|
651
|
-
...
|
652
|
-
def get_all(self, return_info: bool = False) -> typing.List[metaflow.plugins.datatools.s3.s3.S3Object]:
|
653
|
-
"""
|
654
|
-
Get all objects under the prefix set in the `S3` constructor.
|
655
|
-
|
656
|
-
This method requires that the `S3` object is initialized either with `run` or
|
657
|
-
`s3root`.
|
658
|
-
|
659
|
-
Parameters
|
660
|
-
----------
|
661
|
-
return_info : bool, default False
|
662
|
-
If set to True, fetch the content-type and user metadata associated
|
663
|
-
with the object.
|
664
|
-
|
665
|
-
Returns
|
666
|
-
-------
|
667
|
-
Iterable[S3Object]
|
668
|
-
S3Objects stored under the main prefix.
|
669
|
-
"""
|
670
|
-
...
|
671
|
-
def put(self, key: typing.Union[str, metaflow.plugins.datatools.s3.s3.S3PutObject], obj: typing.Union[io.RawIOBase, io.BufferedIOBase, str, bytes], overwrite: bool = True, content_type: typing.Optional[str] = None, metadata: typing.Optional[typing.Dict[str, str]] = None) -> str:
|
672
|
-
"""
|
673
|
-
Upload a single object to S3.
|
674
|
-
|
675
|
-
Parameters
|
676
|
-
----------
|
677
|
-
key : Union[str, S3PutObject]
|
678
|
-
Object path. It can be an S3 url or a path suffix.
|
679
|
-
obj : PutValue
|
680
|
-
An object to store in S3. Strings are converted to UTF-8 encoding.
|
681
|
-
overwrite : bool, default True
|
682
|
-
Overwrite the object if it exists. If set to False, the operation
|
683
|
-
succeeds without uploading anything if the key already exists.
|
684
|
-
content_type : str, optional, default None
|
685
|
-
Optional MIME type for the object.
|
686
|
-
metadata : Dict[str, str], optional, default None
|
687
|
-
A JSON-encodable dictionary of additional headers to be stored
|
688
|
-
as metadata with the object.
|
689
|
-
|
690
|
-
Returns
|
691
|
-
-------
|
692
|
-
str
|
693
|
-
URL of the object stored.
|
694
|
-
"""
|
695
|
-
...
|
696
|
-
def put_many(self, key_objs: typing.List[typing.Union[typing.Tuple[str, typing.Union[io.RawIOBase, io.BufferedIOBase, str, bytes]], metaflow.plugins.datatools.s3.s3.S3PutObject]], overwrite: bool = True) -> typing.List[typing.Tuple[str, str]]:
|
697
|
-
"""
|
698
|
-
Upload many objects to S3.
|
699
|
-
|
700
|
-
Each object to be uploaded can be specified in two ways:
|
701
|
-
|
702
|
-
1. As a `(key, obj)` tuple where `key` is a string specifying
|
703
|
-
the path and `obj` is a string or a bytes object.
|
704
|
-
|
705
|
-
2. As a `S3PutObject` which contains additional metadata to be
|
706
|
-
stored with the object.
|
707
|
-
|
708
|
-
Parameters
|
709
|
-
----------
|
710
|
-
key_objs : List[Union[Tuple[str, PutValue], S3PutObject]]
|
711
|
-
List of key-object pairs to upload.
|
712
|
-
overwrite : bool, default True
|
713
|
-
Overwrite the object if it exists. If set to False, the operation
|
714
|
-
succeeds without uploading anything if the key already exists.
|
715
|
-
|
716
|
-
Returns
|
717
|
-
-------
|
718
|
-
List[Tuple[str, str]]
|
719
|
-
List of `(key, url)` pairs corresponding to the objects uploaded.
|
720
|
-
"""
|
721
|
-
...
|
722
|
-
def put_files(self, key_paths: typing.List[typing.Union[typing.Tuple[str, typing.Union[io.RawIOBase, io.BufferedIOBase, str, bytes]], metaflow.plugins.datatools.s3.s3.S3PutObject]], overwrite: bool = True) -> typing.List[typing.Tuple[str, str]]:
|
723
|
-
"""
|
724
|
-
Upload many local files to S3.
|
725
|
-
|
726
|
-
Each file to be uploaded can be specified in two ways:
|
727
|
-
|
728
|
-
1. As a `(key, path)` tuple where `key` is a string specifying
|
729
|
-
the S3 path and `path` is the path to a local file.
|
730
|
-
|
731
|
-
2. As a `S3PutObject` which contains additional metadata to be
|
732
|
-
stored with the file.
|
733
|
-
|
734
|
-
Parameters
|
735
|
-
----------
|
736
|
-
key_paths : List[Union[Tuple[str, PutValue], S3PutObject]]
|
737
|
-
List of files to upload.
|
738
|
-
overwrite : bool, default True
|
739
|
-
Overwrite the object if it exists. If set to False, the operation
|
740
|
-
succeeds without uploading anything if the key already exists.
|
741
|
-
|
742
|
-
Returns
|
743
|
-
-------
|
744
|
-
List[Tuple[str, str]]
|
745
|
-
List of `(key, url)` pairs corresponding to the files uploaded.
|
746
|
-
"""
|
747
|
-
...
|
748
|
-
...
|
749
|
-
|
750
|
-
class IncludeFile(metaflow.parameters.Parameter, metaclass=type):
|
751
|
-
"""
|
752
|
-
Includes a local file as a parameter for the flow.
|
753
|
-
|
754
|
-
`IncludeFile` behaves like `Parameter` except that it reads its value from a file instead of
|
755
|
-
the command line. The user provides a path to a file on the command line. The file contents
|
756
|
-
are saved as a read-only artifact which is available in all steps of the flow.
|
757
|
-
|
758
|
-
Parameters
|
759
|
-
----------
|
760
|
-
name : str
|
761
|
-
User-visible parameter name.
|
762
|
-
default : Union[str, Callable[ParameterContext, str]]
|
763
|
-
Default path to a local file. A function
|
764
|
-
implies that the parameter corresponds to a *deploy-time parameter*.
|
765
|
-
is_text : bool, default True
|
766
|
-
Convert the file contents to a string using the provided `encoding`.
|
767
|
-
If False, the artifact is stored in `bytes`.
|
768
|
-
encoding : str, optional, default 'utf-8'
|
769
|
-
Use this encoding to decode the file contexts if `is_text=True`.
|
770
|
-
required : bool, default False
|
771
|
-
Require that the user specified a value for the parameter.
|
772
|
-
`required=True` implies that the `default` is not used.
|
773
|
-
help : str, optional
|
774
|
-
Help text to show in `run --help`.
|
775
|
-
show_default : bool, default True
|
776
|
-
If True, show the default value in the help text.
|
777
|
-
"""
|
778
|
-
def __init__(self, name: str, required: bool = False, is_text: bool = True, encoding: str = "utf-8", help: typing.Optional[str] = None, **kwargs: typing.Dict[str, str]):
|
779
|
-
...
|
780
|
-
def load_parameter(self, v):
|
781
|
-
...
|
782
|
-
...
|
65
|
+
EXT_PKG: str
|
783
66
|
|
784
67
|
@typing.overload
|
785
68
|
def step(f: typing.Callable[[FlowSpecDerived], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
|
@@ -855,189 +138,73 @@ def step(f: typing.Union[typing.Callable[[FlowSpecDerived], None], typing.Callab
|
|
855
138
|
...
|
856
139
|
|
857
140
|
@typing.overload
|
858
|
-
def
|
141
|
+
def conda(*, packages: typing.Dict[str, str] = {}, libraries: typing.Dict[str, str] = {}, python: typing.Optional[str] = None, disabled: bool = False) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
|
859
142
|
"""
|
860
|
-
Specifies the
|
861
|
-
|
862
|
-
Use `@resources` to specify the resource requirements
|
863
|
-
independently of the specific compute layer (`@batch`, `@kubernetes`).
|
864
|
-
|
865
|
-
You can choose the compute layer on the command line by executing e.g.
|
866
|
-
```
|
867
|
-
python myflow.py run --with batch
|
868
|
-
```
|
869
|
-
or
|
870
|
-
```
|
871
|
-
python myflow.py run --with kubernetes
|
872
|
-
```
|
873
|
-
which executes the flow on the desired system using the
|
874
|
-
requirements specified in `@resources`.
|
143
|
+
Specifies the Conda environment for the step.
|
875
144
|
|
876
|
-
|
877
|
-
|
878
|
-
|
879
|
-
|
880
|
-
gpu : int, optional, default None
|
881
|
-
Number of GPUs required for this step.
|
882
|
-
disk : int, optional, default None
|
883
|
-
Disk size (in MB) required for this step. Only applies on Kubernetes.
|
884
|
-
memory : int, default 4096
|
885
|
-
Memory size (in MB) required for this step.
|
886
|
-
shared_memory : int, optional, default None
|
887
|
-
The value for the size (in MiB) of the /dev/shm volume for this step.
|
888
|
-
This parameter maps to the `--shm-size` option in Docker.
|
145
|
+
Information in this decorator will augment any
|
146
|
+
attributes set in the `@conda_base` flow-level decorator. Hence,
|
147
|
+
you can use `@conda_base` to set packages required by all
|
148
|
+
steps and use `@conda` to specify step-specific overrides.
|
889
149
|
"""
|
890
150
|
...
|
891
151
|
|
892
152
|
@typing.overload
|
893
|
-
def
|
153
|
+
def conda(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
|
894
154
|
...
|
895
155
|
|
896
156
|
@typing.overload
|
897
|
-
def
|
157
|
+
def conda(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
|
898
158
|
...
|
899
159
|
|
900
|
-
def
|
160
|
+
def conda(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None, *, packages: typing.Dict[str, str] = {}, libraries: typing.Dict[str, str] = {}, python: typing.Optional[str] = None, disabled: bool = False):
|
901
161
|
"""
|
902
|
-
Specifies the
|
903
|
-
|
904
|
-
Use `@resources` to specify the resource requirements
|
905
|
-
independently of the specific compute layer (`@batch`, `@kubernetes`).
|
906
|
-
|
907
|
-
You can choose the compute layer on the command line by executing e.g.
|
908
|
-
```
|
909
|
-
python myflow.py run --with batch
|
910
|
-
```
|
911
|
-
or
|
912
|
-
```
|
913
|
-
python myflow.py run --with kubernetes
|
914
|
-
```
|
915
|
-
which executes the flow on the desired system using the
|
916
|
-
requirements specified in `@resources`.
|
162
|
+
Specifies the Conda environment for the step.
|
917
163
|
|
918
|
-
|
919
|
-
|
920
|
-
|
921
|
-
|
922
|
-
gpu : int, optional, default None
|
923
|
-
Number of GPUs required for this step.
|
924
|
-
disk : int, optional, default None
|
925
|
-
Disk size (in MB) required for this step. Only applies on Kubernetes.
|
926
|
-
memory : int, default 4096
|
927
|
-
Memory size (in MB) required for this step.
|
928
|
-
shared_memory : int, optional, default None
|
929
|
-
The value for the size (in MiB) of the /dev/shm volume for this step.
|
930
|
-
This parameter maps to the `--shm-size` option in Docker.
|
164
|
+
Information in this decorator will augment any
|
165
|
+
attributes set in the `@conda_base` flow-level decorator. Hence,
|
166
|
+
you can use `@conda_base` to set packages required by all
|
167
|
+
steps and use `@conda` to specify step-specific overrides.
|
931
168
|
"""
|
932
169
|
...
|
933
170
|
|
934
171
|
@typing.overload
|
935
|
-
def
|
172
|
+
def environment(*, vars: typing.Dict[str, str] = {}) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
|
936
173
|
"""
|
937
|
-
Specifies
|
938
|
-
to a step needs to be retried.
|
939
|
-
|
940
|
-
This decorator is useful for handling transient errors, such as networking issues.
|
941
|
-
If your task contains operations that can't be retried safely, e.g. database updates,
|
942
|
-
it is advisable to annotate it with `@retry(times=0)`.
|
943
|
-
|
944
|
-
This can be used in conjunction with the `@catch` decorator. The `@catch`
|
945
|
-
decorator will execute a no-op task after all retries have been exhausted,
|
946
|
-
ensuring that the flow execution can continue.
|
947
|
-
|
948
|
-
Parameters
|
949
|
-
----------
|
950
|
-
times : int, default 3
|
951
|
-
Number of times to retry this task.
|
952
|
-
minutes_between_retries : int, default 2
|
953
|
-
Number of minutes between retries.
|
174
|
+
Specifies environment variables to be set prior to the execution of a step.
|
954
175
|
"""
|
955
176
|
...
|
956
177
|
|
957
178
|
@typing.overload
|
958
|
-
def
|
179
|
+
def environment(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
|
959
180
|
...
|
960
181
|
|
961
182
|
@typing.overload
|
962
|
-
def
|
183
|
+
def environment(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
|
963
184
|
...
|
964
185
|
|
965
|
-
def
|
186
|
+
def environment(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None, *, vars: typing.Dict[str, str] = {}):
|
966
187
|
"""
|
967
|
-
Specifies
|
968
|
-
to a step needs to be retried.
|
969
|
-
|
970
|
-
This decorator is useful for handling transient errors, such as networking issues.
|
971
|
-
If your task contains operations that can't be retried safely, e.g. database updates,
|
972
|
-
it is advisable to annotate it with `@retry(times=0)`.
|
973
|
-
|
974
|
-
This can be used in conjunction with the `@catch` decorator. The `@catch`
|
975
|
-
decorator will execute a no-op task after all retries have been exhausted,
|
976
|
-
ensuring that the flow execution can continue.
|
977
|
-
|
978
|
-
Parameters
|
979
|
-
----------
|
980
|
-
times : int, default 3
|
981
|
-
Number of times to retry this task.
|
982
|
-
minutes_between_retries : int, default 2
|
983
|
-
Number of minutes between retries.
|
188
|
+
Specifies environment variables to be set prior to the execution of a step.
|
984
189
|
"""
|
985
190
|
...
|
986
191
|
|
987
192
|
@typing.overload
|
988
|
-
def
|
193
|
+
def parallel(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
|
989
194
|
"""
|
990
|
-
|
991
|
-
|
992
|
-
This decorator is useful if this step may hang indefinitely.
|
993
|
-
|
994
|
-
This can be used in conjunction with the `@retry` decorator as well as the `@catch` decorator.
|
995
|
-
A timeout is considered to be an exception thrown by the step. It will cause the step to be
|
996
|
-
retried if needed and the exception will be caught by the `@catch` decorator, if present.
|
997
|
-
|
998
|
-
Note that all the values specified in parameters are added together so if you specify
|
999
|
-
60 seconds and 1 hour, the decorator will have an effective timeout of 1 hour and 1 minute.
|
1000
|
-
|
1001
|
-
Parameters
|
1002
|
-
----------
|
1003
|
-
seconds : int, default 0
|
1004
|
-
Number of seconds to wait prior to timing out.
|
1005
|
-
minutes : int, default 0
|
1006
|
-
Number of minutes to wait prior to timing out.
|
1007
|
-
hours : int, default 0
|
1008
|
-
Number of hours to wait prior to timing out.
|
195
|
+
Decorator prototype for all step decorators. This function gets specialized
|
196
|
+
and imported for all decorators types by _import_plugin_decorators().
|
1009
197
|
"""
|
1010
198
|
...
|
1011
199
|
|
1012
200
|
@typing.overload
|
1013
|
-
def
|
1014
|
-
...
|
1015
|
-
|
1016
|
-
@typing.overload
|
1017
|
-
def timeout(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
|
201
|
+
def parallel(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
|
1018
202
|
...
|
1019
203
|
|
1020
|
-
def
|
204
|
+
def parallel(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None):
|
1021
205
|
"""
|
1022
|
-
|
1023
|
-
|
1024
|
-
This decorator is useful if this step may hang indefinitely.
|
1025
|
-
|
1026
|
-
This can be used in conjunction with the `@retry` decorator as well as the `@catch` decorator.
|
1027
|
-
A timeout is considered to be an exception thrown by the step. It will cause the step to be
|
1028
|
-
retried if needed and the exception will be caught by the `@catch` decorator, if present.
|
1029
|
-
|
1030
|
-
Note that all the values specified in parameters are added together so if you specify
|
1031
|
-
60 seconds and 1 hour, the decorator will have an effective timeout of 1 hour and 1 minute.
|
1032
|
-
|
1033
|
-
Parameters
|
1034
|
-
----------
|
1035
|
-
seconds : int, default 0
|
1036
|
-
Number of seconds to wait prior to timing out.
|
1037
|
-
minutes : int, default 0
|
1038
|
-
Number of minutes to wait prior to timing out.
|
1039
|
-
hours : int, default 0
|
1040
|
-
Number of hours to wait prior to timing out.
|
206
|
+
Decorator prototype for all step decorators. This function gets specialized
|
207
|
+
and imported for all decorators types by _import_plugin_decorators().
|
1041
208
|
"""
|
1042
209
|
...
|
1043
210
|
|
@@ -1050,15 +217,6 @@ def pypi(*, packages: typing.Dict[str, str] = {}, python: typing.Optional[str] =
|
|
1050
217
|
attributes set in the `@pyi_base` flow-level decorator. Hence,
|
1051
218
|
you can use `@pypi_base` to set packages required by all
|
1052
219
|
steps and use `@pypi` to specify step-specific overrides.
|
1053
|
-
|
1054
|
-
Parameters
|
1055
|
-
----------
|
1056
|
-
packages : Dict[str, str], default: {}
|
1057
|
-
Packages to use for this step. The key is the name of the package
|
1058
|
-
and the value is the version to use.
|
1059
|
-
python : str, optional, default: None
|
1060
|
-
Version of Python to use, e.g. '3.7.4'. A default value of None implies
|
1061
|
-
that the version used will correspond to the version of the Python interpreter used to start the run.
|
1062
220
|
"""
|
1063
221
|
...
|
1064
222
|
|
@@ -1078,181 +236,6 @@ def pypi(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typ
 attributes set in the `@pyi_base` flow-level decorator. Hence,
 you can use `@pypi_base` to set packages required by all
 steps and use `@pypi` to specify step-specific overrides.
-
-Parameters
-----------
-packages : Dict[str, str], default: {}
-Packages to use for this step. The key is the name of the package
-and the value is the version to use.
-python : str, optional, default: None
-Version of Python to use, e.g. '3.7.4'. A default value of None implies
-that the version used will correspond to the version of the Python interpreter used to start the run.
-"""
-...
-
-@typing.overload
-def parallel(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
-"""
-Decorator prototype for all step decorators. This function gets specialized
-and imported for all decorators types by _import_plugin_decorators().
-"""
-...
-
-@typing.overload
-def parallel(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
-...
-
-def parallel(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None):
-"""
-Decorator prototype for all step decorators. This function gets specialized
-and imported for all decorators types by _import_plugin_decorators().
-"""
-...
-
-@typing.overload
-def batch(*, cpu: int = 1, gpu: int = 0, memory: int = 4096, image: typing.Optional[str] = None, queue: str = "METAFLOW_BATCH_JOB_QUEUE", iam_role: str = "METAFLOW_ECS_S3_ACCESS_IAM_ROLE", execution_role: str = "METAFLOW_ECS_FARGATE_EXECUTION_ROLE", shared_memory: typing.Optional[int] = None, max_swap: typing.Optional[int] = None, swappiness: typing.Optional[int] = None, use_tmpfs: bool = False, tmpfs_tempdir: bool = True, tmpfs_size: typing.Optional[int] = None, tmpfs_path: typing.Optional[str] = None, inferentia: int = 0, trainium: int = None, efa: int = 0, ephemeral_storage: int = None, log_driver: typing.Optional[str] = None, log_options: typing.Optional[typing.List[str]] = None) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
-"""
-Specifies that this step should execute on [AWS Batch](https://aws.amazon.com/batch/).
-
-Parameters
-----------
-cpu : int, default 1
-Number of CPUs required for this step. If `@resources` is
-also present, the maximum value from all decorators is used.
-gpu : int, default 0
-Number of GPUs required for this step. If `@resources` is
-also present, the maximum value from all decorators is used.
-memory : int, default 4096
-Memory size (in MB) required for this step. If
-`@resources` is also present, the maximum value from all decorators is
-used.
-image : str, optional, default None
-Docker image to use when launching on AWS Batch. If not specified, and
-METAFLOW_BATCH_CONTAINER_IMAGE is specified, that image is used. If
-not, a default Docker image mapping to the current version of Python is used.
-queue : str, default METAFLOW_BATCH_JOB_QUEUE
-AWS Batch Job Queue to submit the job to.
-iam_role : str, default METAFLOW_ECS_S3_ACCESS_IAM_ROLE
-AWS IAM role that AWS Batch container uses to access AWS cloud resources.
-execution_role : str, default METAFLOW_ECS_FARGATE_EXECUTION_ROLE
-AWS IAM role that AWS Batch can use [to trigger AWS Fargate tasks]
-(https://docs.aws.amazon.com/batch/latest/userguide/execution-IAM-role.html).
-shared_memory : int, optional, default None
-The value for the size (in MiB) of the /dev/shm volume for this step.
-This parameter maps to the `--shm-size` option in Docker.
-max_swap : int, optional, default None
-The total amount of swap memory (in MiB) a container can use for this
-step. This parameter is translated to the `--memory-swap` option in
-Docker where the value is the sum of the container memory plus the
-`max_swap` value.
-swappiness : int, optional, default None
-This allows you to tune memory swappiness behavior for this step.
-A swappiness value of 0 causes swapping not to happen unless absolutely
-necessary. A swappiness value of 100 causes pages to be swapped very
-aggressively. Accepted values are whole numbers between 0 and 100.
-use_tmpfs : bool, default False
-This enables an explicit tmpfs mount for this step. Note that tmpfs is
-not available on Fargate compute environments
-tmpfs_tempdir : bool, default True
-sets METAFLOW_TEMPDIR to tmpfs_path if set for this step.
-tmpfs_size : int, optional, default None
-The value for the size (in MiB) of the tmpfs mount for this step.
-This parameter maps to the `--tmpfs` option in Docker. Defaults to 50% of the
-memory allocated for this step.
-tmpfs_path : str, optional, default None
-Path to tmpfs mount for this step. Defaults to /metaflow_temp.
-inferentia : int, default 0
-Number of Inferentia chips required for this step.
-trainium : int, default None
-Alias for inferentia. Use only one of the two.
-efa : int, default 0
-Number of elastic fabric adapter network devices to attach to container
-ephemeral_storage : int, default None
-The total amount, in GiB, of ephemeral storage to set for the task, 21-200GiB.
-This is only relevant for Fargate compute environments
-log_driver: str, optional, default None
-The log driver to use for the Amazon ECS container.
-log_options: List[str], optional, default None
-List of strings containing options for the chosen log driver. The configurable values
-depend on the `log driver` chosen. Validation of these options is not supported yet.
-Example: [`awslogs-group:aws/batch/job`]
-"""
-...
-
-@typing.overload
-def batch(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
-...
-
-@typing.overload
-def batch(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
-...
-
-def batch(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None, *, cpu: int = 1, gpu: int = 0, memory: int = 4096, image: typing.Optional[str] = None, queue: str = "METAFLOW_BATCH_JOB_QUEUE", iam_role: str = "METAFLOW_ECS_S3_ACCESS_IAM_ROLE", execution_role: str = "METAFLOW_ECS_FARGATE_EXECUTION_ROLE", shared_memory: typing.Optional[int] = None, max_swap: typing.Optional[int] = None, swappiness: typing.Optional[int] = None, use_tmpfs: bool = False, tmpfs_tempdir: bool = True, tmpfs_size: typing.Optional[int] = None, tmpfs_path: typing.Optional[str] = None, inferentia: int = 0, trainium: int = None, efa: int = 0, ephemeral_storage: int = None, log_driver: typing.Optional[str] = None, log_options: typing.Optional[typing.List[str]] = None):
-"""
-[… same removed docstring as in the first `batch` overload above: the AWS Batch description and the full parameter list, removed verbatim a second time …]
 """
 ...

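The removed docstring above documents the step-level `@batch` parameters. A minimal sketch of typical usage (resource values and names are illustrative, not taken from this diff):

```python
from metaflow import FlowSpec, step, batch

class BatchExampleFlow(FlowSpec):

    # Illustrative values mapping to the cpu/memory parameters documented above.
    @batch(cpu=2, memory=8192)
    @step
    def start(self):
        self.answer = sum(range(10))
        self.next(self.end)

    @step
    def end(self):
        print("answer:", self.answer)

if __name__ == "__main__":
    BatchExampleFlow()
```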
@@ -1265,15 +248,6 @@ def catch(*, var: typing.Optional[str] = None, print_exception: bool = True) ->
 contains the exception raised. You can use it to detect the presence
 of errors, indicating that all happy-path artifacts produced by the step
 are missing.
-
-Parameters
-----------
-var : str, optional, default None
-Name of the artifact in which to store the caught exception.
-If not specified, the exception is not stored.
-print_exception : bool, default True
-Determines whether or not the exception is printed to
-stdout when caught.
 """
 ...

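A minimal sketch of the `@catch` pattern described above (flow name, artifact name, and the deliberate failure are illustrative, not taken from this diff):

```python
from metaflow import FlowSpec, step, catch, retry

class CatchExampleFlow(FlowSpec):

    @retry(times=1)
    @catch(var="compute_failed")
    @step
    def start(self):
        # Deliberate failure: with @catch the run continues and the exception
        # is stored in the 'compute_failed' artifact instead of failing the run.
        self.result = 1 / 0
        self.next(self.end)

    @step
    def end(self):
        # Happy-path artifacts (self.result) are missing if start failed,
        # so check the catch artifact defensively.
        if getattr(self, "compute_failed", None):
            print("start failed:", self.compute_failed)

if __name__ == "__main__":
    CatchExampleFlow()
```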
@@ -1293,510 +267,227 @@ def catch(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], ty
 contains the exception raised. You can use it to detect the presence
 of errors, indicating that all happy-path artifacts produced by the step
 are missing.
-
-Parameters
-----------
-var : str, optional, default None
-Name of the artifact in which to store the caught exception.
-If not specified, the exception is not stored.
-print_exception : bool, default True
-Determines whether or not the exception is printed to
-stdout when caught.
 """
 ...

 @typing.overload
-def
+def resources(*, cpu: int = 1, gpu: typing.Optional[int] = None, disk: typing.Optional[int] = None, memory: int = 4096, shared_memory: typing.Optional[int] = None) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
 """
-Specifies
+Specifies the resources needed when executing this step.

-
-
-
-
+Use `@resources` to specify the resource requirements
+independently of the specific compute layer (`@batch`, `@kubernetes`).
+
+You can choose the compute layer on the command line by executing e.g.
+```
+python myflow.py run --with batch
+```
+or
+```
+python myflow.py run --with kubernetes
+```
+which executes the flow on the desired system using the
+requirements specified in `@resources`.
 """
 ...

 @typing.overload
-def
+def resources(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
 ...

 @typing.overload
-def
+def resources(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
 ...

-def
+def resources(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None, *, cpu: int = 1, gpu: typing.Optional[int] = None, disk: typing.Optional[int] = None, memory: int = 4096, shared_memory: typing.Optional[int] = None):
 """
-Specifies
+Specifies the resources needed when executing this step.

-
-
-
-
+Use `@resources` to specify the resource requirements
+independently of the specific compute layer (`@batch`, `@kubernetes`).
+
+You can choose the compute layer on the command line by executing e.g.
+```
+python myflow.py run --with batch
+```
+or
+```
+python myflow.py run --with kubernetes
+```
+which executes the flow on the desired system using the
+requirements specified in `@resources`.
 """
 ...

 @typing.overload
-def
+def batch(*, cpu: int = 1, gpu: int = 0, memory: int = 4096, image: typing.Optional[str] = None, queue: str = 'METAFLOW_BATCH_JOB_QUEUE', iam_role: str = 'METAFLOW_ECS_S3_ACCESS_IAM_ROLE', execution_role: str = 'METAFLOW_ECS_FARGATE_EXECUTION_ROLE', shared_memory: typing.Optional[int] = None, max_swap: typing.Optional[int] = None, swappiness: typing.Optional[int] = None, use_tmpfs: bool = False, tmpfs_tempdir: bool = True, tmpfs_size: typing.Optional[int] = None, tmpfs_path: typing.Optional[str] = None, inferentia: int = 0, trainium: int = None, efa: int = 0, ephemeral_storage: int = None, log_driver: typing.Optional[str] = None, log_options: typing.Optional[typing.List[str]] = None) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
 """
-
-
-Note that you may add multiple `@card` decorators in a step with different parameters.
-
-Parameters
-----------
-type : str, default 'default'
-Card type.
-id : str, optional, default None
-If multiple cards are present, use this id to identify this card.
-options : Dict[str, Any], default {}
-Options passed to the card. The contents depend on the card type.
-timeout : int, default 45
-Interrupt reporting if it takes more than this many seconds.
-
-
+Specifies that this step should execute on [AWS Batch](https://aws.amazon.com/batch/).
 """
 ...

 @typing.overload
-def
+def batch(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
 ...

 @typing.overload
-def
+def batch(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
 ...

-def
+def batch(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None, *, cpu: int = 1, gpu: int = 0, memory: int = 4096, image: typing.Optional[str] = None, queue: str = 'METAFLOW_BATCH_JOB_QUEUE', iam_role: str = 'METAFLOW_ECS_S3_ACCESS_IAM_ROLE', execution_role: str = 'METAFLOW_ECS_FARGATE_EXECUTION_ROLE', shared_memory: typing.Optional[int] = None, max_swap: typing.Optional[int] = None, swappiness: typing.Optional[int] = None, use_tmpfs: bool = False, tmpfs_tempdir: bool = True, tmpfs_size: typing.Optional[int] = None, tmpfs_path: typing.Optional[str] = None, inferentia: int = 0, trainium: int = None, efa: int = 0, ephemeral_storage: int = None, log_driver: typing.Optional[str] = None, log_options: typing.Optional[typing.List[str]] = None):
 """
-[… same removed `@card` docstring block as in the previous overload above …]
+Specifies that this step should execute on [AWS Batch](https://aws.amazon.com/batch/).
 """
 ...

 @typing.overload
-def
+def card(*, type: str = 'default', id: typing.Optional[str] = None, options: typing.Dict[str, typing.Any] = {}, timeout: int = 45) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
 """
-
-the execution of a step.
+Creates a human-readable report, a Metaflow Card, after this step completes.

-
-----------
-sources : List[Union[str, Dict[str, Any]]], default: []
-List of secret specs, defining how the secrets are to be retrieved
+Note that you may add multiple `@card` decorators in a step with different parameters.
 """
 ...

 @typing.overload
-def
+def card(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
 ...

 @typing.overload
-def
+def card(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
 ...

-def
+def card(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None, *, type: str = 'default', id: typing.Optional[str] = None, options: typing.Dict[str, typing.Any] = {}, timeout: int = 45):
 """
-
-the execution of a step.
+Creates a human-readable report, a Metaflow Card, after this step completes.

-
-----------
-sources : List[Union[str, Dict[str, Any]]], default: []
-List of secret specs, defining how the secrets are to be retrieved
+Note that you may add multiple `@card` decorators in a step with different parameters.
 """
 ...

 @typing.overload
-def
+def timeout(*, seconds: int = 0, minutes: int = 0, hours: int = 0) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
 """
-Specifies
+Specifies a timeout for your step.

-
-attributes set in the `@conda_base` flow-level decorator. Hence,
-you can use `@conda_base` to set packages required by all
-steps and use `@conda` to specify step-specific overrides.
+This decorator is useful if this step may hang indefinitely.

-
-
-
-
-
-
-Supported for backward compatibility. When used with packages, packages will take precedence.
-python : str, optional, default None
-Version of Python to use, e.g. '3.7.4'. A default value of None implies
-that the version used will correspond to the version of the Python interpreter used to start the run.
-disabled : bool, default False
-If set to True, disables @conda.
+This can be used in conjunction with the `@retry` decorator as well as the `@catch` decorator.
+A timeout is considered to be an exception thrown by the step. It will cause the step to be
+retried if needed and the exception will be caught by the `@catch` decorator, if present.
+
+Note that all the values specified in parameters are added together so if you specify
+60 seconds and 1 hour, the decorator will have an effective timeout of 1 hour and 1 minute.
 """
 ...

 @typing.overload
-def
+def timeout(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
 ...

 @typing.overload
-def
+def timeout(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
 ...

-def
+def timeout(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None, *, seconds: int = 0, minutes: int = 0, hours: int = 0):
 """
-Specifies
+Specifies a timeout for your step.

-
-attributes set in the `@conda_base` flow-level decorator. Hence,
-you can use `@conda_base` to set packages required by all
-steps and use `@conda` to specify step-specific overrides.
+This decorator is useful if this step may hang indefinitely.

-
-
-
-
-
-
-Supported for backward compatibility. When used with packages, packages will take precedence.
-python : str, optional, default None
-Version of Python to use, e.g. '3.7.4'. A default value of None implies
-that the version used will correspond to the version of the Python interpreter used to start the run.
-disabled : bool, default False
-If set to True, disables @conda.
+This can be used in conjunction with the `@retry` decorator as well as the `@catch` decorator.
+A timeout is considered to be an exception thrown by the step. It will cause the step to be
+retried if needed and the exception will be caught by the `@catch` decorator, if present.
+
+Note that all the values specified in parameters are added together so if you specify
+60 seconds and 1 hour, the decorator will have an effective timeout of 1 hour and 1 minute.
 """
 ...

-
+@typing.overload
+def secrets(*, sources: typing.List[typing.Union[str, typing.Dict[str, typing.Any]]] = []) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
 """
-Specifies
-
-Parameters
-----------
-cpu : int, default 1
-Number of CPUs required for this step. If `@resources` is
-also present, the maximum value from all decorators is used.
-memory : int, default 4096
-Memory size (in MB) required for this step. If
-`@resources` is also present, the maximum value from all decorators is
-used.
-disk : int, default 10240
-Disk size (in MB) required for this step. If
-`@resources` is also present, the maximum value from all decorators is
-used.
-image : str, optional, default None
-Docker image to use when launching on Kubernetes. If not specified, and
-METAFLOW_KUBERNETES_CONTAINER_IMAGE is specified, that image is used. If
-not, a default Docker image mapping to the current version of Python is used.
-image_pull_policy: str, default KUBERNETES_IMAGE_PULL_POLICY
-If given, the imagePullPolicy to be applied to the Docker image of the step.
-service_account : str, default METAFLOW_KUBERNETES_SERVICE_ACCOUNT
-Kubernetes service account to use when launching pod in Kubernetes.
-secrets : List[str], optional, default None
-Kubernetes secrets to use when launching pod in Kubernetes. These
-secrets are in addition to the ones defined in `METAFLOW_KUBERNETES_SECRETS`
-in Metaflow configuration.
-node_selector: Union[Dict[str,str], str], optional, default None
-Kubernetes node selector(s) to apply to the pod running the task.
-Can be passed in as a comma separated string of values e.g. "kubernetes.io/os=linux,kubernetes.io/arch=amd64"
-or as a dictionary {"kubernetes.io/os": "linux", "kubernetes.io/arch": "amd64"}
-namespace : str, default METAFLOW_KUBERNETES_NAMESPACE
-Kubernetes namespace to use when launching pod in Kubernetes.
-gpu : int, optional, default None
-Number of GPUs required for this step. A value of zero implies that
-the scheduled node should not have GPUs.
-gpu_vendor : str, default KUBERNETES_GPU_VENDOR
-The vendor of the GPUs to be used for this step.
-tolerations : List[str], default []
-The default is extracted from METAFLOW_KUBERNETES_TOLERATIONS.
-Kubernetes tolerations to use when launching pod in Kubernetes.
-use_tmpfs : bool, default False
-This enables an explicit tmpfs mount for this step.
-tmpfs_tempdir : bool, default True
-sets METAFLOW_TEMPDIR to tmpfs_path if set for this step.
-tmpfs_size : int, optional, default: None
-The value for the size (in MiB) of the tmpfs mount for this step.
-This parameter maps to the `--tmpfs` option in Docker. Defaults to 50% of the
-memory allocated for this step.
-tmpfs_path : str, optional, default /metaflow_temp
-Path to tmpfs mount for this step.
-persistent_volume_claims : Dict[str, str], optional, default None
-A map (dictionary) of persistent volumes to be mounted to the pod for this step. The map is from persistent
-volumes to the path to which the volume is to be mounted, e.g., `{'pvc-name': '/path/to/mount/on'}`.
-shared_memory: int, optional
-Shared memory size (in MiB) required for this step
-port: int, optional
-Port number to specify in the Kubernetes job object
-compute_pool : str, optional, default None
-Compute pool to be used for for this step.
-If not specified, any accessible compute pool within the perimeter is used.
-hostname_resolution_timeout: int, default 10 * 60
-Timeout in seconds for the workers tasks in the gang scheduled cluster to resolve the hostname of control task.
-Only applicable when @parallel is used.
+Specifies secrets to be retrieved and injected as environment variables prior to
+the execution of a step.
 """
 ...

-
+@typing.overload
+def secrets(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
+...
+
+@typing.overload
+def secrets(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
+...
+
+def secrets(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None, *, sources: typing.List[typing.Union[str, typing.Dict[str, typing.Any]]] = []):
 """
-
-
-and is compiled using `airflow create`. More than one `@airflow_s3_key_sensor` can be
-added as a flow decorators. Adding more than one decorator will ensure that `start` step
-starts only after all sensors finish.
-
-Parameters
-----------
-timeout : int
-Time, in seconds before the task times out and fails. (Default: 3600)
-poke_interval : int
-Time in seconds that the job should wait in between each try. (Default: 60)
-mode : str
-How the sensor operates. Options are: { poke | reschedule }. (Default: "poke")
-exponential_backoff : bool
-allow progressive longer waits between pokes by using exponential backoff algorithm. (Default: True)
-pool : str
-the slot pool this task should run in,
-slot pools are a way to limit concurrency for certain tasks. (Default:None)
-soft_fail : bool
-Set to true to mark the task as SKIPPED on failure. (Default: False)
-name : str
-Name of the sensor on Airflow
-description : str
-Description of sensor in the Airflow UI
-bucket_key : Union[str, List[str]]
-The key(s) being waited on. Supports full s3:// style url or relative path from root level.
-When it's specified as a full s3:// url, please leave `bucket_name` as None
-bucket_name : str
-Name of the S3 bucket. Only needed when bucket_key is not provided as a full s3:// url.
-When specified, all the keys passed to bucket_key refers to this bucket. (Default:None)
-wildcard_match : bool
-whether the bucket_key should be interpreted as a Unix wildcard pattern. (Default: False)
-aws_conn_id : str
-a reference to the s3 connection on Airflow. (Default: None)
-verify : bool
-Whether or not to verify SSL certificates for S3 connection. (Default: None)
+Specifies secrets to be retrieved and injected as environment variables prior to
+the execution of a step.
 """
 ...

-def
+def kubernetes(*, cpu: int = 1, memory: int = 4096, disk: int = 10240, image: typing.Optional[str] = None, image_pull_policy: str = 'KUBERNETES_IMAGE_PULL_POLICY', service_account: str = 'METAFLOW_KUBERNETES_SERVICE_ACCOUNT', secrets: typing.Optional[typing.List[str]] = None, node_selector: typing.Union[typing.Dict[str, str], str, None] = None, namespace: str = 'METAFLOW_KUBERNETES_NAMESPACE', gpu: typing.Optional[int] = None, gpu_vendor: str = 'KUBERNETES_GPU_VENDOR', tolerations: typing.List[str] = [], use_tmpfs: bool = False, tmpfs_tempdir: bool = True, tmpfs_size: typing.Optional[int] = None, tmpfs_path: typing.Optional[str] = '/metaflow_temp', persistent_volume_claims: typing.Optional[typing.Dict[str, str]] = None, shared_memory: typing.Optional[int] = None, port: typing.Optional[int] = None, compute_pool: typing.Optional[str] = None, hostname_resolution_timeout: int = 600) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
 """
-Specifies
-
-A project-specific namespace is created for all flows that
-use the same `@project(name)`.
-
-Parameters
-----------
-name : str
-Project name. Make sure that the name is unique amongst all
-projects that use the same production scheduler. The name may
-contain only lowercase alphanumeric characters and underscores.
-
-
+Specifies that this step should execute on Kubernetes.
 """
 ...

 @typing.overload
-def
+def retry(*, times: int = 3, minutes_between_retries: int = 2) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
 """
-Specifies the
+Specifies the number of times the task corresponding
+to a step needs to be retried.

-
-
+This decorator is useful for handling transient errors, such as networking issues.
+If your task contains operations that can't be retried safely, e.g. database updates,
+it is advisable to annotate it with `@retry(times=0)`.

-
-
-
-Packages to use for this flow. The key is the name of the package
-and the value is the version to use.
-libraries : Dict[str, str], default {}
-Supported for backward compatibility. When used with packages, packages will take precedence.
-python : str, optional, default None
-Version of Python to use, e.g. '3.7.4'. A default value of None implies
-that the version used will correspond to the version of the Python interpreter used to start the run.
-disabled : bool, default False
-If set to True, disables Conda.
+This can be used in conjunction with the `@catch` decorator. The `@catch`
+decorator will execute a no-op task after all retries have been exhausted,
+ensuring that the flow execution can continue.
 """
 ...

 @typing.overload
-def
+def retry(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
 ...

-
-
-Specifies the Conda environment for all steps of the flow.
-
-Use `@conda_base` to set common libraries required by all
-steps and use `@conda` to specify step-specific additions.
-
-Parameters
-----------
-packages : Dict[str, str], default {}
-Packages to use for this flow. The key is the name of the package
-and the value is the version to use.
-libraries : Dict[str, str], default {}
-Supported for backward compatibility. When used with packages, packages will take precedence.
-python : str, optional, default None
-Version of Python to use, e.g. '3.7.4'. A default value of None implies
-that the version used will correspond to the version of the Python interpreter used to start the run.
-disabled : bool, default False
-If set to True, disables Conda.
-"""
+@typing.overload
+def retry(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
 ...

-def
+def retry(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None, *, times: int = 3, minutes_between_retries: int = 2):
 """
-
-
+Specifies the number of times the task corresponding
+to a step needs to be retried.

-
-
-
-
-
-
-
-How the sensor operates. Options are: { poke | reschedule }. (Default: "poke")
-exponential_backoff : bool
-allow progressive longer waits between pokes by using exponential backoff algorithm. (Default: True)
-pool : str
-the slot pool this task should run in,
-slot pools are a way to limit concurrency for certain tasks. (Default:None)
-soft_fail : bool
-Set to true to mark the task as SKIPPED on failure. (Default: False)
-name : str
-Name of the sensor on Airflow
-description : str
-Description of sensor in the Airflow UI
-external_dag_id : str
-The dag_id that contains the task you want to wait for.
-external_task_ids : List[str]
-The list of task_ids that you want to wait for.
-If None (default value) the sensor waits for the DAG. (Default: None)
-allowed_states : List[str]
-Iterable of allowed states, (Default: ['success'])
-failed_states : List[str]
-Iterable of failed or dis-allowed states. (Default: None)
-execution_delta : datetime.timedelta
-time difference with the previous execution to look at,
-the default is the same logical date as the current task or DAG. (Default: None)
-check_existence: bool
-Set to True to check if the external task exists or check if
-the DAG to wait for exists. (Default: True)
+This decorator is useful for handling transient errors, such as networking issues.
+If your task contains operations that can't be retried safely, e.g. database updates,
+it is advisable to annotate it with `@retry(times=0)`.
+
+This can be used in conjunction with the `@catch` decorator. The `@catch`
+decorator will execute a no-op task after all retries have been exhausted,
+ensuring that the flow execution can continue.
 """
 ...

-
-def trigger_on_finish(*, flow: typing.Union[typing.Dict[str, str], str, None] = None, flows: typing.List[typing.Union[str, typing.Dict[str, str]]] = [], options: typing.Dict[str, typing.Any] = {}) -> typing.Callable[[typing.Type[FlowSpecDerived]], typing.Type[FlowSpecDerived]]:
+def airflow_s3_key_sensor(*, timeout: int, poke_interval: int, mode: str, exponential_backoff: bool, pool: str, soft_fail: bool, name: str, description: str, bucket_key: typing.Union[str, typing.List[str]], bucket_name: str, wildcard_match: bool, aws_conn_id: str, verify: bool) -> typing.Callable[[typing.Type[FlowSpecDerived]], typing.Type[FlowSpecDerived]]:
 """
-
-
-
-
-
-or
-```
-@trigger_on_finish(flows=['FooFlow', 'BarFlow'])
-```
-This decorator respects the @project decorator and triggers the flow
-when upstream runs within the same namespace complete successfully
-
-Additionally, you can specify project aware upstream flow dependencies
-by specifying the fully qualified project_flow_name.
-```
-@trigger_on_finish(flow='my_project.branch.my_branch.FooFlow')
-```
-or
-```
-@trigger_on_finish(flows=['my_project.branch.my_branch.FooFlow', 'BarFlow'])
-```
-
-You can also specify just the project or project branch (other values will be
-inferred from the current project or project branch):
-```
-@trigger_on_finish(flow={"name": "FooFlow", "project": "my_project", "project_branch": "branch"})
-```
-
-Note that `branch` is typically one of:
-- `prod`
-- `user.bob`
-- `test.my_experiment`
-- `prod.staging`
-
-Parameters
-----------
-flow : Union[str, Dict[str, str]], optional, default None
-Upstream flow dependency for this flow.
-flows : List[Union[str, Dict[str, str]]], default []
-Upstream flow dependencies for this flow.
-options : Dict[str, Any], default {}
-Backend-specific configuration for tuning eventing behavior.
-
-
+The `@airflow_s3_key_sensor` decorator attaches a Airflow [S3KeySensor](https://airflow.apache.org/docs/apache-airflow-providers-amazon/stable/_api/airflow/providers/amazon/aws/sensors/s3/index.html#airflow.providers.amazon.aws.sensors.s3.S3KeySensor)
+before the start step of the flow. This decorator only works when a flow is scheduled on Airflow
+and is compiled using `airflow create`. More than one `@airflow_s3_key_sensor` can be
+added as a flow decorators. Adding more than one decorator will ensure that `start` step
+starts only after all sensors finish.
 """
 ...

-
-def trigger_on_finish(f: typing.Type[FlowSpecDerived]) -> typing.Type[FlowSpecDerived]:
-...
-
-def trigger_on_finish(f: typing.Optional[typing.Type[FlowSpecDerived]] = None, *, flow: typing.Union[typing.Dict[str, str], str, None] = None, flows: typing.List[typing.Union[str, typing.Dict[str, str]]] = [], options: typing.Dict[str, typing.Any] = {}):
+def project(*, name: str) -> typing.Callable[[typing.Type[FlowSpecDerived]], typing.Type[FlowSpecDerived]]:
 """
-[… same removed `@trigger_on_finish` documentation block as above, here beginning with its "Specifies" line and the `@trigger_on_finish(flow='FooFlow')` example, followed by the identical project-aware examples and parameter list …]
+Specifies what flows belong to the same project.

+A project-specific namespace is created for all flows that
+use the same `@project(name)`.
 """
 ...

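The added docstrings in this hunk (`@resources`, `@batch`, `@card`, `@timeout`, `@secrets`, `@kubernetes`, `@retry`) describe how these step decorators compose. A minimal sketch of combining them on one step (all values and names are illustrative, not taken from this diff):

```python
from metaflow import FlowSpec, step, card, catch, resources, retry, timeout

class RobustStepFlow(FlowSpec):

    # @resources only takes effect when a compute layer is selected on the
    # command line, e.g. `python robust_step_flow.py run --with batch`
    # or `--with kubernetes`, as the added docstring above explains.
    @card
    @catch(var="train_error")
    @timeout(hours=1)
    @retry(times=2, minutes_between_retries=5)
    @resources(cpu=4, memory=16000)
    @step
    def start(self):
        self.score = 0.9  # placeholder for real work
        self.next(self.end)

    @step
    def end(self):
        print("score:", getattr(self, "score", None))

if __name__ == "__main__":
    RobustStepFlow()
```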
@@ -1832,17 +523,6 @@ def trigger(*, event: typing.Union[str, typing.Dict[str, typing.Any], None] = No
 ```
 @trigger(event={'name':'foo', 'parameters':{'common_name': 'common_name', 'flow_param': 'event_field'}})
 ```
-
-Parameters
-----------
-event : Union[str, Dict[str, Any]], optional, default None
-Event dependency for this flow.
-events : List[Union[str, Dict[str, Any]]], default []
-Events dependency for this flow.
-options : Dict[str, Any], default {}
-Backend-specific configuration for tuning eventing behavior.
-
-
 """
 ...

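A minimal sketch of the `@trigger` usage shown in the context lines above (the event name is illustrative; the decorator is assumed to take effect only once the flow is deployed to a production orchestrator such as Argo Workflows):

```python
from metaflow import FlowSpec, step, trigger

# Hypothetical event name; the flow starts when this event is published.
@trigger(event="data_updated")
class EventDrivenFlow(FlowSpec):

    @step
    def start(self):
        self.next(self.end)

    @step
    def end(self):
        pass

if __name__ == "__main__":
    EventDrivenFlow()
```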
@@ -1881,56 +561,6 @@ def trigger(f: typing.Optional[typing.Type[FlowSpecDerived]] = None, *, event: t
 ```
 @trigger(event={'name':'foo', 'parameters':{'common_name': 'common_name', 'flow_param': 'event_field'}})
 ```
-
-Parameters
-----------
-event : Union[str, Dict[str, Any]], optional, default None
-Event dependency for this flow.
-events : List[Union[str, Dict[str, Any]]], default []
-Events dependency for this flow.
-options : Dict[str, Any], default {}
-Backend-specific configuration for tuning eventing behavior.
-
-
-"""
-...
-
-@typing.overload
-def pypi_base(*, packages: typing.Dict[str, str] = {}, python: typing.Optional[str] = None) -> typing.Callable[[typing.Type[FlowSpecDerived]], typing.Type[FlowSpecDerived]]:
-"""
-Specifies the PyPI packages for all steps of the flow.
-
-Use `@pypi_base` to set common packages required by all
-steps and use `@pypi` to specify step-specific overrides.
-Parameters
-----------
-packages : Dict[str, str], default: {}
-Packages to use for this flow. The key is the name of the package
-and the value is the version to use.
-python : str, optional, default: None
-Version of Python to use, e.g. '3.7.4'. A default value of None implies
-that the version used will correspond to the version of the Python interpreter used to start the run.
-"""
-...
-
-@typing.overload
-def pypi_base(f: typing.Type[FlowSpecDerived]) -> typing.Type[FlowSpecDerived]:
-...
-
-def pypi_base(f: typing.Optional[typing.Type[FlowSpecDerived]] = None, *, packages: typing.Dict[str, str] = {}, python: typing.Optional[str] = None):
-"""
-Specifies the PyPI packages for all steps of the flow.
-
-Use `@pypi_base` to set common packages required by all
-steps and use `@pypi` to specify step-specific overrides.
-Parameters
-----------
-packages : Dict[str, str], default: {}
-Packages to use for this flow. The key is the name of the package
-and the value is the version to use.
-python : str, optional, default: None
-Version of Python to use, e.g. '3.7.4'. A default value of None implies
-that the version used will correspond to the version of the Python interpreter used to start the run.
 """
 ...

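The removed `@pypi_base` stubs above describe setting a common package environment for every step of a flow. A minimal sketch of the flow-level/step-level combination (versions and names are illustrative, not taken from this diff):

```python
from metaflow import FlowSpec, step, pypi, pypi_base

# Hypothetical pins: @pypi_base defines the shared environment,
# @pypi then adds or overrides packages for a single step.
@pypi_base(python="3.11.0", packages={"requests": "2.31.0"})
class BaseEnvFlow(FlowSpec):

    @step
    def start(self):
        import requests  # available in all steps via @pypi_base
        self.requests_version = requests.__version__
        self.next(self.end)

    @pypi(packages={"pandas": "2.1.4"})  # step-specific override
    @step
    def end(self):
        import pandas as pd
        print(self.requests_version, pd.__version__)

if __name__ == "__main__":
    BaseEnvFlow()
```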
@@ -1939,21 +569,6 @@ def schedule(*, hourly: bool = False, daily: bool = True, weekly: bool = False,
 """
 Specifies the times when the flow should be run when running on a
 production scheduler.
-
-Parameters
-----------
-hourly : bool, default False
-Run the workflow hourly.
-daily : bool, default True
-Run the workflow daily.
-weekly : bool, default False
-Run the workflow weekly.
-cron : str, optional, default None
-Run the workflow at [a custom Cron schedule](https://docs.aws.amazon.com/eventbridge/latest/userguide/scheduled-events.html#cron-expressions)
-specified by this expression.
-timezone : str, optional, default None
-Timezone on which the schedule runs (default: None). Currently supported only for Argo workflows,
-which accepts timezones in [IANA format](https://nodatime.org/TimeZones).
 """
 ...

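A minimal sketch of the flow-level `@schedule` decorator documented above (the flow name is illustrative; the schedule is assumed to apply only once the flow is deployed to a production scheduler, e.g. via `argo-workflows create` or `step-functions create`):

```python
from metaflow import FlowSpec, schedule, step

# daily=True is one of the toggles documented in the removed parameter list;
# a cron expression could be used instead.
@schedule(daily=True)
class NightlyFlow(FlowSpec):

    @step
    def start(self):
        self.next(self.end)

    @step
    def end(self):
        pass

if __name__ == "__main__":
    NightlyFlow()
```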
@@ -1965,1459 +580,140 @@ def schedule(f: typing.Optional[typing.Type[FlowSpecDerived]] = None, *, hourly:
|
|
1965
580
|
"""
|
1966
581
|
Specifies the times when the flow should be run when running on a
|
1967
582
|
production scheduler.
|
1968
|
-
|
1969
|
-
Parameters
|
1970
|
-
----------
|
1971
|
-
hourly : bool, default False
|
1972
|
-
Run the workflow hourly.
|
1973
|
-
daily : bool, default True
|
1974
|
-
Run the workflow daily.
|
1975
|
-
weekly : bool, default False
|
1976
|
-
Run the workflow weekly.
|
1977
|
-
cron : str, optional, default None
|
1978
|
-
Run the workflow at [a custom Cron schedule](https://docs.aws.amazon.com/eventbridge/latest/userguide/scheduled-events.html#cron-expressions)
|
1979
|
-
specified by this expression.
|
1980
|
-
timezone : str, optional, default None
|
1981
|
-
Timezone on which the schedule runs (default: None). Currently supported only for Argo workflows,
|
1982
|
-
which accepts timezones in [IANA format](https://nodatime.org/TimeZones).
|
1983
583
|
"""
|
1984
584
|
...
|
1985
585
|
|
1986
|
-
def
|
586
|
+
def airflow_external_task_sensor(*, timeout: int, poke_interval: int, mode: str, exponential_backoff: bool, pool: str, soft_fail: bool, name: str, description: str, external_dag_id: str, external_task_ids: typing.List[str], allowed_states: typing.List[str], failed_states: typing.List[str], execution_delta: "datetime.timedelta", check_existence: bool) -> typing.Callable[[typing.Type[FlowSpecDerived]], typing.Type[FlowSpecDerived]]:
|
1987
587
|
"""
|
1988
|
-
|
1989
|
-
|
1990
|
-
This call has a global effect. No objects outside this namespace
|
1991
|
-
will be accessible. To access all objects regardless of namespaces,
|
1992
|
-
pass None to this call.
|
1993
|
-
|
1994
|
-
Parameters
|
1995
|
-
----------
|
1996
|
-
ns : str, optional
|
1997
|
-
Namespace to switch to or None to ignore namespaces.
|
1998
|
-
|
1999
|
-
Returns
|
2000
|
-
-------
|
2001
|
-
str, optional
|
2002
|
-
Namespace set (result of get_namespace()).
|
588
|
+
The `@airflow_external_task_sensor` decorator attaches a Airflow [ExternalTaskSensor](https://airflow.apache.org/docs/apache-airflow/stable/_api/airflow/sensors/external_task/index.html#airflow.sensors.external_task.ExternalTaskSensor) before the start step of the flow.
|
589
|
+
This decorator only works when a flow is scheduled on Airflow and is compiled using `airflow create`. More than one `@airflow_external_task_sensor` can be added as a flow decorators. Adding more than one decorator will ensure that `start` step starts only after all sensors finish.
|
2003
590
|
"""
|
2004
591
|
...
|
2005
592
|
|
2006
|
-
|
593
|
+
@typing.overload
|
594
|
+
def pypi_base(*, packages: typing.Dict[str, str] = {}, python: typing.Optional[str] = None) -> typing.Callable[[typing.Type[FlowSpecDerived]], typing.Type[FlowSpecDerived]]:
|
2007
595
|
"""
|
2008
|
-
|
2009
|
-
|
2010
|
-
The namespace is a tag associated with all objects in Metaflow.
|
596
|
+
Specifies the PyPI packages for all steps of the flow.
|
2011
597
|
|
2012
|
-
|
2013
|
-
|
2014
|
-
str, optional
|
2015
|
-
The current namespace used to filter objects.
|
598
|
+
Use `@pypi_base` to set common packages required by all
|
599
|
+
steps and use `@pypi` to specify step-specific overrides.
|
2016
600
|
"""
|
2017
601
|
...
|
2018
602
|
|
2019
|
-
|
603
|
+
@typing.overload
|
604
|
+
def pypi_base(f: typing.Type[FlowSpecDerived]) -> typing.Type[FlowSpecDerived]:
|
605
|
+
...
|
606
|
+
|
607
|
+
def pypi_base(f: typing.Optional[typing.Type[FlowSpecDerived]] = None, *, packages: typing.Dict[str, str] = {}, python: typing.Optional[str] = None):
|
2020
608
|
"""
|
2021
|
-
|
2022
|
-
used prior to any `namespace` calls.
|
609
|
+
Specifies the PyPI packages for all steps of the flow.
|
2023
610
|
|
2024
|
-
|
2025
|
-
|
2026
|
-
str
|
2027
|
-
The result of get_namespace() after the namespace has been reset.
|
611
|
+
Use `@pypi_base` to set common packages required by all
|
612
|
+
steps and use `@pypi` to specify step-specific overrides.
|
2028
613
|
"""
|
2029
614
|
...
|
2030
615
|
|
2031
|
-
|
616
|
+
@typing.overload
|
617
|
+
def trigger_on_finish(*, flow: typing.Union[typing.Dict[str, str], str, None] = None, flows: typing.List[typing.Union[str, typing.Dict[str, str]]] = [], options: typing.Dict[str, typing.Any] = {}) -> typing.Callable[[typing.Type[FlowSpecDerived]], typing.Type[FlowSpecDerived]]:
|
2032
618
|
"""
|
2033
|
-
|
619
|
+
Specifies the flow(s) that this flow depends on.
|
2034
620
|
|
2035
|
-
|
2036
|
-
|
2037
|
-
|
621
|
+
```
|
622
|
+
@trigger_on_finish(flow='FooFlow')
|
623
|
+
```
|
624
|
+
or
|
625
|
+
```
|
626
|
+
@trigger_on_finish(flows=['FooFlow', 'BarFlow'])
|
627
|
+
```
|
628
|
+
This decorator respects the @project decorator and triggers the flow
|
629
|
+
when upstream runs within the same namespace complete successfully
|
630
|
+
|
631
|
+
Additionally, you can specify project aware upstream flow dependencies
|
632
|
+
by specifying the fully qualified project_flow_name.
|
633
|
+
```
|
634
|
+
@trigger_on_finish(flow='my_project.branch.my_branch.FooFlow')
|
635
|
+
```
|
636
|
+
or
|
637
|
+
```
|
638
|
+
@trigger_on_finish(flows=['my_project.branch.my_branch.FooFlow', 'BarFlow'])
|
639
|
+
```
|
2038
640
|
|
2039
|
-
|
2040
|
-
|
641
|
+
You can also specify just the project or project branch (other values will be
|
642
|
+
inferred from the current project or project branch):
|
643
|
+
```
|
644
|
+
@trigger_on_finish(flow={"name": "FooFlow", "project": "my_project", "project_branch": "branch"})
|
645
|
+
```
|
2041
646
|
|
2042
|
-
|
2043
|
-
|
2044
|
-
|
2045
|
-
|
2046
|
-
|
2047
|
-
local providers).
|
647
|
+
Note that `branch` is typically one of:
|
648
|
+
- `prod`
|
649
|
+
- `user.bob`
|
650
|
+
- `test.my_experiment`
|
651
|
+
- `prod.staging`
|
2048
652
|
"""
|
2049
653
|
...
|
2050
654
|
|
2051
|
-
|
2052
|
-
|
2053
|
-
Resets the Metadata provider to the default value, that is, to the value
|
2054
|
-
that was used prior to any `metadata` calls.
|
2055
|
-
|
2056
|
-
Returns
|
2057
|
-
-------
|
2058
|
-
str
|
2059
|
-
The result of get_metadata() after resetting the provider.
|
2060
|
-
"""
|
655
|
+
@typing.overload
|
656
|
+
def trigger_on_finish(f: typing.Type[FlowSpecDerived]) -> typing.Type[FlowSpecDerived]:
|
2061
657
|
...
|
2062
658
|
|
2063
|
-
|
659
|
+
def trigger_on_finish(f: typing.Optional[typing.Type[FlowSpecDerived]] = None, *, flow: typing.Union[typing.Dict[str, str], str, None] = None, flows: typing.List[typing.Union[str, typing.Dict[str, str]]] = [], options: typing.Dict[str, typing.Any] = {}):
|
2064
660
|
"""
-     This object can be used to list all the flows present either through the explicit property
-     or by iterating over this object.
-
-     Attributes
-     ----------
-     flows : List[Flow]
-         Returns the list of all `Flow` objects known to this metadata provider. Note that only
-         flows present in the current namespace will be returned. A `Flow` is present in a namespace
-         if it has at least one run in the namespace.
-     """
-     def __init__(self, _current_metadata: typing.Optional[str] = None):
-         ...
-     @property
-     def flows(self) -> typing.List[metaflow.client.core.Flow]:
-         """Returns a list of all the flows present in the set namespace."""
-         ...
-     def __iter__(self) -> typing.Iterator[metaflow.client.core.Flow]:
-         """Iterator over all flows present in the set namespace."""
-         ...
-     def __str__(self) -> str:
-         ...
-     def __getitem__(self, name: str) -> metaflow.client.core.Flow:
-         """Returns a specific flow by name, if it is present in the current namespace."""
-         ...
-     ...
-
- class Flow(metaflow.client.core.MetaflowObject, metaclass=type):
-     """
-     A Flow represents all existing flows with a certain name, in other words,
-     classes derived from `FlowSpec`. A container of `Run` objects.
-
-     Attributes
-     ----------
-     latest_run : Run
-         Latest `Run` (in progress or completed, successfully or not) of this flow.
-     latest_successful_run : Run
-         Latest successfully completed `Run` of this flow.
-     """
-     def __init__(self, *args, **kwargs):
-         ...
-     @property
-     def latest_run(self) -> typing.Optional[metaflow.client.core.Run]:
-         """Returns the latest run (either in progress or completed) of this flow."""
-         ...
-     @property
-     def latest_successful_run(self) -> typing.Optional[metaflow.client.core.Run]:
-         """Returns the latest successful run of this flow."""
-         ...
-     def runs(self, *tags: str) -> typing.Iterator[metaflow.client.core.Run]:
-         """Returns an iterator over all `Run`s of this flow; only runs carrying all the specified tags are returned."""
-         ...
-     def __iter__(self) -> typing.Iterator[metaflow.client.core.Task]:
-         """Iterate over all children Run of this Flow (only runs in the current namespace unless _namespace_check is False)."""
-         ...
-     def __getitem__(self, run_id: str) -> metaflow.client.core.Run:
-         """Returns the Run object with the run ID 'run_id'; raises KeyError if it does not identify a valid Run."""
-         ...
-     def __getstate__(self):
-         ...
-     def __setstate__(self, state):
-         ...
-     ...
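For reference, a minimal sketch of how the removed client-side listing API above is typically used; `HelloFlow` and `my_tag` are placeholder names, and the code assumes the flow has at least one run in the current namespace:

```python
from metaflow import Metaflow, Flow

# Iterate over every Flow known to the metadata provider (current namespace only).
for flow in Metaflow():
    print(flow)

flow = Flow("HelloFlow")             # "HelloFlow" is a placeholder flow name
print(flow.latest_successful_run)    # latest successfully completed Run, if any
for run in flow.runs("my_tag"):      # only runs carrying all of the given tags
    print(run, run.finished_at)
```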
- class Run(metaflow.client.core.MetaflowObject, metaclass=type):
-     """
-     A `Run` represents an execution of a `Flow`. It is a container of `Step`s.
-
-     Attributes
-     ----------
-     data : MetaflowData
-         a shortcut to run['end'].task.data, i.e. data produced by this run.
-     successful : bool
-         True if the run completed successfully.
-     finished : bool
-         True if the run completed.
-     finished_at : datetime
-         Time this run finished.
-     code : MetaflowCode
-         Code package for this run (if present). See `MetaflowCode`.
-     trigger : MetaflowTrigger
-         Information about event(s) that triggered this run (if present). See `MetaflowTrigger`.
-     end_task : Task
-         `Task` for the end step (if it is present already).
-     """
-     def steps(self, *tags: str) -> typing.Iterator[metaflow.client.core.Step]:
-         """[Legacy function - do not use] Alias for iterating over the object itself: list(Run(...)) == list(Run(...).steps())."""
-         ...
-     @property
-     def code(self) -> typing.Optional[metaflow.client.core.MetaflowCode]:
-         """Returns the MetaflowCode object for this run; code is packed if at least one `Step` runs remotely, else None."""
-         ...
-     @property
-     def data(self) -> typing.Optional[metaflow.client.core.MetaflowData]:
-         """Container of data artifacts produced by this run; shorthand for `run['end'].task.data`, None if 'end' has not executed."""
-         ...
-     @property
-     def successful(self) -> bool:
-         """True if the run's 'end' step completed successfully."""
-         ...
-     @property
-     def finished(self) -> bool:
-         """True if the run's 'end' step completed."""
-         ...
-     @property
-     def finished_at(self) -> typing.Optional[datetime.datetime]:
-         """Datetime of when the run finished (the completion time of its 'end' step), or None."""
-         ...
-     @property
-     def end_task(self) -> typing.Optional[metaflow.client.core.Task]:
-         """Returns the Task corresponding to the 'end' step, or None if it does not yet exist."""
-         ...
-     def add_tag(self, tag: str):
-         """Add a tag to this `Run`; a tag that is already a system tag is not added as a user tag and no error is thrown."""
-         ...
-     def add_tags(self, tags: typing.Iterable[str]):
-         """Add one or more tags to this `Run`."""
-         ...
-     def remove_tag(self, tag: str):
-         """Remove one tag from this `Run`; removing a system tag is an error, removing a non-existent user tag is a no-op."""
-         ...
-     def remove_tags(self, tags: typing.Iterable[str]):
-         """Remove one or more tags from this `Run`."""
-         ...
-     def replace_tag(self, tag_to_remove: str, tag_to_add: str):
-         """Remove a tag and add a tag atomically; removal is done first. The rules for `Run.add_tag` and `Run.remove_tag` apply."""
-         ...
-     def replace_tags(self, tags_to_remove: typing.Iterable[str], tags_to_add: typing.Iterable[str]):
-         """Remove and add tags atomically; the removal is done first."""
-         ...
-     def __iter__(self) -> typing.Iterator[metaflow.client.core.Step]:
-         """Iterate over all children Step of this Run."""
-         ...
-     def __getitem__(self, name: str) -> metaflow.client.core.Step:
-         """Returns the Step object with the step name 'name'; raises KeyError if it does not identify a valid Step."""
-         ...
-     def __getstate__(self):
-         ...
-     def __setstate__(self, state):
-         ...
-     @property
-     def trigger(self) -> typing.Optional[metaflow.events.Trigger]:
-         """Returns a container of events that triggered this run, or None if the run was not triggered by any events."""
-         ...
-     ...
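A minimal sketch of inspecting a `Run` and managing its tags, based on the docstrings above; `HelloFlow`, `my_var`, and the tag names are placeholders:

```python
from metaflow import Flow

run = Flow("HelloFlow").latest_run       # may still be in progress
if run.successful:
    print(run.data.my_var)               # shorthand for run['end'].task.data; my_var is a placeholder artifact
print(run.trigger)                       # None unless the run was triggered by events

run.add_tag("reviewed")                  # adding an existing system tag as a user tag is a silent no-op
run.replace_tag("reviewed", "approved")  # removal happens first, then the add
```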
- class Step(metaflow.client.core.MetaflowObject, metaclass=type):
-     """
-     A `Step` represents a user-defined step, that is, a method annotated with the `@step` decorator.
-
-     It contains `Task` objects associated with the step, that is, all executions of the
-     `Step`. The step may contain multiple `Task`s in the case of a foreach step.
-
-     Attributes
-     ----------
-     task : Task
-         The first `Task` object in this step. This is a shortcut for retrieving the only
-         task contained in a non-foreach step.
-     finished_at : datetime
-         Time when the latest `Task` of this step finished. Note that in the case of foreaches,
-         this time may change during execution of the step.
-     environment_info : Dict[str, Any]
-         Information about the execution environment.
-     """
-     @property
-     def task(self) -> typing.Optional[metaflow.client.core.Task]:
-         """Returns a Task object belonging to this step; useful when the step only contains one task (a linear step for example)."""
-         ...
-     def tasks(self, *tags: str) -> typing.Iterable[metaflow.client.core.Task]:
-         """[Legacy function - do not use] Alias for iterating over the object itself: list(Step(...)) == list(Step(...).tasks())."""
-         ...
-     @property
-     def control_task(self) -> typing.Optional[metaflow.client.core.Task]:
-         """[Unpublished API - use with caution!] Returns a Control Task object belonging to this step."""
-         ...
-     def control_tasks(self, *tags: str) -> typing.Iterator[metaflow.client.core.Task]:
-         """[Unpublished API - use with caution!] Iterator over the control tasks in the step, optionally filtered by tags."""
-         ...
-     def __iter__(self) -> typing.Iterator[metaflow.client.core.Task]:
-         """Iterate over all children Task of this Step."""
-         ...
-     def __getitem__(self, task_id: str) -> metaflow.client.core.Task:
-         """Returns the Task object with the task ID 'task_id'; raises KeyError if it does not identify a valid Task."""
-         ...
-     def __getstate__(self):
-         ...
-     def __setstate__(self, state):
-         ...
-     @property
-     def finished_at(self) -> typing.Optional[datetime.datetime]:
-         """Datetime of when the step finished (when all of its tasks have finished), or None."""
-         ...
-     @property
-     def environment_info(self) -> typing.Optional[typing.Dict[str, typing.Any]]:
-         """Information about the environment used to execute this step; only available when the code package was saved."""
-         ...
-     ...
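A short sketch of navigating a `Step` and its tasks, following the docstrings above; the pathspec `HelloFlow/1234/train` is a placeholder:

```python
from metaflow import Step

train = Step("HelloFlow/1234/train")      # pathspec is a placeholder
print(train.task)                         # the single task of a non-foreach step
for task in train:                        # a foreach step can contain many tasks
    print(task.index, task.successful)    # index distinguishes foreach iterations
print(train.finished_at, train.environment_info)
```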
- class Task(metaflow.client.core.MetaflowObject, metaclass=type):
-     """
-     A `Task` represents an execution of a `Step`.
-
-     It contains all `DataArtifact` objects produced by the task as
-     well as metadata related to execution.
-
-     Note that the `@retry` decorator may cause multiple attempts of
-     the task to be present. Usually you want the latest attempt, which
-     is what instantiating a `Task` object returns by default. If
-     you need to e.g. retrieve logs from a failed attempt, you can
-     explicitly get information about a specific attempt by using the
-     following syntax when creating a task:
-
-     `Task('flow/run/step/task', attempt=<attempt>)`
-
-     where `attempt=0` corresponds to the first attempt etc.
-
-     Attributes
-     ----------
-     metadata : List[Metadata]
-         List of all metadata events associated with the task.
-     metadata_dict : Dict[str, str]
-         A condensed version of `metadata`: A dictionary where keys
-         are names of metadata events and values the latest corresponding event.
-     data : MetaflowData
-         Container of all data artifacts produced by this task. Note that this
-         call downloads all data locally, so it can be slower than accessing
-         artifacts individually. See `MetaflowData` for more information.
-     artifacts : MetaflowArtifacts
-         Container of `DataArtifact` objects produced by this task.
-     successful : bool
-         True if the task completed successfully.
-     finished : bool
-         True if the task completed.
-     exception : object
-         Exception raised by this task if there was one.
-     finished_at : datetime
-         Time this task finished.
-     runtime_name : str
-         Runtime this task was executed on.
-     stdout : str
-         Standard output for the task execution.
-     stderr : str
-         Standard error output for the task execution.
-     code : MetaflowCode
-         Code package for this task (if present). See `MetaflowCode`.
-     environment_info : Dict[str, str]
-         Information about the execution environment.
-     """
-     def __init__(self, *args, **kwargs):
-         ...
-     @property
-     def metadata(self) -> typing.List[metaflow.client.core.Metadata]:
-         """Metadata events produced by this task across all attempts, *except* if a specific task attempt was selected."""
-         ...
-     @property
-     def metadata_dict(self) -> typing.Dict[str, str]:
-         """Dictionary mapping metadata names to the value recorded by the latest execution of the task."""
-         ...
-     @property
-     def index(self) -> typing.Optional[int]:
-         """Index of the innermost foreach loop for this task, or None if the task was not run in a foreach."""
-         ...
-     @property
-     def data(self) -> metaflow.client.core.MetaflowData:
-         """Returns a container of data artifacts produced by this task, e.g. `task.data.my_var`."""
-         ...
-     @property
-     def artifacts(self) -> typing.NamedTuple:
-         """Returns a container of DataArtifacts produced by this task (DataArtifact objects carry additional metadata, unlike `data`)."""
-         ...
-     @property
-     def successful(self) -> bool:
-         """True if the latest completed attempt of the task was successful."""
-         ...
-     @property
-     def finished(self) -> bool:
-         """True if the latest attempt of the task completed."""
-         ...
-     @property
-     def exception(self) -> typing.Optional[typing.Any]:
-         """Returns the exception that caused the task to fail, if any."""
-         ...
-     @property
-     def finished_at(self) -> typing.Optional[datetime.datetime]:
-         """Datetime of when the task finished (successfully or not), or None if the task is not finished."""
-         ...
-     @property
-     def runtime_name(self) -> typing.Optional[str]:
-         """Returns the name of the runtime this task executed on."""
-         ...
-     @property
-     def stdout(self) -> str:
-         """Full standard output of the selected attempt (or the latest *started* attempt); each call fetches the latest log."""
-         ...
-     @property
-     def stdout_size(self) -> int:
-         """Size of the stdout log content (in bytes); grows while an attempt is still running."""
-         ...
-     @property
-     def stderr(self) -> str:
-         """Full standard error of the selected attempt (or the latest *started* attempt); each call fetches the latest log."""
-         ...
-     @property
-     def stderr_size(self) -> int:
-         """Size of the stderr log content (in bytes); grows while an attempt is still running."""
-         ...
-     @property
-     def current_attempt(self) -> int:
-         """The attempt used when initializing the instance, or the latest *started* attempt for the Task."""
-         ...
-     @property
-     def code(self) -> typing.Optional[metaflow.client.core.MetaflowCode]:
-         """Returns the MetaflowCode object for this task, or None if the task did not save its code."""
-         ...
-     @property
-     def environment_info(self) -> typing.Dict[str, typing.Any]:
-         """Information about the environment used to execute this task; only available for tasks that have a code package."""
-         ...
-     def loglines(self, stream: str, as_unicode: bool = True, meta_dict: typing.Optional[typing.Dict[str, typing.Any]] = None) -> typing.Iterator[typing.Tuple[datetime.datetime, str]]:
-         """Return an iterator over (utc_timestamp, logline) tuples for 'stdout' or 'stderr'; byte objects if as_unicode=False."""
-         ...
-     def __iter__(self) -> typing.Iterator[metaflow.client.core.DataArtifact]:
-         """Iterate over all children DataArtifact of this Task."""
-         ...
-     def __getitem__(self, name: str) -> metaflow.client.core.DataArtifact:
-         """Returns the DataArtifact with the artifact name 'name'; raises KeyError if it does not identify a valid DataArtifact."""
-         ...
-     def __getstate__(self):
-         ...
-     def __setstate__(self, state):
-         ...
-     ...
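A small sketch of inspecting a specific task attempt and its logs, following the docstrings above; the pathspec is a placeholder:

```python
from metaflow import Task

# attempt=0 selects the first attempt explicitly, e.g. to inspect a failed retry.
task = Task("HelloFlow/1234/train/567", attempt=0)   # pathspec is a placeholder
print(task.current_attempt, task.finished, task.exception)
print(task.stderr[-2000:])                # tail of the captured stderr for this attempt
for ts, line in task.loglines("stdout"):  # (utc_timestamp, logline) tuples
    print(ts.isoformat(), line)
```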
- class DataArtifact(metaflow.client.core.MetaflowObject, metaclass=type):
-     """
-     A single data artifact and associated metadata. Note that this object does
-     not contain other objects as it is the leaf object in the hierarchy.
-
-     Attributes
-     ----------
-     data : object
-         The data contained in this artifact, that is, the object produced during
-         execution of this run.
-     sha : string
-         A unique ID of this artifact.
-     finished_at : datetime
-         Corresponds roughly to the `Task.finished_at` time of the parent `Task`.
-         An alias for `DataArtifact.created_at`.
-     """
-     @property
-     def data(self) -> typing.Any:
-         """Unpickled representation of the data contained in this artifact."""
-         ...
-     @property
-     def size(self) -> int:
-         """Size (in bytes) of the pickled object representing this DataArtifact."""
-         ...
-     @property
-     def sha(self) -> str:
-         """Unique identifier for this artifact (historically a SHA1 hash)."""
-         ...
-     @property
-     def finished_at(self) -> datetime.datetime:
-         """Creation time for this artifact; alias for created_at."""
-         ...
-     def __getstate__(self):
-         ...
-     def __setstate__(self, state):
-         ...
-     ...
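A brief sketch of reading a single artifact, per the docstrings above; the pathspec and `my_var` are placeholders:

```python
from metaflow import Task

task = Task("HelloFlow/1234/end/999")   # pathspec is a placeholder
artifact = task["my_var"]               # DataArtifact; my_var is a placeholder name
print(artifact.sha, artifact.size, artifact.finished_at)
print(artifact.data)                    # unpickled value
print(task.data.my_var)                 # same value via the MetaflowData shortcut
```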
- class Runner(object, metaclass=type):
-     """
-     Metaflow's Runner API that presents a programmatic interface
-     to run flows and perform other operations either synchronously or asynchronously.
-     The class expects a path to the flow file along with optional arguments
-     that match top-level options on the command-line.
-
-     This class works as a context manager, calling `cleanup()` to remove
-     temporary files at exit.
+ Specifies the flow(s) that this flow depends on.
-     Example:
-     ```python
-     with Runner('slowflow.py', pylint=False) as runner:
-         result = runner.run(alpha=5, tags=["abc", "def"], max_workers=5)
-         print(result.run.finished)
  ```
-     flow_file : str
-         Path to the flow file to run
-     show_output : bool, default True
-         Show the 'stdout' and 'stderr' to the console by default,
-         Only applicable for synchronous 'run' and 'resume' functions.
-     profile : Optional[str], default None
-         Metaflow profile to use to run this run. If not specified, the default
-         profile is used (or the one already set using `METAFLOW_PROFILE`)
-     env : Optional[Dict], default None
-         Additional environment variables to set for the Run. This overrides the
-         environment set for this process.
-     cwd : Optional[str], default None
-         The directory to run the subprocess in; if not specified, the current
-         directory is used.
-     file_read_timeout : int, default 3600
-         The timeout until which we try to read the runner attribute file.
-     **kwargs : Any
-         Additional arguments that you would pass to `python myflow.py` before
-         the `run` command.
-     """
-     def __init__(self, flow_file: str, show_output: bool = True, profile: typing.Optional[str] = None, env: typing.Optional[typing.Dict] = None, cwd: typing.Optional[str] = None, file_read_timeout: int = 3600, **kwargs):
-         ...
-     def __enter__(self) -> metaflow.runner.metaflow_runner.Runner:
-         ...
-     def __aenter__(self) -> metaflow.runner.metaflow_runner.Runner:
-         ...
-     def _Runner__get_executing_run(self, tfp_runner_attribute, command_obj):
-         ...
-     def run(self, **kwargs) -> metaflow.runner.metaflow_runner.ExecutingRun:
-         """Blocking execution of the run; waits until the run has completed and returns an ExecutingRun."""
-         ...
-     def resume(self, **kwargs):
-         """Blocking resume execution of the run; waits until the resumed run has completed and returns an ExecutingRun."""
-         ...
-     def async_run(self, **kwargs) -> metaflow.runner.metaflow_runner.ExecutingRun:
-         """Non-blocking execution of the run; returns as soon as the run has launched. Must be `await`ed."""
-         ...
-     def async_resume(self, **kwargs):
-         """Non-blocking resume execution of the run; returns as soon as the resume has launched. Must be `await`ed."""
-         ...
-     def __exit__(self, exc_type, exc_value, traceback):
-         ...
-     def __aexit__(self, exc_type, exc_value, traceback):
-         ...
-     def cleanup(self):
-         """Delete any temporary files created during execution."""
-         ...
-     ...
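A minimal sketch of the Runner API, taken almost verbatim from the removed docstring above; `slowflow.py` and the `alpha` parameter are placeholders for a real flow file and its parameter:

```python
from metaflow import Runner

with Runner("slowflow.py", pylint=False) as runner:
    result = runner.run(alpha=5, tags=["abc", "def"], max_workers=5)
    print(result.run.finished)          # result.run is a metaflow.Run client object

# Non-blocking variant (inside an async function):
#     result = await runner.async_run(alpha=5)
```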
- class NBRunner(object, metaclass=type):
-     """
-     A wrapper over `Runner` for executing flows defined in a Jupyter
-     notebook cell.
-
-     Instantiate this class on the last line of a notebook cell where
-     a `flow` is defined. In contrast to `Runner`, this class is not
-     meant to be used in a context manager. Instead, use a blocking helper
-     function like `nbrun` (which calls `cleanup()` internally) or call
-     `cleanup()` explicitly when using non-blocking APIs.
-
-     ```python
-     run = NBRunner(FlowName).nbrun()
+ @trigger_on_finish(flow='FooFlow')
+ ```
+ or
  ```
+ @trigger_on_finish(flows=['FooFlow', 'BarFlow'])
+ ```
+ This decorator respects the @project decorator and triggers the flow
+ when upstream runs within the same namespace complete successfully
-         profile is used (or the one already set using `METAFLOW_PROFILE`)
-     env : Optional[Dict], default None
-         Additional environment variables to set for the Run. This overrides the
-         environment set for this process.
-     base_dir : Optional[str], default None
-         The directory to run the subprocess in; if not specified, the current
-         working directory is used.
-     file_read_timeout : int, default 3600
-         The timeout until which we try to read the runner attribute file.
-     **kwargs : Any
-         Additional arguments that you would pass to `python myflow.py` before
-         the `run` command.
-     """
-     def __init__(self, flow, show_output: bool = True, profile: typing.Optional[str] = None, env: typing.Optional[typing.Dict] = None, base_dir: typing.Optional[str] = None, file_read_timeout: int = 3600, **kwargs):
-         ...
-     def nbrun(self, **kwargs):
-         """Blocking execution of the run; returns a `metaflow.Run` object directly and calls `cleanup()` internally."""
-         ...
-     def nbresume(self, **kwargs):
-         """Blocking resuming of a run; returns a `metaflow.Run` object directly and calls `cleanup()` internally."""
-         ...
-     def run(self, **kwargs):
-         """Runs the flow."""
-         ...
-     def resume(self, **kwargs):
-         """Resumes the flow."""
-         ...
-     def async_run(self, **kwargs):
-         """Non-blocking execution of the run; equivalent to `Runner.async_run`. Must be `await`ed."""
-         ...
-     def async_resume(self, **kwargs):
-         """Non-blocking resume of the run; equivalent to `Runner.async_resume`. Must be `await`ed."""
-         ...
-     def cleanup(self):
-         """Delete any temporary files created during execution; call this after `async_run` or `async_resume`, not after `nbrun` or `nbresume`."""
-         ...
-     ...
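A short sketch of the notebook-oriented wrapper described above, assuming `MyFlow` (a placeholder `FlowSpec` subclass) was defined earlier in the same cell:

```python
# Last line of a notebook cell where MyFlow is defined:
from metaflow import NBRunner

run = NBRunner(MyFlow).nbrun()   # blocking; returns a metaflow.Run and cleans up temp files
print(run.successful)
```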
- class Deployer(object, metaclass=type):
-     """
-     Use the `Deployer` class to configure and access one of the production
-     orchestrators supported by Metaflow.
+ Additionally, you can specify project aware upstream flow dependencies
+ by specifying the fully qualified project_flow_name.
+ ```
+ @trigger_on_finish(flow='my_project.branch.my_branch.FooFlow')
+ ```
+ or
+ ```
+ @trigger_on_finish(flows=['my_project.branch.my_branch.FooFlow', 'BarFlow'])
+ ```
-         Show the 'stdout' and 'stderr' to the console by default.
-     profile : Optional[str], default None
-         Metaflow profile to use for the deployment. If not specified, the default
-         profile is used.
-     env : Optional[Dict[str, str]], default None
-         Additional environment variables to set for the deployment.
-     cwd : Optional[str], default None
-         The directory to run the subprocess in; if not specified, the current
-         directory is used.
-     file_read_timeout : int, default 3600
-         The timeout until which we try to read the deployer attribute file.
-     **kwargs : Any
-         Additional arguments that you would pass to `python myflow.py` before
-         the deployment command.
-     """
-     def __init__(self, flow_file: str, show_output: bool = True, profile: typing.Optional[str] = None, env: typing.Optional[typing.Dict] = None, cwd: typing.Optional[str] = None, file_read_timeout: int = 3600, **kwargs):
-         ...
-     def _Deployer__make_function(self, deployer_class):
-         """Create a function that initializes and returns an instance of the given deployer implementation class."""
-         ...
-     ...
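A hedged sketch of the Deployer API described above. The orchestrator-specific accessor is generated dynamically (see `_Deployer__make_function`), so the `argo_workflows()` call and the chained `create()` / `trigger()` methods below are assumptions; adapt them to the orchestrator available in your deployment, and treat `myflow.py` as a placeholder path:

```python
from metaflow import Deployer

deployer = Deployer("myflow.py")                 # myflow.py is a placeholder path

# Assumption: an Argo Workflows accessor; other orchestrators expose analogous ones.
deployed_flow = deployer.argo_workflows().create()
triggered_run = deployed_flow.trigger()
print(triggered_run.run)                         # metaflow.Run for the triggered execution
```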
- class NBDeployer(object, metaclass=type):
-     """
-     A wrapper over `Deployer` for deploying flows defined in a Jupyter
-     notebook cell.
+ You can also specify just the project or project branch (other values will be
+ inferred from the current project or project branch):
+ ```
+ @trigger_on_finish(flow={"name": "FooFlow", "project": "my_project", "project_branch": "branch"})
+ ```
+ Note that `branch` is typically one of:
+ - `prod`
+ - `user.bob`
+ - `test.my_experiment`
+ - `prod.staging`
+ """
+ ...
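With the `trigger_on_finish` docstring now complete, a minimal sketch of a flow that uses it; `my_project`, `FooFlow`, and `BarFlow` are placeholder names:

```python
from metaflow import FlowSpec, step, project, trigger_on_finish

@project(name="my_project")
@trigger_on_finish(flow="FooFlow")   # or flows=["FooFlow", "BarFlow"], or a dict with project/project_branch
class BarFlow(FlowSpec):

    @step
    def start(self):
        self.next(self.end)

    @step
    def end(self):
        pass

if __name__ == "__main__":
    BarFlow()
```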
+ @typing.overload
+ def conda_base(*, packages: typing.Dict[str, str] = {}, libraries: typing.Dict[str, str] = {}, python: typing.Optional[str] = None, disabled: bool = False) -> typing.Callable[[typing.Type[FlowSpecDerived]], typing.Type[FlowSpecDerived]]:
+ """
+ Specifies the Conda environment for all steps of the flow.
+ Use `@conda_base` to set common libraries required by all
+ steps and use `@conda` to specify step-specific additions.
+ """
+ ...
+ @typing.overload
+ def conda_base(f: typing.Type[FlowSpecDerived]) -> typing.Type[FlowSpecDerived]:
+ ...
+ def conda_base(f: typing.Optional[typing.Type[FlowSpecDerived]] = None, *, packages: typing.Dict[str, str] = {}, libraries: typing.Dict[str, str] = {}, python: typing.Optional[str] = None, disabled: bool = False):
+ """
+ Specifies the Conda environment for all steps of the flow.
-         Flow defined in the same cell
-     show_output : bool, default True
-         Show the 'stdout' and 'stderr' to the console by default,
-     profile : Optional[str], default None
-         Metaflow profile to use to deploy this run. If not specified, the default
-         profile is used (or the one already set using `METAFLOW_PROFILE`)
-     env : Optional[Dict[str, str]], default None
-         Additional environment variables to set. This overrides the
-         environment set for this process.
-     base_dir : Optional[str], default None
-         The directory to run the subprocess in; if not specified, the current
-         working directory is used.
-     **kwargs : Any
-         Additional arguments that you would pass to `python myflow.py` i.e. options
-         listed in `python myflow.py --help`
-     """
-     def __init__(self, flow, show_output: bool = True, profile: typing.Optional[str] = None, env: typing.Optional[typing.Dict] = None, base_dir: typing.Optional[str] = None, file_read_timeout: int = 3600, **kwargs):
-         ...
-     def cleanup(self):
-         """Delete any temporary files created during execution."""
-         ...
+ Use `@conda_base` to set common libraries required by all
+ steps and use `@conda` to specify step-specific additions.
+ """
  ...
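A minimal sketch of combining `@conda_base` with a step-level `@conda` addition, as the new docstring describes; the package names and versions are illustrative placeholders:

```python
from metaflow import FlowSpec, conda, conda_base, step

@conda_base(python="3.10.14", packages={"pandas": "2.2.2"})   # shared base environment
class CondaFlow(FlowSpec):

    @step
    def start(self):
        import pandas as pd
        self.df = pd.DataFrame({"x": [1, 2, 3]})
        self.next(self.train)

    @conda(packages={"scikit-learn": "1.5.1"})   # step-specific addition on top of the base env
    @step
    def train(self):
        import sklearn                            # available only in this step's environment
        self.next(self.end)

    @step
    def end(self):
        print(len(self.df))

if __name__ == "__main__":
    CondaFlow()
```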