metaflow-stubs 2.12.28__py2.py3-none-any.whl → 2.12.30__py2.py3-none-any.whl
This diff compares the contents of two publicly released versions of the package, as published to one of the supported registries. It is provided for informational purposes only and reflects the packages exactly as they appear in their public registry.
- metaflow-stubs/__init__.pyi +279 -2983
- metaflow-stubs/cards.pyi +19 -473
- metaflow-stubs/cli.pyi +17 -81
- metaflow-stubs/client/__init__.pyi +19 -1113
- metaflow-stubs/client/core.pyi +19 -159
- metaflow-stubs/client/filecache.pyi +7 -11
- metaflow-stubs/clone_util.pyi +6 -26
- metaflow-stubs/events.pyi +7 -6
- metaflow-stubs/exception.pyi +8 -6
- metaflow-stubs/flowspec.pyi +21 -105
- metaflow-stubs/generated_for.txt +1 -1
- metaflow-stubs/includefile.pyi +17 -565
- metaflow-stubs/info_file.pyi +6 -5
- metaflow-stubs/metadata_provider/__init__.pyi +16 -0
- metaflow-stubs/metadata_provider/heartbeat.pyi +34 -0
- metaflow-stubs/{metadata → metadata_provider}/metadata.pyi +10 -22
- metaflow-stubs/metadata_provider/util.pyi +19 -0
- metaflow-stubs/metaflow_config.pyi +8 -11
- metaflow-stubs/metaflow_current.pyi +32 -31
- metaflow-stubs/mflog/__init__.pyi +6 -0
- metaflow-stubs/mflog/mflog.pyi +52 -5
- metaflow-stubs/multicore_utils.pyi +6 -5
- metaflow-stubs/parameters.pyi +12 -22
- metaflow-stubs/plugins/__init__.pyi +51 -163
- metaflow-stubs/plugins/airflow/__init__.pyi +12 -5
- metaflow-stubs/plugins/airflow/airflow.pyi +19 -130
- metaflow-stubs/plugins/airflow/airflow_cli.pyi +17 -136
- metaflow-stubs/plugins/airflow/airflow_decorator.pyi +7 -26
- metaflow-stubs/plugins/airflow/airflow_utils.pyi +7 -6
- metaflow-stubs/plugins/airflow/exception.pyi +7 -11
- metaflow-stubs/plugins/airflow/sensors/__init__.pyi +10 -97
- metaflow-stubs/plugins/airflow/sensors/base_sensor.pyi +9 -30
- metaflow-stubs/plugins/airflow/sensors/external_task_sensor.pyi +9 -40
- metaflow-stubs/plugins/airflow/sensors/s3_sensor.pyi +9 -40
- metaflow-stubs/plugins/argo/__init__.pyi +12 -5
- metaflow-stubs/plugins/argo/argo_client.pyi +8 -26
- metaflow-stubs/plugins/argo/argo_events.pyi +7 -11
- metaflow-stubs/plugins/argo/argo_workflows.pyi +17 -121
- metaflow-stubs/plugins/argo/argo_workflows_cli.pyi +22 -460
- metaflow-stubs/plugins/argo/argo_workflows_decorator.pyi +12 -404
- metaflow-stubs/plugins/argo/argo_workflows_deployer.pyi +65 -322
- metaflow-stubs/plugins/argo/argo_workflows_deployer_objects.pyi +165 -0
- metaflow-stubs/plugins/aws/__init__.pyi +11 -5
- metaflow-stubs/plugins/aws/aws_client.pyi +6 -5
- metaflow-stubs/plugins/aws/aws_utils.pyi +6 -11
- metaflow-stubs/plugins/aws/batch/__init__.pyi +10 -5
- metaflow-stubs/plugins/aws/batch/batch.pyi +10 -55
- metaflow-stubs/plugins/aws/batch/batch_cli.pyi +10 -31
- metaflow-stubs/plugins/aws/batch/batch_client.pyi +7 -11
- metaflow-stubs/plugins/aws/batch/batch_decorator.pyi +15 -140
- metaflow-stubs/plugins/aws/secrets_manager/__init__.pyi +7 -5
- metaflow-stubs/plugins/aws/secrets_manager/aws_secrets_manager_secrets_provider.pyi +10 -21
- metaflow-stubs/plugins/aws/step_functions/__init__.pyi +15 -5
- metaflow-stubs/plugins/aws/step_functions/dynamo_db_client.pyi +6 -5
- metaflow-stubs/plugins/aws/step_functions/event_bridge_client.pyi +6 -5
- metaflow-stubs/plugins/aws/step_functions/production_token.pyi +6 -5
- metaflow-stubs/plugins/aws/step_functions/schedule_decorator.pyi +7 -5
- metaflow-stubs/plugins/aws/step_functions/step_functions.pyi +11 -65
- metaflow-stubs/plugins/aws/step_functions/step_functions_cli.pyi +19 -175
- metaflow-stubs/plugins/aws/step_functions/step_functions_client.pyi +6 -5
- metaflow-stubs/plugins/aws/step_functions/step_functions_decorator.pyi +8 -37
- metaflow-stubs/plugins/aws/step_functions/step_functions_deployer.pyi +53 -290
- metaflow-stubs/plugins/aws/step_functions/step_functions_deployer_objects.pyi +127 -0
- metaflow-stubs/plugins/azure/__init__.pyi +12 -7
- metaflow-stubs/plugins/azure/azure_credential.pyi +6 -5
- metaflow-stubs/plugins/azure/azure_exceptions.pyi +7 -11
- metaflow-stubs/plugins/azure/azure_secret_manager_secrets_provider.pyi +11 -24
- metaflow-stubs/plugins/azure/azure_utils.pyi +11 -29
- metaflow-stubs/plugins/azure/blob_service_client_factory.pyi +8 -23
- metaflow-stubs/plugins/azure/includefile_support.pyi +7 -17
- metaflow-stubs/plugins/cards/__init__.pyi +15 -5
- metaflow-stubs/plugins/cards/card_cli.pyi +22 -491
- metaflow-stubs/plugins/cards/card_client.pyi +14 -76
- metaflow-stubs/plugins/cards/card_creator.pyi +7 -10
- metaflow-stubs/plugins/cards/card_datastore.pyi +10 -18
- metaflow-stubs/plugins/cards/card_decorator.pyi +10 -126
- metaflow-stubs/plugins/cards/card_modules/__init__.pyi +14 -81
- metaflow-stubs/plugins/cards/card_modules/basic.pyi +13 -96
- metaflow-stubs/plugins/cards/card_modules/card.pyi +6 -5
- metaflow-stubs/plugins/cards/card_modules/chevron/__init__.pyi +12 -73
- metaflow-stubs/plugins/cards/card_modules/chevron/main.pyi +6 -61
- metaflow-stubs/plugins/cards/card_modules/chevron/metadata.pyi +6 -5
- metaflow-stubs/plugins/cards/card_modules/chevron/renderer.pyi +8 -45
- metaflow-stubs/plugins/cards/card_modules/chevron/tokenizer.pyi +7 -6
- metaflow-stubs/plugins/cards/card_modules/components.pyi +24 -107
- metaflow-stubs/plugins/cards/card_modules/convert_to_native_type.pyi +6 -5
- metaflow-stubs/plugins/cards/card_modules/renderer_tools.pyi +6 -12
- metaflow-stubs/plugins/cards/card_modules/test_cards.pyi +11 -88
- metaflow-stubs/plugins/cards/card_resolver.pyi +6 -49
- metaflow-stubs/plugins/cards/component_serializer.pyi +13 -63
- metaflow-stubs/plugins/cards/exception.pyi +7 -11
- metaflow-stubs/plugins/catch_decorator.pyi +9 -29
- metaflow-stubs/plugins/datatools/__init__.pyi +13 -392
- metaflow-stubs/plugins/datatools/local.pyi +7 -11
- metaflow-stubs/plugins/datatools/s3/__init__.pyi +19 -653
- metaflow-stubs/plugins/datatools/s3/s3.pyi +15 -263
- metaflow-stubs/plugins/datatools/s3/s3tail.pyi +7 -10
- metaflow-stubs/plugins/datatools/s3/s3util.pyi +6 -11
- metaflow-stubs/plugins/debug_logger.pyi +7 -5
- metaflow-stubs/plugins/debug_monitor.pyi +7 -5
- metaflow-stubs/plugins/environment_decorator.pyi +7 -5
- metaflow-stubs/plugins/events_decorator.pyi +8 -14
- metaflow-stubs/plugins/frameworks/__init__.pyi +7 -5
- metaflow-stubs/plugins/frameworks/pytorch.pyi +8 -45
- metaflow-stubs/plugins/gcp/__init__.pyi +11 -7
- metaflow-stubs/plugins/gcp/gcp_secret_manager_secrets_provider.pyi +11 -24
- metaflow-stubs/plugins/gcp/gs_exceptions.pyi +7 -11
- metaflow-stubs/plugins/gcp/gs_storage_client_factory.pyi +6 -5
- metaflow-stubs/plugins/gcp/gs_utils.pyi +8 -20
- metaflow-stubs/plugins/gcp/includefile_support.pyi +7 -17
- metaflow-stubs/plugins/kubernetes/__init__.pyi +13 -5
- metaflow-stubs/plugins/kubernetes/kube_utils.pyi +6 -10
- metaflow-stubs/plugins/kubernetes/kubernetes.pyi +9 -29
- metaflow-stubs/plugins/kubernetes/kubernetes_cli.pyi +16 -155
- metaflow-stubs/plugins/kubernetes/kubernetes_client.pyi +9 -72
- metaflow-stubs/plugins/kubernetes/kubernetes_decorator.pyi +19 -142
- metaflow-stubs/plugins/kubernetes/kubernetes_job.pyi +8 -41
- metaflow-stubs/plugins/kubernetes/kubernetes_jobsets.pyi +7 -11
- metaflow-stubs/plugins/logs_cli.pyi +10 -9
- metaflow-stubs/plugins/package_cli.pyi +7 -5
- metaflow-stubs/plugins/parallel_decorator.pyi +11 -59
- metaflow-stubs/plugins/project_decorator.pyi +8 -14
- metaflow-stubs/plugins/pypi/__init__.pyi +12 -11
- metaflow-stubs/plugins/pypi/conda_decorator.pyi +8 -27
- metaflow-stubs/plugins/pypi/conda_environment.pyi +13 -19
- metaflow-stubs/plugins/pypi/pypi_decorator.pyi +7 -5
- metaflow-stubs/plugins/pypi/pypi_environment.pyi +7 -39
- metaflow-stubs/plugins/pypi/utils.pyi +7 -11
- metaflow-stubs/plugins/resources_decorator.pyi +7 -5
- metaflow-stubs/plugins/retry_decorator.pyi +7 -11
- metaflow-stubs/plugins/secrets/__init__.pyi +9 -5
- metaflow-stubs/plugins/secrets/inline_secrets_provider.pyi +9 -14
- metaflow-stubs/plugins/secrets/secrets_decorator.pyi +7 -11
- metaflow-stubs/plugins/storage_executor.pyi +6 -11
- metaflow-stubs/plugins/tag_cli.pyi +14 -396
- metaflow-stubs/plugins/test_unbounded_foreach_decorator.pyi +9 -34
- metaflow-stubs/plugins/timeout_decorator.pyi +7 -11
- metaflow-stubs/procpoll.pyi +7 -5
- metaflow-stubs/pylint_wrapper.pyi +7 -11
- metaflow-stubs/runner/__init__.pyi +13 -5
- metaflow-stubs/runner/deployer.pyi +102 -210
- metaflow-stubs/runner/deployer_impl.pyi +87 -0
- metaflow-stubs/runner/metaflow_runner.pyi +23 -507
- metaflow-stubs/runner/nbdeploy.pyi +16 -60
- metaflow-stubs/runner/nbrun.pyi +11 -148
- metaflow-stubs/runner/subprocess_manager.pyi +9 -10
- metaflow-stubs/runner/utils.pyi +44 -9
- metaflow-stubs/system/__init__.pyi +9 -87
- metaflow-stubs/system/system_logger.pyi +7 -6
- metaflow-stubs/system/system_monitor.pyi +6 -5
- metaflow-stubs/tagging_util.pyi +6 -10
- metaflow-stubs/tuple_util.pyi +6 -5
- metaflow-stubs/version.pyi +6 -5
- {metaflow_stubs-2.12.28.dist-info → metaflow_stubs-2.12.30.dist-info}/METADATA +2 -2
- metaflow_stubs-2.12.30.dist-info/RECORD +158 -0
- {metaflow_stubs-2.12.28.dist-info → metaflow_stubs-2.12.30.dist-info}/WHEEL +1 -1
- metaflow-stubs/metadata/util.pyi +0 -18
- metaflow_stubs-2.12.28.dist-info/RECORD +0 -152
- {metaflow_stubs-2.12.28.dist-info → metaflow_stubs-2.12.30.dist-info}/top_level.txt +0 -0
metaflow-stubs/__init__.pyi
CHANGED
```diff
@@ -1,785 +1,68 @@
-
-#
-# MF version: 2.12.
-# Generated on 2024-11-
-
+######################################################################################################
+# Auto-generated Metaflow stub file #
+# MF version: 2.12.30 #
+# Generated on 2024-11-13T13:50:31.391188 #
+######################################################################################################
 
 from __future__ import annotations
 
 import typing
 if typing.TYPE_CHECKING:
-    import metaflow.metaflow_current
     import typing
     import datetime
-    import io
-    import metaflow.runner.metaflow_runner
-    import metaflow.client.core
-    import metaflow.events
-    import metaflow.datastore.inputs
-    import metaflow.flowspec
-    import metaflow._vendor.click.types
-    import metaflow.parameters
-    import metaflow.plugins.datatools.s3.s3
 FlowSpecDerived = typing.TypeVar("FlowSpecDerived", bound="FlowSpec", contravariant=False, covariant=False)
 StepFlag = typing.NewType("StepFlag", bool)
 
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-current: metaflow.metaflow_current.Current
-
-def metadata(ms: str) -> str:
-    """
-    Switch Metadata provider.
-
-    This call has a global effect. Selecting the local metadata will,
-    for example, not allow access to information stored in remote
-    metadata providers.
-
-    Note that you don't typically have to call this function directly. Usually
-    the metadata provider is set through the Metaflow configuration file. If you
-    need to switch between multiple providers, you can use the `METAFLOW_PROFILE`
-    environment variable to switch between configurations.
-
-    Parameters
-    ----------
-    ms : str
-        Can be a path (selects local metadata), a URL starting with http (selects
-        the service metadata) or an explicit specification <metadata_type>@<info>; as an
-        example, you can specify local@<path> or service@<url>.
-
-    Returns
-    -------
-    str
-        The description of the metadata selected (equivalent to the result of
-        get_metadata()).
-    """
-    ...
+from . import info_file as info_file
+from . import exception as exception
+from . import metaflow_config as metaflow_config
+from . import multicore_utils as multicore_utils
+from .multicore_utils import parallel_imap_unordered as parallel_imap_unordered
+from .multicore_utils import parallel_map as parallel_map
+from . import metaflow_current as metaflow_current
+from .metaflow_current import current as current
+from . import parameters as parameters
+from . import tagging_util as tagging_util
+from . import metadata_provider as metadata_provider
+from . import flowspec as flowspec
+from .flowspec import FlowSpec as FlowSpec
+from .parameters import Parameter as Parameter
+from .parameters import JSONTypeClass as JSONTypeClass
+from .parameters import JSONType as JSONType
+from . import tuple_util as tuple_util
+from . import events as events
+from . import runner as runner
+from . import plugins as plugins
+from .plugins.datatools.s3.s3 import S3 as S3
+from . import includefile as includefile
+from .includefile import IncludeFile as IncludeFile
+from . import cards as cards
+from . import client as client
+from .client.core import namespace as namespace
+from .client.core import get_namespace as get_namespace
+from .client.core import default_namespace as default_namespace
+from .client.core import metadata as metadata
+from .client.core import get_metadata as get_metadata
+from .client.core import default_metadata as default_metadata
+from .client.core import Metaflow as Metaflow
+from .client.core import Flow as Flow
+from .client.core import Run as Run
+from .client.core import Step as Step
+from .client.core import Task as Task
+from .client.core import DataArtifact as DataArtifact
+from .runner.metaflow_runner import Runner as Runner
+from .runner.nbrun import NBRunner as NBRunner
+from .runner.deployer import Deployer as Deployer
+from .runner.deployer import DeployedFlow as DeployedFlow
+from .runner.nbdeploy import NBDeployer as NBDeployer
+from . import version as version
+from . import system as system
+from . import pylint_wrapper as pylint_wrapper
+from . import procpoll as procpoll
+from . import clone_util as clone_util
+from . import cli as cli
 
-
-    """
-    Main class from which all Flows should inherit.
-
-    Attributes
-    ----------
-    index
-    input
-    """
-    def __init__(self, use_cli = True):
-        """
-        Construct a FlowSpec
-
-        Parameters
-        ----------
-        use_cli : bool, default True
-            Set to True if the flow is invoked from __main__ or the command line
-        """
-        ...
-    @property
-    def script_name(self) -> str:
-        """
-        [Legacy function - do not use. Use `current` instead]
-
-        Returns the name of the script containing the flow
-
-        Returns
-        -------
-        str
-            A string containing the name of the script
-        """
-        ...
-    def __iter__(self):
-        """
-        [Legacy function - do not use]
-
-        Iterate over all steps in the Flow
-
-        Returns
-        -------
-        Iterator[graph.DAGNode]
-            Iterator over the steps in the flow
-        """
-        ...
-    def __getattr__(self, name: str):
-        ...
-    def cmd(self, cmdline, input = {}, output = []):
-        """
-        [Legacy function - do not use]
-        """
-        ...
-    @property
-    def index(self) -> typing.Optional[int]:
-        """
-        The index of this foreach branch.
-
-        In a foreach step, multiple instances of this step (tasks) will be executed,
-        one for each element in the foreach. This property returns the zero based index
-        of the current task. If this is not a foreach step, this returns None.
-
-        If you need to know the indices of the parent tasks in a nested foreach, use
-        `FlowSpec.foreach_stack`.
-
-        Returns
-        -------
-        int, optional
-            Index of the task in a foreach step.
-        """
-        ...
-    @property
-    def input(self) -> typing.Optional[typing.Any]:
-        """
-        The value of the foreach artifact in this foreach branch.
-
-        In a foreach step, multiple instances of this step (tasks) will be executed,
-        one for each element in the foreach. This property returns the element passed
-        to the current task. If this is not a foreach step, this returns None.
-
-        If you need to know the values of the parent tasks in a nested foreach, use
-        `FlowSpec.foreach_stack`.
-
-        Returns
-        -------
-        object, optional
-            Input passed to the foreach task.
-        """
-        ...
-    def foreach_stack(self) -> typing.Optional[typing.List[typing.Tuple[int, int, typing.Any]]]:
-        """
-        Returns the current stack of foreach indexes and values for the current step.
-
-        Use this information to understand what data is being processed in the current
-        foreach branch. For example, considering the following code:
-        ```
-        @step
-        def root(self):
-            self.split_1 = ['a', 'b', 'c']
-            self.next(self.nest_1, foreach='split_1')
-
-        @step
-        def nest_1(self):
-            self.split_2 = ['d', 'e', 'f', 'g']
-            self.next(self.nest_2, foreach='split_2'):
-
-        @step
-        def nest_2(self):
-            foo = self.foreach_stack()
-        ```
-
-        `foo` will take the following values in the various tasks for nest_2:
-        ```
-        [(0, 3, 'a'), (0, 4, 'd')]
-        [(0, 3, 'a'), (1, 4, 'e')]
-        ...
-        [(0, 3, 'a'), (3, 4, 'g')]
-        [(1, 3, 'b'), (0, 4, 'd')]
-        ...
-        ```
-        where each tuple corresponds to:
-
-        - The index of the task for that level of the loop.
-        - The number of splits for that level of the loop.
-        - The value for that level of the loop.
-
-        Note that the last tuple returned in a task corresponds to:
-
-        - 1st element: value returned by `self.index`.
-        - 3rd element: value returned by `self.input`.
-
-        Returns
-        -------
-        List[Tuple[int, int, Any]]
-            An array describing the current stack of foreach steps.
-        """
-        ...
-    def merge_artifacts(self, inputs: metaflow.datastore.inputs.Inputs, exclude: typing.Optional[typing.List[str]] = None, include: typing.Optional[typing.List[str]] = None):
-        """
-        Helper function for merging artifacts in a join step.
-
-        This function takes all the artifacts coming from the branches of a
-        join point and assigns them to self in the calling step. Only artifacts
-        not set in the current step are considered. If, for a given artifact, different
-        values are present on the incoming edges, an error will be thrown and the artifacts
-        that conflict will be reported.
-
-        As a few examples, in the simple graph: A splitting into B and C and joining in D:
-        ```
-        A:
-            self.x = 5
-            self.y = 6
-        B:
-            self.b_var = 1
-            self.x = from_b
-        C:
-            self.x = from_c
-
-        D:
-            merge_artifacts(inputs)
-        ```
-        In D, the following artifacts are set:
-        - `y` (value: 6), `b_var` (value: 1)
-        - if `from_b` and `from_c` are the same, `x` will be accessible and have value `from_b`
-        - if `from_b` and `from_c` are different, an error will be thrown. To prevent this error,
-          you need to manually set `self.x` in D to a merged value (for example the max) prior to
-          calling `merge_artifacts`.
-
-        Parameters
-        ----------
-        inputs : Inputs
-            Incoming steps to the join point.
-        exclude : List[str], optional, default None
-            If specified, do not consider merging artifacts with a name in `exclude`.
-            Cannot specify if `include` is also specified.
-        include : List[str], optional, default None
-            If specified, only merge artifacts specified. Cannot specify if `exclude` is
-            also specified.
-
-        Raises
-        ------
-        MetaflowException
-            This exception is thrown if this is not called in a join step.
-        UnhandledInMergeArtifactsException
-            This exception is thrown in case of unresolved conflicts.
-        MissingInMergeArtifactsException
-            This exception is thrown in case an artifact specified in `include` cannot
-            be found.
-        """
-        ...
-    def next(self, *dsts: typing.Callable[..., None], **kwargs):
-        """
-        Indicates the next step to execute after this step has completed.
-
-        This statement should appear as the last statement of each step, except
-        the end step.
-
-        There are several valid formats to specify the next step:
-
-        - Straight-line connection: `self.next(self.next_step)` where `next_step` is a method in
-          the current class decorated with the `@step` decorator.
-
-        - Static fan-out connection: `self.next(self.step1, self.step2, ...)` where `stepX` are
-          methods in the current class decorated with the `@step` decorator.
-
-        - Foreach branch:
-          ```
-          self.next(self.foreach_step, foreach='foreach_iterator')
-          ```
-          In this situation, `foreach_step` is a method in the current class decorated with the
-          `@step` decorator and `foreach_iterator` is a variable name in the current class that
-          evaluates to an iterator. A task will be launched for each value in the iterator and
-          each task will execute the code specified by the step `foreach_step`.
-
-        Parameters
-        ----------
-        dsts : Callable[..., None]
-            One or more methods annotated with `@step`.
-
-        Raises
-        ------
-        InvalidNextException
-            Raised if the format of the arguments does not match one of the ones given above.
-        """
-        ...
-    def __str__(self):
-        ...
-    def __getstate__(self):
-        ...
-    ...
-
-class Parameter(object, metaclass=type):
-    """
-    Defines a parameter for a flow.
-
-    Parameters must be instantiated as class variables in flow classes, e.g.
-    ```
-    class MyFlow(FlowSpec):
-        param = Parameter('myparam')
-    ```
-    in this case, the parameter is specified on the command line as
-    ```
-    python myflow.py run --myparam=5
-    ```
-    and its value is accessible through a read-only artifact like this:
-    ```
-    print(self.param == 5)
-    ```
-    Note that the user-visible parameter name, `myparam` above, can be
-    different from the artifact name, `param` above.
-
-    The parameter value is converted to a Python type based on the `type`
-    argument or to match the type of `default`, if it is set.
-
-    Parameters
-    ----------
-    name : str
-        User-visible parameter name.
-    default : str or float or int or bool or `JSONType` or a function.
-        Default value for the parameter. Use a special `JSONType` class to
-        indicate that the value must be a valid JSON object. A function
-        implies that the parameter corresponds to a *deploy-time parameter*.
-        The type of the default value is used as the parameter `type`.
-    type : Type, default None
-        If `default` is not specified, define the parameter type. Specify
-        one of `str`, `float`, `int`, `bool`, or `JSONType`. If None, defaults
-        to the type of `default` or `str` if none specified.
-    help : str, optional
-        Help text to show in `run --help`.
-    required : bool, default False
-        Require that the user specified a value for the parameter.
-        `required=True` implies that the `default` is not used.
-    show_default : bool, default True
-        If True, show the default value in the help text.
-    """
-    def __init__(self, name: str, default: typing.Union[str, float, int, bool, typing.Dict[str, typing.Any], typing.Callable[[], typing.Union[str, float, int, bool, typing.Dict[str, typing.Any]]], None] = None, type: typing.Union[typing.Type[str], typing.Type[float], typing.Type[int], typing.Type[bool], metaflow.parameters.JSONTypeClass, None] = None, help: typing.Optional[str] = None, required: bool = False, show_default: bool = True, **kwargs: typing.Dict[str, typing.Any]):
-        ...
-    def __repr__(self):
-        ...
-    def __str__(self):
-        ...
-    def option_kwargs(self, deploy_mode):
-        ...
-    def load_parameter(self, v):
-        ...
-    @property
-    def is_string_type(self):
-        ...
-    def __getitem__(self, x):
-        ...
-    ...
-
-class JSONTypeClass(metaflow._vendor.click.types.ParamType, metaclass=type):
-    def convert(self, value, param, ctx):
-        ...
-    def __str__(self):
-        ...
-    def __repr__(self):
-        ...
-    ...
-
-JSONType: metaflow.parameters.JSONTypeClass
-
-class S3(object, metaclass=type):
-    """
-    The Metaflow S3 client.
-
-    This object manages the connection to S3 and a temporary diretory that is used
-    to download objects. Note that in most cases when the data fits in memory, no local
-    disk IO is needed as operations are cached by the operating system, which makes
-    operations fast as long as there is enough memory available.
-
-    The easiest way is to use this object as a context manager:
-    ```
-    with S3() as s3:
-        data = [obj.blob for obj in s3.get_many(urls)]
-    print(data)
-    ```
-    The context manager takes care of creating and deleting a temporary directory
-    automatically. Without a context manager, you must call `.close()` to delete
-    the directory explicitly:
-    ```
-    s3 = S3()
-    data = [obj.blob for obj in s3.get_many(urls)]
-    s3.close()
-    ```
-    You can customize the location of the temporary directory with `tmproot`. It
-    defaults to the current working directory.
-
-    To make it easier to deal with object locations, the client can be initialized
-    with an S3 path prefix. There are three ways to handle locations:
-
-    1. Use a `metaflow.Run` object or `self`, e.g. `S3(run=self)` which
-       initializes the prefix with the global `DATATOOLS_S3ROOT` path, combined
-       with the current run ID. This mode makes it easy to version data based
-       on the run ID consistently. You can use the `bucket` and `prefix` to
-       override parts of `DATATOOLS_S3ROOT`.
-
-    2. Specify an S3 prefix explicitly with `s3root`,
-       e.g. `S3(s3root='s3://mybucket/some/path')`.
-
-    3. Specify nothing, i.e. `S3()`, in which case all operations require
-       a full S3 url prefixed with `s3://`.
-
-    Parameters
-    ----------
-    tmproot : str, default: '.'
-        Where to store the temporary directory.
-    bucket : str, optional
-        Override the bucket from `DATATOOLS_S3ROOT` when `run` is specified.
-    prefix : str, optional
-        Override the path from `DATATOOLS_S3ROOT` when `run` is specified.
-    run : FlowSpec or Run, optional
-        Derive path prefix from the current or a past run ID, e.g. S3(run=self).
-    s3root : str, optional
-        If `run` is not specified, use this as the S3 prefix.
-    """
-    @classmethod
-    def get_root_from_config(cls, echo, create_on_absent = True):
-        ...
-    def __enter__(self) -> metaflow.plugins.datatools.s3.s3.S3:
-        ...
-    def __exit__(self, *args):
-        ...
-    def close(self):
-        """
-        Delete all temporary files downloaded in this context.
-        """
-        ...
-    def list_paths(self, keys: typing.Optional[typing.Iterable[str]] = None) -> typing.List[metaflow.plugins.datatools.s3.s3.S3Object]:
-        """
-        List the next level of paths in S3.
-
-        If multiple keys are specified, listings are done in parallel. The returned
-        S3Objects have `.exists == False` if the path refers to a prefix, not an
-        existing S3 object.
-
-        For instance, if the directory hierarchy is
-        ```
-        a/0.txt
-        a/b/1.txt
-        a/c/2.txt
-        a/d/e/3.txt
-        f/4.txt
-        ```
-        The `list_paths(['a', 'f'])` call returns
-        ```
-        a/0.txt (exists == True)
-        a/b/ (exists == False)
-        a/c/ (exists == False)
-        a/d/ (exists == False)
-        f/4.txt (exists == True)
-        ```
-
-        Parameters
-        ----------
-        keys : Iterable[str], optional, default None
-            List of paths.
-
-        Returns
-        -------
-        List[S3Object]
-            S3Objects under the given paths, including prefixes (directories) that
-            do not correspond to leaf objects.
-        """
-        ...
-    def list_recursive(self, keys: typing.Optional[typing.Iterable[str]] = None) -> typing.List[metaflow.plugins.datatools.s3.s3.S3Object]:
-        """
-        List all objects recursively under the given prefixes.
-
-        If multiple keys are specified, listings are done in parallel. All objects
-        returned have `.exists == True` as this call always returns leaf objects.
-
-        For instance, if the directory hierarchy is
-        ```
-        a/0.txt
-        a/b/1.txt
-        a/c/2.txt
-        a/d/e/3.txt
-        f/4.txt
-        ```
-        The `list_paths(['a', 'f'])` call returns
-        ```
-        a/0.txt (exists == True)
-        a/b/1.txt (exists == True)
-        a/c/2.txt (exists == True)
-        a/d/e/3.txt (exists == True)
-        f/4.txt (exists == True)
-        ```
-
-        Parameters
-        ----------
-        keys : Iterable[str], optional, default None
-            List of paths.
-
-        Returns
-        -------
-        List[S3Object]
-            S3Objects under the given paths.
-        """
-        ...
-    def info(self, key: typing.Optional[str] = None, return_missing: bool = False) -> metaflow.plugins.datatools.s3.s3.S3Object:
-        """
-        Get metadata about a single object in S3.
-
-        This call makes a single `HEAD` request to S3 which can be
-        much faster than downloading all data with `get`.
-
-        Parameters
-        ----------
-        key : str, optional, default None
-            Object to query. It can be an S3 url or a path suffix.
-        return_missing : bool, default False
-            If set to True, do not raise an exception for a missing key but
-            return it as an `S3Object` with `.exists == False`.
-
-        Returns
-        -------
-        S3Object
-            An S3Object corresponding to the object requested. The object
-            will have `.downloaded == False`.
-        """
-        ...
-    def info_many(self, keys: typing.Iterable[str], return_missing: bool = False) -> typing.List[metaflow.plugins.datatools.s3.s3.S3Object]:
-        """
-        Get metadata about many objects in S3 in parallel.
-
-        This call makes a single `HEAD` request to S3 which can be
-        much faster than downloading all data with `get`.
-
-        Parameters
-        ----------
-        keys : Iterable[str]
-            Objects to query. Each key can be an S3 url or a path suffix.
-        return_missing : bool, default False
-            If set to True, do not raise an exception for a missing key but
-            return it as an `S3Object` with `.exists == False`.
-
-        Returns
-        -------
-        List[S3Object]
-            A list of S3Objects corresponding to the paths requested. The
-            objects will have `.downloaded == False`.
-        """
-        ...
-    def get(self, key: typing.Union[str, metaflow.plugins.datatools.s3.s3.S3GetObject, None] = None, return_missing: bool = False, return_info: bool = True) -> metaflow.plugins.datatools.s3.s3.S3Object:
-        """
-        Get a single object from S3.
-
-        Parameters
-        ----------
-        key : Union[str, S3GetObject], optional, default None
-            Object to download. It can be an S3 url, a path suffix, or
-            an S3GetObject that defines a range of data to download. If None, or
-            not provided, gets the S3 root.
-        return_missing : bool, default False
-            If set to True, do not raise an exception for a missing key but
-            return it as an `S3Object` with `.exists == False`.
-        return_info : bool, default True
-            If set to True, fetch the content-type and user metadata associated
-            with the object at no extra cost, included for symmetry with `get_many`
-
-        Returns
-        -------
-        S3Object
-            An S3Object corresponding to the object requested.
-        """
-        ...
-    def get_many(self, keys: typing.Iterable[typing.Union[str, metaflow.plugins.datatools.s3.s3.S3GetObject]], return_missing: bool = False, return_info: bool = True) -> typing.List[metaflow.plugins.datatools.s3.s3.S3Object]:
-        """
-        Get many objects from S3 in parallel.
-
-        Parameters
-        ----------
-        keys : Iterable[Union[str, S3GetObject]]
-            Objects to download. Each object can be an S3 url, a path suffix, or
-            an S3GetObject that defines a range of data to download.
-        return_missing : bool, default False
-            If set to True, do not raise an exception for a missing key but
-            return it as an `S3Object` with `.exists == False`.
-        return_info : bool, default True
-            If set to True, fetch the content-type and user metadata associated
-            with the object at no extra cost, included for symmetry with `get_many`.
-
-        Returns
-        -------
-        List[S3Object]
-            S3Objects corresponding to the objects requested.
-        """
-        ...
-    def get_recursive(self, keys: typing.Iterable[str], return_info: bool = False) -> typing.List[metaflow.plugins.datatools.s3.s3.S3Object]:
-        """
-        Get many objects from S3 recursively in parallel.
-
-        Parameters
-        ----------
-        keys : Iterable[str]
-            Prefixes to download recursively. Each prefix can be an S3 url or a path suffix
-            which define the root prefix under which all objects are downloaded.
-        return_info : bool, default False
-            If set to True, fetch the content-type and user metadata associated
-            with the object.
-
-        Returns
-        -------
-        List[S3Object]
-            S3Objects stored under the given prefixes.
-        """
-        ...
-    def get_all(self, return_info: bool = False) -> typing.List[metaflow.plugins.datatools.s3.s3.S3Object]:
-        """
-        Get all objects under the prefix set in the `S3` constructor.
-
-        This method requires that the `S3` object is initialized either with `run` or
-        `s3root`.
-
-        Parameters
-        ----------
-        return_info : bool, default False
-            If set to True, fetch the content-type and user metadata associated
-            with the object.
-
-        Returns
-        -------
-        Iterable[S3Object]
-            S3Objects stored under the main prefix.
-        """
-        ...
-    def put(self, key: typing.Union[str, metaflow.plugins.datatools.s3.s3.S3PutObject], obj: typing.Union[io.RawIOBase, io.BufferedIOBase, str, bytes], overwrite: bool = True, content_type: typing.Optional[str] = None, metadata: typing.Optional[typing.Dict[str, str]] = None) -> str:
-        """
-        Upload a single object to S3.
-
-        Parameters
-        ----------
-        key : Union[str, S3PutObject]
-            Object path. It can be an S3 url or a path suffix.
-        obj : PutValue
-            An object to store in S3. Strings are converted to UTF-8 encoding.
-        overwrite : bool, default True
-            Overwrite the object if it exists. If set to False, the operation
-            succeeds without uploading anything if the key already exists.
-        content_type : str, optional, default None
-            Optional MIME type for the object.
-        metadata : Dict[str, str], optional, default None
-            A JSON-encodable dictionary of additional headers to be stored
-            as metadata with the object.
-
-        Returns
-        -------
-        str
-            URL of the object stored.
-        """
-        ...
-    def put_many(self, key_objs: typing.List[typing.Union[typing.Tuple[str, typing.Union[io.RawIOBase, io.BufferedIOBase, str, bytes]], metaflow.plugins.datatools.s3.s3.S3PutObject]], overwrite: bool = True) -> typing.List[typing.Tuple[str, str]]:
-        """
-        Upload many objects to S3.
-
-        Each object to be uploaded can be specified in two ways:
-
-        1. As a `(key, obj)` tuple where `key` is a string specifying
-           the path and `obj` is a string or a bytes object.
-
-        2. As a `S3PutObject` which contains additional metadata to be
-           stored with the object.
-
-        Parameters
-        ----------
-        key_objs : List[Union[Tuple[str, PutValue], S3PutObject]]
-            List of key-object pairs to upload.
-        overwrite : bool, default True
-            Overwrite the object if it exists. If set to False, the operation
-            succeeds without uploading anything if the key already exists.
-
-        Returns
-        -------
-        List[Tuple[str, str]]
-            List of `(key, url)` pairs corresponding to the objects uploaded.
-        """
-        ...
-    def put_files(self, key_paths: typing.List[typing.Union[typing.Tuple[str, typing.Union[io.RawIOBase, io.BufferedIOBase, str, bytes]], metaflow.plugins.datatools.s3.s3.S3PutObject]], overwrite: bool = True) -> typing.List[typing.Tuple[str, str]]:
-        """
-        Upload many local files to S3.
-
-        Each file to be uploaded can be specified in two ways:
-
-        1. As a `(key, path)` tuple where `key` is a string specifying
-           the S3 path and `path` is the path to a local file.
-
-        2. As a `S3PutObject` which contains additional metadata to be
-           stored with the file.
-
-        Parameters
-        ----------
-        key_paths : List[Union[Tuple[str, PutValue], S3PutObject]]
-            List of files to upload.
-        overwrite : bool, default True
-            Overwrite the object if it exists. If set to False, the operation
-            succeeds without uploading anything if the key already exists.
-
-        Returns
-        -------
-        List[Tuple[str, str]]
-            List of `(key, url)` pairs corresponding to the files uploaded.
-        """
-        ...
-    ...
-
-class IncludeFile(metaflow.parameters.Parameter, metaclass=type):
-    """
-    Includes a local file as a parameter for the flow.
-
-    `IncludeFile` behaves like `Parameter` except that it reads its value from a file instead of
-    the command line. The user provides a path to a file on the command line. The file contents
-    are saved as a read-only artifact which is available in all steps of the flow.
-
-    Parameters
-    ----------
-    name : str
-        User-visible parameter name.
-    default : Union[str, Callable[ParameterContext, str]]
-        Default path to a local file. A function
-        implies that the parameter corresponds to a *deploy-time parameter*.
-    is_text : bool, default True
-        Convert the file contents to a string using the provided `encoding`.
-        If False, the artifact is stored in `bytes`.
-    encoding : str, optional, default 'utf-8'
-        Use this encoding to decode the file contexts if `is_text=True`.
-    required : bool, default False
-        Require that the user specified a value for the parameter.
-        `required=True` implies that the `default` is not used.
-    help : str, optional
-        Help text to show in `run --help`.
-    show_default : bool, default True
-        If True, show the default value in the help text.
-    """
-    def __init__(self, name: str, required: bool = False, is_text: bool = True, encoding: str = "utf-8", help: typing.Optional[str] = None, **kwargs: typing.Dict[str, str]):
-        ...
-    def load_parameter(self, v):
-        ...
-    ...
+EXT_PKG: str
 
 @typing.overload
 def step(f: typing.Callable[[FlowSpecDerived], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
```
```diff
@@ -855,79 +138,33 @@ def step(f: typing.Union[typing.Callable[[FlowSpecDerived], None], typing.Callab
     ...
 
 @typing.overload
-def
+def catch(*, var: typing.Optional[str] = None, print_exception: bool = True) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
     """
-    Specifies the
-
-    Use `@resources` to specify the resource requirements
-    independently of the specific compute layer (`@batch`, `@kubernetes`).
-
-    You can choose the compute layer on the command line by executing e.g.
-    ```
-    python myflow.py run --with batch
-    ```
-    or
-    ```
-    python myflow.py run --with kubernetes
-    ```
-    which executes the flow on the desired system using the
-    requirements specified in `@resources`.
+    Specifies that the step will success under all circumstances.
 
-
-
-
-
-    gpu : int, optional, default None
-        Number of GPUs required for this step.
-    disk : int, optional, default None
-        Disk size (in MB) required for this step. Only applies on Kubernetes.
-    memory : int, default 4096
-        Memory size (in MB) required for this step.
-    shared_memory : int, optional, default None
-        The value for the size (in MiB) of the /dev/shm volume for this step.
-        This parameter maps to the `--shm-size` option in Docker.
+    The decorator will create an optional artifact, specified by `var`, which
+    contains the exception raised. You can use it to detect the presence
+    of errors, indicating that all happy-path artifacts produced by the step
+    are missing.
     """
     ...
 
 @typing.overload
-def
+def catch(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
     ...
 
 @typing.overload
-def
+def catch(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
     ...
 
-def
+def catch(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None, *, var: typing.Optional[str] = None, print_exception: bool = True):
     """
-    Specifies the
-
-    Use `@resources` to specify the resource requirements
-    independently of the specific compute layer (`@batch`, `@kubernetes`).
-
-    You can choose the compute layer on the command line by executing e.g.
-    ```
-    python myflow.py run --with batch
-    ```
-    or
-    ```
-    python myflow.py run --with kubernetes
-    ```
-    which executes the flow on the desired system using the
-    requirements specified in `@resources`.
+    Specifies that the step will success under all circumstances.
 
-
-
-
-
-    gpu : int, optional, default None
-        Number of GPUs required for this step.
-    disk : int, optional, default None
-        Disk size (in MB) required for this step. Only applies on Kubernetes.
-    memory : int, default 4096
-        Memory size (in MB) required for this step.
-    shared_memory : int, optional, default None
-        The value for the size (in MiB) of the /dev/shm volume for this step.
-        This parameter maps to the `--shm-size` option in Docker.
+    The decorator will create an optional artifact, specified by `var`, which
+    contains the exception raised. You can use it to detect the presence
+    of errors, indicating that all happy-path artifacts produced by the step
+    are missing.
     """
     ...
 
```
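The hunk above restores the `@catch` stub, whose keyword-only arguments are `var` and `print_exception`. A minimal sketch of the pattern its docstring describes, assuming Metaflow 2.12.x is installed (the `compute_failed` artifact name is just an example):

```python
from metaflow import FlowSpec, catch, step


class CatchDemoFlow(FlowSpec):

    @catch(var="compute_failed")
    @step
    def start(self):
        # The exception raised here is stored in self.compute_failed
        # instead of failing the task, per the docstring above; any
        # happy-path artifacts this step would have set are then missing.
        self.x = 5 / 0
        self.next(self.end)

    @step
    def end(self):
        # compute_failed is None when the step succeeded.
        if self.compute_failed:
            print("start failed with:", self.compute_failed)


if __name__ == "__main__":
    CatchDemoFlow()
```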
```diff
@@ -944,13 +181,6 @@ def retry(*, times: int = 3, minutes_between_retries: int = 2) -> typing.Callabl
     This can be used in conjunction with the `@catch` decorator. The `@catch`
     decorator will execute a no-op task after all retries have been exhausted,
     ensuring that the flow execution can continue.
-
-    Parameters
-    ----------
-    times : int, default 3
-        Number of times to retry this task.
-    minutes_between_retries : int, default 2
-        Number of minutes between retries.
     """
     ...
 
```
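The context lines above describe combining `@retry` with `@catch`: the retries run first, and the catch only takes over once they are exhausted. A minimal sketch under the same assumptions as the previous example; the flaky-service helper is hypothetical:

```python
import random

from metaflow import FlowSpec, catch, retry, step


def fetch_from_flaky_service():
    # Hypothetical stand-in for an unreliable dependency.
    if random.random() < 0.5:
        raise ConnectionError("transient failure")
    return {"status": "ok"}


class RetryDemoFlow(FlowSpec):

    # @retry re-runs the task up to `times` times; if every attempt fails,
    # @catch records the final exception so the flow can continue.
    @catch(var="fetch_error")
    @retry(times=3, minutes_between_retries=0)
    @step
    def start(self):
        self.data = fetch_from_flaky_service()
        self.next(self.end)

    @step
    def end(self):
        # self.data is absent when all retries failed, so use getattr.
        print("data:", getattr(self, "data", None), "error:", self.fetch_error)


if __name__ == "__main__":
    RetryDemoFlow()
```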
@@ -974,575 +204,271 @@ def retry(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], ty
|
|
974
204
|
This can be used in conjunction with the `@catch` decorator. The `@catch`
|
975
205
|
decorator will execute a no-op task after all retries have been exhausted,
|
976
206
|
ensuring that the flow execution can continue.
|
977
|
-
|
978
|
-
Parameters
|
979
|
-
----------
|
980
|
-
times : int, default 3
|
981
|
-
Number of times to retry this task.
|
982
|
-
minutes_between_retries : int, default 2
|
983
|
-
Number of minutes between retries.
|
984
207
|
"""
|
985
208
|
...
|
986
209
|
|
987
210
|
@typing.overload
|
988
|
-
def
|
211
|
+
def card(*, type: str = 'default', id: typing.Optional[str] = None, options: typing.Dict[str, typing.Any] = {}, timeout: int = 45) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
|
989
212
|
"""
|
990
|
-
|
991
|
-
|
992
|
-
This decorator is useful if this step may hang indefinitely.
|
993
|
-
|
994
|
-
This can be used in conjunction with the `@retry` decorator as well as the `@catch` decorator.
|
995
|
-
A timeout is considered to be an exception thrown by the step. It will cause the step to be
|
996
|
-
retried if needed and the exception will be caught by the `@catch` decorator, if present.
|
997
|
-
|
998
|
-
Note that all the values specified in parameters are added together so if you specify
|
999
|
-
60 seconds and 1 hour, the decorator will have an effective timeout of 1 hour and 1 minute.
|
213
|
+
Creates a human-readable report, a Metaflow Card, after this step completes.
|
1000
214
|
|
1001
|
-
|
1002
|
-
----------
|
1003
|
-
seconds : int, default 0
|
1004
|
-
Number of seconds to wait prior to timing out.
|
1005
|
-
minutes : int, default 0
|
1006
|
-
Number of minutes to wait prior to timing out.
|
1007
|
-
hours : int, default 0
|
1008
|
-
Number of hours to wait prior to timing out.
|
215
|
+
Note that you may add multiple `@card` decorators in a step with different parameters.
|
1009
216
|
"""
|
1010
217
|
...
|
1011
218
|
|
1012
219
|
@typing.overload
|
1013
|
-
def
|
220
|
+
def card(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
|
1014
221
|
...
|
1015
222
|
|
1016
223
|
@typing.overload
|
1017
|
-
def
|
224
|
+
def card(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
|
1018
225
|
...
|
1019
226
|
|
1020
|
-
def
|
227
|
+
def card(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None, *, type: str = 'default', id: typing.Optional[str] = None, options: typing.Dict[str, typing.Any] = {}, timeout: int = 45):
|
1021
228
|
"""
|
1022
|
-
|
1023
|
-
|
1024
|
-
This decorator is useful if this step may hang indefinitely.
|
1025
|
-
|
1026
|
-
This can be used in conjunction with the `@retry` decorator as well as the `@catch` decorator.
|
1027
|
-
A timeout is considered to be an exception thrown by the step. It will cause the step to be
|
1028
|
-
retried if needed and the exception will be caught by the `@catch` decorator, if present.
|
1029
|
-
|
1030
|
-
Note that all the values specified in parameters are added together so if you specify
|
1031
|
-
60 seconds and 1 hour, the decorator will have an effective timeout of 1 hour and 1 minute.
|
229
|
+
Creates a human-readable report, a Metaflow Card, after this step completes.
|
1032
230
|
|
1033
|
-
|
1034
|
-
----------
|
1035
|
-
seconds : int, default 0
|
1036
|
-
Number of seconds to wait prior to timing out.
|
1037
|
-
minutes : int, default 0
|
1038
|
-
Number of minutes to wait prior to timing out.
|
1039
|
-
hours : int, default 0
|
1040
|
-
Number of hours to wait prior to timing out.
|
231
|
+
Note that you may add multiple `@card` decorators in a step with different parameters.
|
1041
232
|
"""
|
1042
233
|
...
|
1043
234
|
|
1044
235
|
@typing.overload
|
1045
|
-
def
|
236
|
+
def secrets(*, sources: typing.List[typing.Union[str, typing.Dict[str, typing.Any]]] = []) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
|
1046
237
|
"""
|
1047
|
-
Specifies
|
1048
|
-
|
1049
|
-
Information in this decorator will augment any
|
1050
|
-
attributes set in the `@pyi_base` flow-level decorator. Hence,
|
1051
|
-
you can use `@pypi_base` to set packages required by all
|
1052
|
-
steps and use `@pypi` to specify step-specific overrides.
|
1053
|
-
|
1054
|
-
Parameters
|
1055
|
-
----------
|
1056
|
-
packages : Dict[str, str], default: {}
|
1057
|
-
Packages to use for this step. The key is the name of the package
|
1058
|
-
and the value is the version to use.
|
1059
|
-
python : str, optional, default: None
|
1060
|
-
Version of Python to use, e.g. '3.7.4'. A default value of None implies
|
1061
|
-
that the version used will correspond to the version of the Python interpreter used to start the run.
|
238
|
+
Specifies secrets to be retrieved and injected as environment variables prior to
|
239
|
+
the execution of a step.
|
1062
240
|
"""
|
1063
241
|
...
|
1064
242
|
|
1065
243
|
@typing.overload
|
1066
|
-
def
|
244
|
+
def secrets(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
|
1067
245
|
...
|
1068
246
|
|
1069
247
|
@typing.overload
|
1070
|
-
def
|
248
|
+
def secrets(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
|
1071
249
|
...
|
1072
250
|
|
1073
|
-
def
|
251
|
+
def secrets(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None, *, sources: typing.List[typing.Union[str, typing.Dict[str, typing.Any]]] = []):
|
1074
252
|
"""
|
1075
|
-
Specifies
|
1076
|
-
|
1077
|
-
|
1078
|
-
|
1079
|
-
|
1080
|
-
|
1081
|
-
|
1082
|
-
|
1083
|
-
----------
|
1084
|
-
packages : Dict[str, str], default: {}
|
1085
|
-
Packages to use for this step. The key is the name of the package
|
1086
|
-
and the value is the version to use.
|
1087
|
-
python : str, optional, default: None
|
1088
|
-
Version of Python to use, e.g. '3.7.4'. A default value of None implies
|
1089
|
-
that the version used will correspond to the version of the Python interpreter used to start the run.
|
253
|
+
Specifies secrets to be retrieved and injected as environment variables prior to
|
254
|
+
the execution of a step.
|
255
|
+
"""
|
256
|
+
...
|
257
|
+
|
258
|
+
def kubernetes(*, cpu: int = 1, memory: int = 4096, disk: int = 10240, image: typing.Optional[str] = None, image_pull_policy: str = 'KUBERNETES_IMAGE_PULL_POLICY', service_account: str = 'METAFLOW_KUBERNETES_SERVICE_ACCOUNT', secrets: typing.Optional[typing.List[str]] = None, node_selector: typing.Union[typing.Dict[str, str], str, None] = None, namespace: str = 'METAFLOW_KUBERNETES_NAMESPACE', gpu: typing.Optional[int] = None, gpu_vendor: str = 'KUBERNETES_GPU_VENDOR', tolerations: typing.List[str] = [], use_tmpfs: bool = False, tmpfs_tempdir: bool = True, tmpfs_size: typing.Optional[int] = None, tmpfs_path: typing.Optional[str] = '/metaflow_temp', persistent_volume_claims: typing.Optional[typing.Dict[str, str]] = None, shared_memory: typing.Optional[int] = None, port: typing.Optional[int] = None, compute_pool: typing.Optional[str] = None, hostname_resolution_timeout: int = 600) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
|
259
|
+
"""
|
260
|
+
Specifies that this step should execute on Kubernetes.
|
1090
261
|
"""
|
1091
262
|
...
|
1092
263
|
|
 @typing.overload
-def
+def resources(*, cpu: int = 1, gpu: typing.Optional[int] = None, disk: typing.Optional[int] = None, memory: int = 4096, shared_memory: typing.Optional[int] = None) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
     """
-
-
+    Specifies the resources needed when executing this step.
+
+    Use `@resources` to specify the resource requirements
+    independently of the specific compute layer (`@batch`, `@kubernetes`).
+
+    You can choose the compute layer on the command line by executing e.g.
+    ```
+    python myflow.py run --with batch
+    ```
+    or
+    ```
+    python myflow.py run --with kubernetes
+    ```
+    which executes the flow on the desired system using the
+    requirements specified in `@resources`.
     """
     ...

 @typing.overload
-def
+def resources(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
     ...

-
+@typing.overload
+def resources(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
+    ...
+
+def resources(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None, *, cpu: int = 1, gpu: typing.Optional[int] = None, disk: typing.Optional[int] = None, memory: int = 4096, shared_memory: typing.Optional[int] = None):
     """
-
-
+    Specifies the resources needed when executing this step.
+
+    Use `@resources` to specify the resource requirements
+    independently of the specific compute layer (`@batch`, `@kubernetes`).
+
+    You can choose the compute layer on the command line by executing e.g.
+    ```
+    python myflow.py run --with batch
+    ```
+    or
+    ```
+    python myflow.py run --with kubernetes
+    ```
+    which executes the flow on the desired system using the
+    requirements specified in `@resources`.
     """
     ...

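A sketch pairing `@resources` with a compute layer chosen at run time, per the docstring above; the requirement values are illustrative:

```python
# The same requirements apply whether the step runs locally,
# on AWS Batch (`--with batch`), or on Kubernetes (`--with kubernetes`).
from metaflow import FlowSpec, step, resources

class TrainFlow(FlowSpec):

    @resources(cpu=4, memory=16384)
    @step
    def start(self):
        self.next(self.end)

    @step
    def end(self):
        pass

if __name__ == "__main__":
    TrainFlow()
```

Invoked, for example, as `python myflow.py run --with kubernetes`.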
 @typing.overload
-def
+def conda(*, packages: typing.Dict[str, str] = {}, libraries: typing.Dict[str, str] = {}, python: typing.Optional[str] = None, disabled: bool = False) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
     """
-    Specifies
+    Specifies the Conda environment for the step.

-
-
-
-
-        also present, the maximum value from all decorators is used.
-    gpu : int, default 0
-        Number of GPUs required for this step. If `@resources` is
-        also present, the maximum value from all decorators is used.
-    memory : int, default 4096
-        Memory size (in MB) required for this step. If
-        `@resources` is also present, the maximum value from all decorators is
-        used.
-    image : str, optional, default None
-        Docker image to use when launching on AWS Batch. If not specified, and
-        METAFLOW_BATCH_CONTAINER_IMAGE is specified, that image is used. If
-        not, a default Docker image mapping to the current version of Python is used.
-    queue : str, default METAFLOW_BATCH_JOB_QUEUE
-        AWS Batch Job Queue to submit the job to.
-    iam_role : str, default METAFLOW_ECS_S3_ACCESS_IAM_ROLE
-        AWS IAM role that AWS Batch container uses to access AWS cloud resources.
-    execution_role : str, default METAFLOW_ECS_FARGATE_EXECUTION_ROLE
-        AWS IAM role that AWS Batch can use [to trigger AWS Fargate tasks]
-        (https://docs.aws.amazon.com/batch/latest/userguide/execution-IAM-role.html).
-    shared_memory : int, optional, default None
-        The value for the size (in MiB) of the /dev/shm volume for this step.
-        This parameter maps to the `--shm-size` option in Docker.
-    max_swap : int, optional, default None
-        The total amount of swap memory (in MiB) a container can use for this
-        step. This parameter is translated to the `--memory-swap` option in
-        Docker where the value is the sum of the container memory plus the
-        `max_swap` value.
-    swappiness : int, optional, default None
-        This allows you to tune memory swappiness behavior for this step.
-        A swappiness value of 0 causes swapping not to happen unless absolutely
-        necessary. A swappiness value of 100 causes pages to be swapped very
-        aggressively. Accepted values are whole numbers between 0 and 100.
-    use_tmpfs : bool, default False
-        This enables an explicit tmpfs mount for this step. Note that tmpfs is
-        not available on Fargate compute environments
-    tmpfs_tempdir : bool, default True
-        sets METAFLOW_TEMPDIR to tmpfs_path if set for this step.
-    tmpfs_size : int, optional, default None
-        The value for the size (in MiB) of the tmpfs mount for this step.
-        This parameter maps to the `--tmpfs` option in Docker. Defaults to 50% of the
-        memory allocated for this step.
-    tmpfs_path : str, optional, default None
-        Path to tmpfs mount for this step. Defaults to /metaflow_temp.
-    inferentia : int, default 0
-        Number of Inferentia chips required for this step.
-    trainium : int, default None
-        Alias for inferentia. Use only one of the two.
-    efa : int, default 0
-        Number of elastic fabric adapter network devices to attach to container
-    ephemeral_storage : int, default None
-        The total amount, in GiB, of ephemeral storage to set for the task, 21-200GiB.
-        This is only relevant for Fargate compute environments
-    log_driver: str, optional, default None
-        The log driver to use for the Amazon ECS container.
-    log_options: List[str], optional, default None
-        List of strings containing options for the chosen log driver. The configurable values
-        depend on the `log driver` chosen. Validation of these options is not supported yet.
-        Example: [`awslogs-group:aws/batch/job`]
+    Information in this decorator will augment any
+    attributes set in the `@conda_base` flow-level decorator. Hence,
+    you can use `@conda_base` to set packages required by all
+    steps and use `@conda` to specify step-specific overrides.
     """
     ...

 @typing.overload
-def
+def conda(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
     ...

 @typing.overload
-def
+def conda(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
     ...

-def
+def conda(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None, *, packages: typing.Dict[str, str] = {}, libraries: typing.Dict[str, str] = {}, python: typing.Optional[str] = None, disabled: bool = False):
     """
-    Specifies
+    Specifies the Conda environment for the step.

-
-
-
-
-
-
-        Number of GPUs required for this step. If `@resources` is
-        also present, the maximum value from all decorators is used.
-    memory : int, default 4096
-        Memory size (in MB) required for this step. If
-        `@resources` is also present, the maximum value from all decorators is
-        used.
-    image : str, optional, default None
-        Docker image to use when launching on AWS Batch. If not specified, and
-        METAFLOW_BATCH_CONTAINER_IMAGE is specified, that image is used. If
-        not, a default Docker image mapping to the current version of Python is used.
-    queue : str, default METAFLOW_BATCH_JOB_QUEUE
-        AWS Batch Job Queue to submit the job to.
-    iam_role : str, default METAFLOW_ECS_S3_ACCESS_IAM_ROLE
-        AWS IAM role that AWS Batch container uses to access AWS cloud resources.
-    execution_role : str, default METAFLOW_ECS_FARGATE_EXECUTION_ROLE
-        AWS IAM role that AWS Batch can use [to trigger AWS Fargate tasks]
-        (https://docs.aws.amazon.com/batch/latest/userguide/execution-IAM-role.html).
-    shared_memory : int, optional, default None
-        The value for the size (in MiB) of the /dev/shm volume for this step.
-        This parameter maps to the `--shm-size` option in Docker.
-    max_swap : int, optional, default None
-        The total amount of swap memory (in MiB) a container can use for this
-        step. This parameter is translated to the `--memory-swap` option in
-        Docker where the value is the sum of the container memory plus the
-        `max_swap` value.
-    swappiness : int, optional, default None
-        This allows you to tune memory swappiness behavior for this step.
-        A swappiness value of 0 causes swapping not to happen unless absolutely
-        necessary. A swappiness value of 100 causes pages to be swapped very
-        aggressively. Accepted values are whole numbers between 0 and 100.
-    use_tmpfs : bool, default False
-        This enables an explicit tmpfs mount for this step. Note that tmpfs is
-        not available on Fargate compute environments
-    tmpfs_tempdir : bool, default True
-        sets METAFLOW_TEMPDIR to tmpfs_path if set for this step.
-    tmpfs_size : int, optional, default None
-        The value for the size (in MiB) of the tmpfs mount for this step.
-        This parameter maps to the `--tmpfs` option in Docker. Defaults to 50% of the
-        memory allocated for this step.
-    tmpfs_path : str, optional, default None
-        Path to tmpfs mount for this step. Defaults to /metaflow_temp.
-    inferentia : int, default 0
-        Number of Inferentia chips required for this step.
-    trainium : int, default None
-        Alias for inferentia. Use only one of the two.
-    efa : int, default 0
-        Number of elastic fabric adapter network devices to attach to container
-    ephemeral_storage : int, default None
-        The total amount, in GiB, of ephemeral storage to set for the task, 21-200GiB.
-        This is only relevant for Fargate compute environments
-    log_driver: str, optional, default None
-        The log driver to use for the Amazon ECS container.
-    log_options: List[str], optional, default None
-        List of strings containing options for the chosen log driver. The configurable values
-        depend on the `log driver` chosen. Validation of these options is not supported yet.
-        Example: [`awslogs-group:aws/batch/job`]
-    """
-    ...
+    Information in this decorator will augment any
+    attributes set in the `@conda_base` flow-level decorator. Hence,
+    you can use `@conda_base` to set packages required by all
+    steps and use `@conda` to specify step-specific overrides.
+    """
+    ...

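A sketch of step-level `@conda` from the signature above; package versions are illustrative:

```python
# Hedged sketch: versions are examples only. Conda-decorated flows are
# typically run with `--environment=conda`.
from metaflow import FlowSpec, step, conda

class CondaFlow(FlowSpec):

    @conda(python="3.10.4", packages={"pandas": "2.2.2"})
    @step
    def start(self):
        import pandas as pd  # resolved inside the step's Conda environment
        print(pd.__version__)
        self.next(self.end)

    @step
    def end(self):
        pass

if __name__ == "__main__":
    CondaFlow()
```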
 @typing.overload
-def
+def environment(*, vars: typing.Dict[str, str] = {}) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
     """
-    Specifies
-
-    The decorator will create an optional artifact, specified by `var`, which
-    contains the exception raised. You can use it to detect the presence
-    of errors, indicating that all happy-path artifacts produced by the step
-    are missing.
-
-    Parameters
-    ----------
-    var : str, optional, default None
-        Name of the artifact in which to store the caught exception.
-        If not specified, the exception is not stored.
-    print_exception : bool, default True
-        Determines whether or not the exception is printed to
-        stdout when caught.
+    Specifies environment variables to be set prior to the execution of a step.
     """
     ...

 @typing.overload
-def
+def environment(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
     ...

 @typing.overload
-def
+def environment(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
     ...

-def
+def environment(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None, *, vars: typing.Dict[str, str] = {}):
     """
-    Specifies
-
-    The decorator will create an optional artifact, specified by `var`, which
-    contains the exception raised. You can use it to detect the presence
-    of errors, indicating that all happy-path artifacts produced by the step
-    are missing.
-
-    Parameters
-    ----------
-    var : str, optional, default None
-        Name of the artifact in which to store the caught exception.
-        If not specified, the exception is not stored.
-    print_exception : bool, default True
-        Determines whether or not the exception is printed to
-        stdout when caught.
+    Specifies environment variables to be set prior to the execution of a step.
     """
     ...

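A minimal sketch of `@environment` from the signature above; the variable name and value are illustrative:

```python
from metaflow import FlowSpec, step, environment
import os

class EnvFlow(FlowSpec):

    @environment(vars={"APP_MODE": "batch"})
    @step
    def start(self):
        # The variable is set before the step executes.
        print(os.environ["APP_MODE"])
        self.next(self.end)

    @step
    def end(self):
        pass

if __name__ == "__main__":
    EnvFlow()
```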
1308
365
|
@typing.overload
|
1309
|
-
def
|
366
|
+
def batch(*, cpu: int = 1, gpu: int = 0, memory: int = 4096, image: typing.Optional[str] = None, queue: str = 'METAFLOW_BATCH_JOB_QUEUE', iam_role: str = 'METAFLOW_ECS_S3_ACCESS_IAM_ROLE', execution_role: str = 'METAFLOW_ECS_FARGATE_EXECUTION_ROLE', shared_memory: typing.Optional[int] = None, max_swap: typing.Optional[int] = None, swappiness: typing.Optional[int] = None, use_tmpfs: bool = False, tmpfs_tempdir: bool = True, tmpfs_size: typing.Optional[int] = None, tmpfs_path: typing.Optional[str] = None, inferentia: int = 0, trainium: int = None, efa: int = 0, ephemeral_storage: int = None, log_driver: typing.Optional[str] = None, log_options: typing.Optional[typing.List[str]] = None) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
|
1310
367
|
"""
|
1311
|
-
Specifies
|
1312
|
-
|
1313
|
-
Parameters
|
1314
|
-
----------
|
1315
|
-
vars : Dict[str, str], default {}
|
1316
|
-
Dictionary of environment variables to set.
|
368
|
+
Specifies that this step should execute on [AWS Batch](https://aws.amazon.com/batch/).
|
1317
369
|
"""
|
1318
370
|
...
|
1319
371
|
|
1320
372
|
@typing.overload
|
1321
|
-
def
|
373
|
+
def batch(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
|
1322
374
|
...
|
1323
375
|
|
1324
376
|
@typing.overload
|
1325
|
-
def
|
377
|
+
def batch(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
|
1326
378
|
...
|
1327
379
|
|
1328
|
-
def
|
380
|
+
def batch(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None, *, cpu: int = 1, gpu: int = 0, memory: int = 4096, image: typing.Optional[str] = None, queue: str = 'METAFLOW_BATCH_JOB_QUEUE', iam_role: str = 'METAFLOW_ECS_S3_ACCESS_IAM_ROLE', execution_role: str = 'METAFLOW_ECS_FARGATE_EXECUTION_ROLE', shared_memory: typing.Optional[int] = None, max_swap: typing.Optional[int] = None, swappiness: typing.Optional[int] = None, use_tmpfs: bool = False, tmpfs_tempdir: bool = True, tmpfs_size: typing.Optional[int] = None, tmpfs_path: typing.Optional[str] = None, inferentia: int = 0, trainium: int = None, efa: int = 0, ephemeral_storage: int = None, log_driver: typing.Optional[str] = None, log_options: typing.Optional[typing.List[str]] = None):
|
1329
381
|
"""
|
1330
|
-
Specifies
|
1331
|
-
|
1332
|
-
Parameters
|
1333
|
-
----------
|
1334
|
-
vars : Dict[str, str], default {}
|
1335
|
-
Dictionary of environment variables to set.
|
382
|
+
Specifies that this step should execute on [AWS Batch](https://aws.amazon.com/batch/).
|
1336
383
|
"""
|
1337
384
|
...
|
1338
385
|
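A sketch of `@batch` with illustrative resource values; the queue and IAM role fall back to the configured defaults shown in the signature above:

```python
from metaflow import FlowSpec, step, batch

class BatchFlow(FlowSpec):

    @batch(cpu=2, memory=8192)
    @step
    def start(self):
        print("running on AWS Batch")
        self.next(self.end)

    @step
    def end(self):
        pass

if __name__ == "__main__":
    BatchFlow()
```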
 @typing.overload
-def
+def pypi(*, packages: typing.Dict[str, str] = {}, python: typing.Optional[str] = None) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
     """
-
-
-    Note that you may add multiple `@card` decorators in a step with different parameters.
-
-    Parameters
-    ----------
-    type : str, default 'default'
-        Card type.
-    id : str, optional, default None
-        If multiple cards are present, use this id to identify this card.
-    options : Dict[str, Any], default {}
-        Options passed to the card. The contents depend on the card type.
-    timeout : int, default 45
-        Interrupt reporting if it takes more than this many seconds.
-
+    Specifies the PyPI packages for the step.

+    Information in this decorator will augment any
+    attributes set in the `@pypi_base` flow-level decorator. Hence,
+    you can use `@pypi_base` to set packages required by all
+    steps and use `@pypi` to specify step-specific overrides.
     """
     ...

 @typing.overload
-def
+def pypi(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
     ...

 @typing.overload
-def
+def pypi(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
     ...

-def
+def pypi(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None, *, packages: typing.Dict[str, str] = {}, python: typing.Optional[str] = None):
     """
-
-
-    Note that you may add multiple `@card` decorators in a step with different parameters.
-
-    Parameters
-    ----------
-    type : str, default 'default'
-        Card type.
-    id : str, optional, default None
-        If multiple cards are present, use this id to identify this card.
-    options : Dict[str, Any], default {}
-        Options passed to the card. The contents depend on the card type.
-    timeout : int, default 45
-        Interrupt reporting if it takes more than this many seconds.
-
+    Specifies the PyPI packages for the step.

+    Information in this decorator will augment any
+    attributes set in the `@pypi_base` flow-level decorator. Hence,
+    you can use `@pypi_base` to set packages required by all
+    steps and use `@pypi` to specify step-specific overrides.
     """
     ...

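A sketch of step-level `@pypi` layered over a flow-level `@pypi_base`, matching the augment behavior described above; versions are illustrative:

```python
from metaflow import FlowSpec, step, pypi, pypi_base

@pypi_base(python="3.10.4", packages={"requests": "2.32.3"})
class PypiFlow(FlowSpec):

    @pypi(packages={"beautifulsoup4": "4.12.3"})  # step-specific addition
    @step
    def start(self):
        import requests  # available from the flow-level base environment
        self.next(self.end)

    @step
    def end(self):
        pass

if __name__ == "__main__":
    PypiFlow()
```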
 @typing.overload
-def
+def parallel(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
     """
-
-
-
-    Parameters
-    ----------
-    sources : List[Union[str, Dict[str, Any]]], default: []
-        List of secret specs, defining how the secrets are to be retrieved
+    Decorator prototype for all step decorators. This function gets specialized
+    and imported for all decorators types by _import_plugin_decorators().
     """
     ...

 @typing.overload
-def
-    ...
-
-@typing.overload
-def secrets(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
+def parallel(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
     ...

-def
+def parallel(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None):
     """
-
-
-
-    Parameters
-    ----------
-    sources : List[Union[str, Dict[str, Any]]], default: []
-        List of secret specs, defining how the secrets are to be retrieved
+    Decorator prototype for all step decorators. This function gets specialized
+    and imported for all decorators types by _import_plugin_decorators().
     """
     ...

 @typing.overload
-def
+def timeout(*, seconds: int = 0, minutes: int = 0, hours: int = 0) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
     """
-    Specifies
+    Specifies a timeout for your step.

-
-    attributes set in the `@conda_base` flow-level decorator. Hence,
-    you can use `@conda_base` to set packages required by all
-    steps and use `@conda` to specify step-specific overrides.
+    This decorator is useful if this step may hang indefinitely.

-
-
-
-
-
-
-        Supported for backward compatibility. When used with packages, packages will take precedence.
-    python : str, optional, default None
-        Version of Python to use, e.g. '3.7.4'. A default value of None implies
-        that the version used will correspond to the version of the Python interpreter used to start the run.
-    disabled : bool, default False
-        If set to True, disables @conda.
+    This can be used in conjunction with the `@retry` decorator as well as the `@catch` decorator.
+    A timeout is considered to be an exception thrown by the step. It will cause the step to be
+    retried if needed and the exception will be caught by the `@catch` decorator, if present.
+
+    Note that all the values specified in parameters are added together so if you specify
+    60 seconds and 1 hour, the decorator will have an effective timeout of 1 hour and 1 minute.
     """
     ...

 @typing.overload
-def
+def timeout(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
     ...

 @typing.overload
-def
+def timeout(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
     ...

-def
+def timeout(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None, *, seconds: int = 0, minutes: int = 0, hours: int = 0):
     """
-    Specifies
+    Specifies a timeout for your step.

-
-    attributes set in the `@conda_base` flow-level decorator. Hence,
-    you can use `@conda_base` to set packages required by all
-    steps and use `@conda` to specify step-specific overrides.
+    This decorator is useful if this step may hang indefinitely.

-
-
-
-        Packages to use for this step. The key is the name of the package
-        and the value is the version to use.
-    libraries : Dict[str, str], default {}
-        Supported for backward compatibility. When used with packages, packages will take precedence.
-    python : str, optional, default None
-        Version of Python to use, e.g. '3.7.4'. A default value of None implies
-        that the version used will correspond to the version of the Python interpreter used to start the run.
-    disabled : bool, default False
-        If set to True, disables @conda.
-    """
-    ...
-
-def kubernetes(*, cpu: int = 1, memory: int = 4096, disk: int = 10240, image: typing.Optional[str] = None, image_pull_policy: str = "KUBERNETES_IMAGE_PULL_POLICY", service_account: str = "METAFLOW_KUBERNETES_SERVICE_ACCOUNT", secrets: typing.Optional[typing.List[str]] = None, node_selector: typing.Union[typing.Dict[str, str], str, None] = None, namespace: str = "METAFLOW_KUBERNETES_NAMESPACE", gpu: typing.Optional[int] = None, gpu_vendor: str = "KUBERNETES_GPU_VENDOR", tolerations: typing.List[str] = [], use_tmpfs: bool = False, tmpfs_tempdir: bool = True, tmpfs_size: typing.Optional[int] = None, tmpfs_path: typing.Optional[str] = "/metaflow_temp", persistent_volume_claims: typing.Optional[typing.Dict[str, str]] = None, shared_memory: typing.Optional[int] = None, port: typing.Optional[int] = None, compute_pool: typing.Optional[str] = None, hostname_resolution_timeout: int = 600) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
-    """
-    Specifies that this step should execute on Kubernetes.
+    This can be used in conjunction with the `@retry` decorator as well as the `@catch` decorator.
+    A timeout is considered to be an exception thrown by the step. It will cause the step to be
+    retried if needed and the exception will be caught by the `@catch` decorator, if present.

-
-
-    cpu : int, default 1
-        Number of CPUs required for this step. If `@resources` is
-        also present, the maximum value from all decorators is used.
-    memory : int, default 4096
-        Memory size (in MB) required for this step. If
-        `@resources` is also present, the maximum value from all decorators is
-        used.
-    disk : int, default 10240
-        Disk size (in MB) required for this step. If
-        `@resources` is also present, the maximum value from all decorators is
-        used.
-    image : str, optional, default None
-        Docker image to use when launching on Kubernetes. If not specified, and
-        METAFLOW_KUBERNETES_CONTAINER_IMAGE is specified, that image is used. If
-        not, a default Docker image mapping to the current version of Python is used.
-    image_pull_policy: str, default KUBERNETES_IMAGE_PULL_POLICY
-        If given, the imagePullPolicy to be applied to the Docker image of the step.
-    service_account : str, default METAFLOW_KUBERNETES_SERVICE_ACCOUNT
-        Kubernetes service account to use when launching pod in Kubernetes.
-    secrets : List[str], optional, default None
-        Kubernetes secrets to use when launching pod in Kubernetes. These
-        secrets are in addition to the ones defined in `METAFLOW_KUBERNETES_SECRETS`
-        in Metaflow configuration.
-    node_selector: Union[Dict[str,str], str], optional, default None
-        Kubernetes node selector(s) to apply to the pod running the task.
-        Can be passed in as a comma separated string of values e.g. "kubernetes.io/os=linux,kubernetes.io/arch=amd64"
-        or as a dictionary {"kubernetes.io/os": "linux", "kubernetes.io/arch": "amd64"}
-    namespace : str, default METAFLOW_KUBERNETES_NAMESPACE
-        Kubernetes namespace to use when launching pod in Kubernetes.
-    gpu : int, optional, default None
-        Number of GPUs required for this step. A value of zero implies that
-        the scheduled node should not have GPUs.
-    gpu_vendor : str, default KUBERNETES_GPU_VENDOR
-        The vendor of the GPUs to be used for this step.
-    tolerations : List[str], default []
-        The default is extracted from METAFLOW_KUBERNETES_TOLERATIONS.
-        Kubernetes tolerations to use when launching pod in Kubernetes.
-    use_tmpfs : bool, default False
-        This enables an explicit tmpfs mount for this step.
-    tmpfs_tempdir : bool, default True
-        sets METAFLOW_TEMPDIR to tmpfs_path if set for this step.
-    tmpfs_size : int, optional, default: None
-        The value for the size (in MiB) of the tmpfs mount for this step.
-        This parameter maps to the `--tmpfs` option in Docker. Defaults to 50% of the
-        memory allocated for this step.
-    tmpfs_path : str, optional, default /metaflow_temp
-        Path to tmpfs mount for this step.
-    persistent_volume_claims : Dict[str, str], optional, default None
-        A map (dictionary) of persistent volumes to be mounted to the pod for this step. The map is from persistent
-        volumes to the path to which the volume is to be mounted, e.g., `{'pvc-name': '/path/to/mount/on'}`.
-    shared_memory: int, optional
-        Shared memory size (in MiB) required for this step
-    port: int, optional
-        Port number to specify in the Kubernetes job object
-    compute_pool : str, optional, default None
-        Compute pool to be used for this step.
-        If not specified, any accessible compute pool within the perimeter is used.
-    hostname_resolution_timeout: int, default 10 * 60
-        Timeout in seconds for the workers tasks in the gang scheduled cluster to resolve the hostname of control task.
-        Only applicable when @parallel is used.
+    Note that all the values specified in parameters are added together so if you specify
+    60 seconds and 1 hour, the decorator will have an effective timeout of 1 hour and 1 minute.
     """
     ...

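A sketch of `@timeout` combined with `@retry` and `@catch`, reflecting the interaction described in the docstring above; the retry count is illustrative:

```python
from metaflow import FlowSpec, step, timeout, retry, catch

class RobustFlow(FlowSpec):

    @catch(var="failure")          # catches the timeout if retries are exhausted
    @retry(times=2)                # a timed-out attempt is retried like any exception
    @timeout(hours=1, seconds=60)  # values add up: effective timeout is 1h 1min
    @step
    def start(self):
        self.next(self.end)

    @step
    def end(self):
        pass

if __name__ == "__main__":
    RobustFlow()
```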
@@ -1553,56 +479,25 @@ def airflow_s3_key_sensor(*, timeout: int, poke_interval: int, mode: str, expone
     and is compiled using `airflow create`. More than one `@airflow_s3_key_sensor` can be
     added as a flow decorators. Adding more than one decorator will ensure that `start` step
     starts only after all sensors finish.
-
-    Parameters
-    ----------
-    timeout : int
-        Time, in seconds before the task times out and fails. (Default: 3600)
-    poke_interval : int
-        Time in seconds that the job should wait in between each try. (Default: 60)
-    mode : str
-        How the sensor operates. Options are: { poke | reschedule }. (Default: "poke")
-    exponential_backoff : bool
-        allow progressive longer waits between pokes by using exponential backoff algorithm. (Default: True)
-    pool : str
-        the slot pool this task should run in,
-        slot pools are a way to limit concurrency for certain tasks. (Default:None)
-    soft_fail : bool
-        Set to true to mark the task as SKIPPED on failure. (Default: False)
-    name : str
-        Name of the sensor on Airflow
-    description : str
-        Description of sensor in the Airflow UI
-    bucket_key : Union[str, List[str]]
-        The key(s) being waited on. Supports full s3:// style url or relative path from root level.
-        When it's specified as a full s3:// url, please leave `bucket_name` as None
-    bucket_name : str
-        Name of the S3 bucket. Only needed when bucket_key is not provided as a full s3:// url.
-        When specified, all the keys passed to bucket_key refers to this bucket. (Default:None)
-    wildcard_match : bool
-        whether the bucket_key should be interpreted as a Unix wildcard pattern. (Default: False)
-    aws_conn_id : str
-        a reference to the s3 connection on Airflow. (Default: None)
-    verify : bool
-        Whether or not to verify SSL certificates for S3 connection. (Default: None)
     """
     ...

-
+@typing.overload
+def schedule(*, hourly: bool = False, daily: bool = True, weekly: bool = False, cron: typing.Optional[str] = None, timezone: typing.Optional[str] = None) -> typing.Callable[[typing.Type[FlowSpecDerived]], typing.Type[FlowSpecDerived]]:
     """
-    Specifies
-
-
-
-
-
-
-
-
-
-
-
+    Specifies the times when the flow should be run when running on a
+    production scheduler.
+    """
+    ...
+
+@typing.overload
+def schedule(f: typing.Type[FlowSpecDerived]) -> typing.Type[FlowSpecDerived]:
+    ...
+
+def schedule(f: typing.Optional[typing.Type[FlowSpecDerived]] = None, *, hourly: bool = False, daily: bool = True, weekly: bool = False, cron: typing.Optional[str] = None, timezone: typing.Optional[str] = None):
+    """
+    Specifies the times when the flow should be run when running on a
+    production scheduler.
     """
     ...

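A sketch of flow-level `@schedule` from the signature above; the cron expression is illustrative:

```python
from metaflow import FlowSpec, step, schedule

@schedule(cron="0 6 * * *")  # takes effect when deployed to a production scheduler
class NightlyFlow(FlowSpec):

    @step
    def start(self):
        self.next(self.end)

    @step
    def end(self):
        pass

if __name__ == "__main__":
    NightlyFlow()
```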
@@ -1613,19 +508,6 @@ def conda_base(*, packages: typing.Dict[str, str] = {}, libraries: typing.Dict[s

     Use `@conda_base` to set common libraries required by all
     steps and use `@conda` to specify step-specific additions.
-
-    Parameters
-    ----------
-    packages : Dict[str, str], default {}
-        Packages to use for this flow. The key is the name of the package
-        and the value is the version to use.
-    libraries : Dict[str, str], default {}
-        Supported for backward compatibility. When used with packages, packages will take precedence.
-    python : str, optional, default None
-        Version of Python to use, e.g. '3.7.4'. A default value of None implies
-        that the version used will correspond to the version of the Python interpreter used to start the run.
-    disabled : bool, default False
-        If set to True, disables Conda.
     """
     ...

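A sketch of `@conda_base` providing a shared environment with a step-specific `@conda` addition, per the context lines above; versions are illustrative:

```python
from metaflow import FlowSpec, step, conda, conda_base

@conda_base(python="3.10.4", packages={"numpy": "1.26.4"})
class SharedEnvFlow(FlowSpec):

    @conda(packages={"pandas": "2.2.2"})  # added on top of the flow-level packages
    @step
    def start(self):
        import numpy   # from @conda_base
        import pandas  # from the step-level @conda
        self.next(self.end)

    @step
    def end(self):
        pass

if __name__ == "__main__":
    SharedEnvFlow()
```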
@@ -1639,164 +521,6 @@ def conda_base(f: typing.Optional[typing.Type[FlowSpecDerived]] = None, *, packa

     Use `@conda_base` to set common libraries required by all
     steps and use `@conda` to specify step-specific additions.
-
-    Parameters
-    ----------
-    packages : Dict[str, str], default {}
-        Packages to use for this flow. The key is the name of the package
-        and the value is the version to use.
-    libraries : Dict[str, str], default {}
-        Supported for backward compatibility. When used with packages, packages will take precedence.
-    python : str, optional, default None
-        Version of Python to use, e.g. '3.7.4'. A default value of None implies
-        that the version used will correspond to the version of the Python interpreter used to start the run.
-    disabled : bool, default False
-        If set to True, disables Conda.
-    """
-    ...
-
-def airflow_external_task_sensor(*, timeout: int, poke_interval: int, mode: str, exponential_backoff: bool, pool: str, soft_fail: bool, name: str, description: str, external_dag_id: str, external_task_ids: typing.List[str], allowed_states: typing.List[str], failed_states: typing.List[str], execution_delta: "datetime.timedelta", check_existence: bool) -> typing.Callable[[typing.Type[FlowSpecDerived]], typing.Type[FlowSpecDerived]]:
-    """
-    The `@airflow_external_task_sensor` decorator attaches a Airflow [ExternalTaskSensor](https://airflow.apache.org/docs/apache-airflow/stable/_api/airflow/sensors/external_task/index.html#airflow.sensors.external_task.ExternalTaskSensor) before the start step of the flow.
-    This decorator only works when a flow is scheduled on Airflow and is compiled using `airflow create`. More than one `@airflow_external_task_sensor` can be added as a flow decorators. Adding more than one decorator will ensure that `start` step starts only after all sensors finish.
-
-    Parameters
-    ----------
-    timeout : int
-        Time, in seconds before the task times out and fails. (Default: 3600)
-    poke_interval : int
-        Time in seconds that the job should wait in between each try. (Default: 60)
-    mode : str
-        How the sensor operates. Options are: { poke | reschedule }. (Default: "poke")
-    exponential_backoff : bool
-        allow progressive longer waits between pokes by using exponential backoff algorithm. (Default: True)
-    pool : str
-        the slot pool this task should run in,
-        slot pools are a way to limit concurrency for certain tasks. (Default:None)
-    soft_fail : bool
-        Set to true to mark the task as SKIPPED on failure. (Default: False)
-    name : str
-        Name of the sensor on Airflow
-    description : str
-        Description of sensor in the Airflow UI
-    external_dag_id : str
-        The dag_id that contains the task you want to wait for.
-    external_task_ids : List[str]
-        The list of task_ids that you want to wait for.
-        If None (default value) the sensor waits for the DAG. (Default: None)
-    allowed_states : List[str]
-        Iterable of allowed states, (Default: ['success'])
-    failed_states : List[str]
-        Iterable of failed or dis-allowed states. (Default: None)
-    execution_delta : datetime.timedelta
-        time difference with the previous execution to look at,
-        the default is the same logical date as the current task or DAG. (Default: None)
-    check_existence: bool
-        Set to True to check if the external task exists or check if
-        the DAG to wait for exists. (Default: True)
-    """
-    ...
-
-@typing.overload
-def trigger_on_finish(*, flow: typing.Union[typing.Dict[str, str], str, None] = None, flows: typing.List[typing.Union[str, typing.Dict[str, str]]] = [], options: typing.Dict[str, typing.Any] = {}) -> typing.Callable[[typing.Type[FlowSpecDerived]], typing.Type[FlowSpecDerived]]:
-    """
-    Specifies the flow(s) that this flow depends on.
-
-    ```
-    @trigger_on_finish(flow='FooFlow')
-    ```
-    or
-    ```
-    @trigger_on_finish(flows=['FooFlow', 'BarFlow'])
-    ```
-    This decorator respects the @project decorator and triggers the flow
-    when upstream runs within the same namespace complete successfully
-
-    Additionally, you can specify project aware upstream flow dependencies
-    by specifying the fully qualified project_flow_name.
-    ```
-    @trigger_on_finish(flow='my_project.branch.my_branch.FooFlow')
-    ```
-    or
-    ```
-    @trigger_on_finish(flows=['my_project.branch.my_branch.FooFlow', 'BarFlow'])
-    ```
-
-    You can also specify just the project or project branch (other values will be
-    inferred from the current project or project branch):
-    ```
-    @trigger_on_finish(flow={"name": "FooFlow", "project": "my_project", "project_branch": "branch"})
-    ```
-
-    Note that `branch` is typically one of:
-    - `prod`
-    - `user.bob`
-    - `test.my_experiment`
-    - `prod.staging`
-
-    Parameters
-    ----------
-    flow : Union[str, Dict[str, str]], optional, default None
-        Upstream flow dependency for this flow.
-    flows : List[Union[str, Dict[str, str]]], default []
-        Upstream flow dependencies for this flow.
-    options : Dict[str, Any], default {}
-        Backend-specific configuration for tuning eventing behavior.
-
-
-    """
-    ...
-
-@typing.overload
-def trigger_on_finish(f: typing.Type[FlowSpecDerived]) -> typing.Type[FlowSpecDerived]:
-    ...
-
-def trigger_on_finish(f: typing.Optional[typing.Type[FlowSpecDerived]] = None, *, flow: typing.Union[typing.Dict[str, str], str, None] = None, flows: typing.List[typing.Union[str, typing.Dict[str, str]]] = [], options: typing.Dict[str, typing.Any] = {}):
-    """
-    Specifies the flow(s) that this flow depends on.
-
-    ```
-    @trigger_on_finish(flow='FooFlow')
-    ```
-    or
-    ```
-    @trigger_on_finish(flows=['FooFlow', 'BarFlow'])
-    ```
-    This decorator respects the @project decorator and triggers the flow
-    when upstream runs within the same namespace complete successfully
-
-    Additionally, you can specify project aware upstream flow dependencies
-    by specifying the fully qualified project_flow_name.
-    ```
-    @trigger_on_finish(flow='my_project.branch.my_branch.FooFlow')
-    ```
-    or
-    ```
-    @trigger_on_finish(flows=['my_project.branch.my_branch.FooFlow', 'BarFlow'])
-    ```
-
-    You can also specify just the project or project branch (other values will be
-    inferred from the current project or project branch):
-    ```
-    @trigger_on_finish(flow={"name": "FooFlow", "project": "my_project", "project_branch": "branch"})
-    ```
-
-    Note that `branch` is typically one of:
-    - `prod`
-    - `user.bob`
-    - `test.my_experiment`
-    - `prod.staging`
-
-    Parameters
-    ----------
-    flow : Union[str, Dict[str, str]], optional, default None
-        Upstream flow dependency for this flow.
-    flows : List[Union[str, Dict[str, str]]], default []
-        Upstream flow dependencies for this flow.
-    options : Dict[str, Any], default {}
-        Backend-specific configuration for tuning eventing behavior.
-
-
     """
     ...

@@ -1832,17 +556,6 @@ def trigger(*, event: typing.Union[str, typing.Dict[str, typing.Any], None] = No
     ```
     @trigger(event={'name':'foo', 'parameters':{'common_name': 'common_name', 'flow_param': 'event_field'}})
     ```
-
-    Parameters
-    ----------
-    event : Union[str, Dict[str, Any]], optional, default None
-        Event dependency for this flow.
-    events : List[Union[str, Dict[str, Any]]], default []
-        Events dependency for this flow.
-    options : Dict[str, Any], default {}
-        Backend-specific configuration for tuning eventing behavior.
-
-
     """
     ...

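A sketch of event-triggered execution matching the `@trigger` context above; the event name is illustrative:

```python
from metaflow import FlowSpec, step, trigger

@trigger(event="data_updated")  # takes effect when the flow is deployed
class EventDrivenFlow(FlowSpec):

    @step
    def start(self):
        self.next(self.end)

    @step
    def end(self):
        pass

if __name__ == "__main__":
    EventDrivenFlow()
```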
@@ -1881,17 +594,6 @@ def trigger(f: typing.Optional[typing.Type[FlowSpecDerived]] = None, *, event: t
     ```
     @trigger(event={'name':'foo', 'parameters':{'common_name': 'common_name', 'flow_param': 'event_field'}})
     ```
-
-    Parameters
-    ----------
-    event : Union[str, Dict[str, Any]], optional, default None
-        Event dependency for this flow.
-    events : List[Union[str, Dict[str, Any]]], default []
-        Events dependency for this flow.
-    options : Dict[str, Any], default {}
-        Backend-specific configuration for tuning eventing behavior.
-
-
     """
     ...

@@ -1902,14 +604,6 @@ def pypi_base(*, packages: typing.Dict[str, str] = {}, python: typing.Optional[s

     Use `@pypi_base` to set common packages required by all
     steps and use `@pypi` to specify step-specific overrides.
-    Parameters
-    ----------
-    packages : Dict[str, str], default: {}
-        Packages to use for this flow. The key is the name of the package
-        and the value is the version to use.
-    python : str, optional, default: None
-        Version of Python to use, e.g. '3.7.4'. A default value of None implies
-        that the version used will correspond to the version of the Python interpreter used to start the run.
     """
     ...

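A sketch of flow-level `@pypi_base`; versions are illustrative, and flows using PyPI decorators are typically run with `--environment=pypi`:

```python
from metaflow import FlowSpec, step, pypi_base

@pypi_base(python="3.10.4", packages={"pandas": "2.2.2"})
class PandasFlow(FlowSpec):

    @step
    def start(self):
        import pandas as pd  # provided by the flow-level environment
        self.next(self.end)

    @step
    def end(self):
        pass

if __name__ == "__main__":
    PandasFlow()
```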
@@ -1923,1501 +617,103 @@ def pypi_base(f: typing.Optional[typing.Type[FlowSpecDerived]] = None, *, packag

     Use `@pypi_base` to set common packages required by all
     steps and use `@pypi` to specify step-specific overrides.
-    Parameters
-    ----------
-    packages : Dict[str, str], default: {}
-        Packages to use for this flow. The key is the name of the package
-        and the value is the version to use.
-    python : str, optional, default: None
-        Version of Python to use, e.g. '3.7.4'. A default value of None implies
-        that the version used will correspond to the version of the Python interpreter used to start the run.
     """
     ...

-
-def schedule(*, hourly: bool = False, daily: bool = True, weekly: bool = False, cron: typing.Optional[str] = None, timezone: typing.Optional[str] = None) -> typing.Callable[[typing.Type[FlowSpecDerived]], typing.Type[FlowSpecDerived]]:
+def project(*, name: str) -> typing.Callable[[typing.Type[FlowSpecDerived]], typing.Type[FlowSpecDerived]]:
     """
-    Specifies
-    production scheduler.
+    Specifies what flows belong to the same project.

-
-
-    hourly : bool, default False
-        Run the workflow hourly.
-    daily : bool, default True
-        Run the workflow daily.
-    weekly : bool, default False
-        Run the workflow weekly.
-    cron : str, optional, default None
-        Run the workflow at [a custom Cron schedule](https://docs.aws.amazon.com/eventbridge/latest/userguide/scheduled-events.html#cron-expressions)
-        specified by this expression.
-    timezone : str, optional, default None
-        Timezone on which the schedule runs (default: None). Currently supported only for Argo workflows,
-        which accepts timezones in [IANA format](https://nodatime.org/TimeZones).
+    A project-specific namespace is created for all flows that
+    use the same `@project(name)`.
     """
     ...

 @typing.overload
-def
-    ...
-
-def schedule(f: typing.Optional[typing.Type[FlowSpecDerived]] = None, *, hourly: bool = False, daily: bool = True, weekly: bool = False, cron: typing.Optional[str] = None, timezone: typing.Optional[str] = None):
+def trigger_on_finish(*, flow: typing.Union[typing.Dict[str, str], str, None] = None, flows: typing.List[typing.Union[str, typing.Dict[str, str]]] = [], options: typing.Dict[str, typing.Any] = {}) -> typing.Callable[[typing.Type[FlowSpecDerived]], typing.Type[FlowSpecDerived]]:
     """
-    Specifies the
-    production scheduler.
+    Specifies the flow(s) that this flow depends on.

-
-
-
-
-
-
-
-
-
-        Run the workflow at [a custom Cron schedule](https://docs.aws.amazon.com/eventbridge/latest/userguide/scheduled-events.html#cron-expressions)
-        specified by this expression.
-    timezone : str, optional, default None
-        Timezone on which the schedule runs (default: None). Currently supported only for Argo workflows,
-        which accepts timezones in [IANA format](https://nodatime.org/TimeZones).
-    """
-    ...
-
-def namespace(ns: typing.Optional[str]) -> typing.Optional[str]:
-    """
-    Switch namespace to the one provided.
+    ```
+    @trigger_on_finish(flow='FooFlow')
+    ```
+    or
+    ```
+    @trigger_on_finish(flows=['FooFlow', 'BarFlow'])
+    ```
+    This decorator respects the @project decorator and triggers the flow
+    when upstream runs within the same namespace complete successfully

-
-
-
+    Additionally, you can specify project aware upstream flow dependencies
+    by specifying the fully qualified project_flow_name.
+    ```
+    @trigger_on_finish(flow='my_project.branch.my_branch.FooFlow')
+    ```
+    or
+    ```
+    @trigger_on_finish(flows=['my_project.branch.my_branch.FooFlow', 'BarFlow'])
+    ```

-
-
-
-
+    You can also specify just the project or project branch (other values will be
+    inferred from the current project or project branch):
+    ```
+    @trigger_on_finish(flow={"name": "FooFlow", "project": "my_project", "project_branch": "branch"})
+    ```

-
-
-
-
+    Note that `branch` is typically one of:
+    - `prod`
+    - `user.bob`
+    - `test.my_experiment`
+    - `prod.staging`
     """
     ...

-
-
-    Return the current namespace that is currently being used to filter objects.
-
-    The namespace is a tag associated with all objects in Metaflow.
-
-    Returns
-    -------
-    str, optional
-        The current namespace used to filter objects.
-    """
+@typing.overload
+def trigger_on_finish(f: typing.Type[FlowSpecDerived]) -> typing.Type[FlowSpecDerived]:
     ...

-def
+def trigger_on_finish(f: typing.Optional[typing.Type[FlowSpecDerived]] = None, *, flow: typing.Union[typing.Dict[str, str], str, None] = None, flows: typing.List[typing.Union[str, typing.Dict[str, str]]] = [], options: typing.Dict[str, typing.Any] = {}):
     """
-
-    used prior to any `namespace` calls.
+    Specifies the flow(s) that this flow depends on.

-
-
-
-
-
-
-
-
-
-    Returns the current Metadata provider.
+    ```
+    @trigger_on_finish(flow='FooFlow')
+    ```
+    or
+    ```
+    @trigger_on_finish(flows=['FooFlow', 'BarFlow'])
+    ```
+    This decorator respects the @project decorator and triggers the flow
+    when upstream runs within the same namespace complete successfully

-
-
-
+    Additionally, you can specify project aware upstream flow dependencies
+    by specifying the fully qualified project_flow_name.
+    ```
+    @trigger_on_finish(flow='my_project.branch.my_branch.FooFlow')
+    ```
+    or
+    ```
+    @trigger_on_finish(flows=['my_project.branch.my_branch.FooFlow', 'BarFlow'])
+    ```

-
-
+    You can also specify just the project or project branch (other values will be
+    inferred from the current project or project branch):
+    ```
+    @trigger_on_finish(flow={"name": "FooFlow", "project": "my_project", "project_branch": "branch"})
+    ```

-
-
-
-
-
-    local providers).
+    Note that `branch` is typically one of:
+    - `prod`
+    - `user.bob`
+    - `test.my_experiment`
+    - `prod.staging`
     """
     ...

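A sketch combining `@project` and `@trigger_on_finish` from the signatures above; flow and project names are illustrative:

```python
from metaflow import FlowSpec, step, project, trigger_on_finish

@project(name="demo_project")
@trigger_on_finish(flow="FooFlow")  # deploys to run after FooFlow completes in this namespace
class DownstreamFlow(FlowSpec):

    @step
    def start(self):
        self.next(self.end)

    @step
    def end(self):
        pass

if __name__ == "__main__":
    DownstreamFlow()
```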
2051
|
-
def
|
2052
|
-
"""
|
2053
|
-
Resets the Metadata provider to the default value, that is, to the value
|
2054
|
-
that was used prior to any `metadata` calls.
|
2055
|
-
|
2056
|
-
Returns
|
2057
|
-
-------
|
2058
|
-
str
|
2059
|
-
The result of get_metadata() after resetting the provider.
|
713
|
+
def airflow_external_task_sensor(*, timeout: int, poke_interval: int, mode: str, exponential_backoff: bool, pool: str, soft_fail: bool, name: str, description: str, external_dag_id: str, external_task_ids: typing.List[str], allowed_states: typing.List[str], failed_states: typing.List[str], execution_delta: "datetime.timedelta", check_existence: bool) -> typing.Callable[[typing.Type[FlowSpecDerived]], typing.Type[FlowSpecDerived]]:
|
2060
714
|
"""
|
2061
|
-
|
2062
|
-
|
2063
|
-
class Metaflow(object, metaclass=type):
|
715
|
+
The `@airflow_external_task_sensor` decorator attaches a Airflow [ExternalTaskSensor](https://airflow.apache.org/docs/apache-airflow/stable/_api/airflow/sensors/external_task/index.html#airflow.sensors.external_task.ExternalTaskSensor) before the start step of the flow.
|
716
|
+
This decorator only works when a flow is scheduled on Airflow and is compiled using `airflow create`. More than one `@airflow_external_task_sensor` can be added as a flow decorators. Adding more than one decorator will ensure that `start` step starts only after all sensors finish.
|
2064
717
|
"""
|
2065
|
-
Entry point to all objects in the Metaflow universe.
|
2066
|
-
|
2067
|
-
This object can be used to list all the flows present either through the explicit property
|
2068
|
-
or by iterating over this object.
|
2069
|
-
|
2070
|
-
Attributes
|
2071
|
-
----------
|
2072
|
-
flows : List[Flow]
|
2073
|
-
Returns the list of all `Flow` objects known to this metadata provider. Note that only
|
2074
|
-
flows present in the current namespace will be returned. A `Flow` is present in a namespace
|
2075
|
-
if it has at least one run in the namespace.
|
2076
|
-
"""
|
2077
|
-
def __init__(self, _current_metadata: typing.Optional[str] = None):
|
2078
|
-
...
|
2079
|
-
@property
|
2080
|
-
def flows(self) -> typing.List[metaflow.client.core.Flow]:
|
2081
|
-
"""
|
2082
|
-
Returns a list of all the flows present.
|
2083
|
-
|
2084
|
-
Only flows present in the set namespace are returned. A flow is present in a namespace if
|
2085
|
-
it has at least one run that is in the namespace.
|
2086
|
-
|
2087
|
-
Returns
|
2088
|
-
-------
|
2089
|
-
List[Flow]
|
2090
|
-
List of all flows present.
|
2091
|
-
"""
|
2092
|
-
...
|
2093
|
-
def __iter__(self) -> typing.Iterator[metaflow.client.core.Flow]:
|
2094
|
-
"""
|
2095
|
-
Iterator over all flows present.
|
2096
|
-
|
2097
|
-
Only flows present in the set namespace are returned. A flow is present in a
|
2098
|
-
namespace if it has at least one run that is in the namespace.
|
2099
|
-
|
2100
|
-
Yields
|
2101
|
-
-------
|
2102
|
-
Flow
|
2103
|
-
A Flow present in the Metaflow universe.
|
2104
|
-
"""
|
2105
|
-
...
|
2106
|
-
def __str__(self) -> str:
|
2107
|
-
...
|
2108
|
-
def __getitem__(self, name: str) -> metaflow.client.core.Flow:
|
2109
|
-
"""
|
2110
|
-
Returns a specific flow by name.
|
2111
|
-
|
2112
|
-
The flow will only be returned if it is present in the current namespace.
|
2113
|
-
|
2114
|
-
Parameters
|
2115
|
-
----------
|
2116
|
-
name : str
|
2117
|
-
Name of the Flow
|
2118
|
-
|
2119
|
-
Returns
|
2120
|
-
-------
|
2121
|
-
Flow
|
2122
|
-
Flow with the given name.
|
2123
|
-
"""
|
2124
|
-
...
|
2125
|
-
...
|
2126
|
-
|
```diff
-class Flow(metaflow.client.core.MetaflowObject, metaclass=type):
-    """
-    A Flow represents all existing flows with a certain name, in other words,
-    classes derived from `FlowSpec`. A container of `Run` objects.
-
-    Attributes
-    ----------
-    latest_run : Run
-        Latest `Run` (in progress or completed, successfully or not) of this flow.
-    latest_successful_run : Run
-        Latest successfully completed `Run` of this flow.
-    """
-    def __init__(self, *args, **kwargs):
-        ...
-    @property
-    def latest_run(self) -> typing.Optional[metaflow.client.core.Run]:
-        """
-        Returns the latest run (either in progress or completed) of this flow.
-
-        Note that an in-progress run may be returned by this call. Use latest_successful_run
-        to get an object representing a completed successful run.
-
-        Returns
-        -------
-        Run, optional
-            Latest run of this flow
-        """
-        ...
-    @property
-    def latest_successful_run(self) -> typing.Optional[metaflow.client.core.Run]:
-        """
-        Returns the latest successful run of this flow.
-
-        Returns
-        -------
-        Run, optional
-            Latest successful run of this flow
-        """
-        ...
-    def runs(self, *tags: str) -> typing.Iterator[metaflow.client.core.Run]:
-        """
-        Returns an iterator over all `Run`s of this flow.
-
-        An optional filter is available that allows you to filter on tags.
-        If multiple tags are specified, only runs that have all the
-        specified tags are returned.
-
-        Parameters
-        ----------
-        tags : str
-            Tags to match.
-
-        Yields
-        ------
-        Run
-            `Run` objects in this flow.
-        """
-        ...
-    def __iter__(self) -> typing.Iterator[metaflow.client.core.Task]:
-        """
-        Iterate over all children Run of this Flow.
-
-        Note that only runs in the current namespace are returned unless
-        _namespace_check is False.
-
-        Yields
-        ------
-        Run
-            A Run in this Flow
-        """
-        ...
-    def __getitem__(self, run_id: str) -> metaflow.client.core.Run:
-        """
-        Returns the Run object with the run ID 'run_id'
-
-        Parameters
-        ----------
-        run_id : str
-            Run ID
-
-        Returns
-        -------
-        Run
-            Run for this run ID in this Flow
-
-        Raises
-        ------
-        KeyError
-            If the run_id does not identify a valid Run object
-        """
-        ...
-    def __getstate__(self):
-        ...
-    def __setstate__(self, state):
-        ...
-    ...
-
```
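A sketch of navigating `Flow` as documented above (the flow name and tags are hypothetical):

```python
from metaflow import Flow

flow = Flow("HelloFlow")                   # hypothetical flow name
latest = flow.latest_run                   # may still be in progress
done = flow.latest_successful_run          # None if nothing has succeeded yet

# Tag filters are AND-ed: only runs carrying *all* listed tags are yielded.
for run in flow.runs("experiment", "v2"):  # hypothetical tags
    print(run, run.finished)
```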
```diff
-class Run(metaflow.client.core.MetaflowObject, metaclass=type):
-    """
-    A `Run` represents an execution of a `Flow`. It is a container of `Step`s.
-
-    Attributes
-    ----------
-    data : MetaflowData
-        a shortcut to run['end'].task.data, i.e. data produced by this run.
-    successful : bool
-        True if the run completed successfully.
-    finished : bool
-        True if the run completed.
-    finished_at : datetime
-        Time this run finished.
-    code : MetaflowCode
-        Code package for this run (if present). See `MetaflowCode`.
-    trigger : MetaflowTrigger
-        Information about event(s) that triggered this run (if present). See `MetaflowTrigger`.
-    end_task : Task
-        `Task` for the end step (if it is present already).
-    """
-    def steps(self, *tags: str) -> typing.Iterator[metaflow.client.core.Step]:
-        """
-        [Legacy function - do not use]
-
-        Returns an iterator over all `Step` objects in the run. This is an alias
-        to iterating the object itself, i.e.
-        ```
-        list(Run(...)) == list(Run(...).steps())
-        ```
-
-        Parameters
-        ----------
-        tags : str
-            No op (legacy functionality)
-
-        Yields
-        ------
-        Step
-            `Step` objects in this run.
-        """
-        ...
-    @property
-    def code(self) -> typing.Optional[metaflow.client.core.MetaflowCode]:
-        """
-        Returns the MetaflowCode object for this run, if present.
-        Code is packed if at least one `Step` runs remotely, else None is returned.
-
-        Returns
-        -------
-        MetaflowCode, optional
-            Code package for this run
-        """
-        ...
-    @property
-    def data(self) -> typing.Optional[metaflow.client.core.MetaflowData]:
-        """
-        Returns a container of data artifacts produced by this run.
-
-        You can access data produced by this run as follows:
-        ```
-        print(run.data.my_var)
-        ```
-        This is a shorthand for `run['end'].task.data`. If the 'end' step has not yet
-        executed, returns None.
-
-        Returns
-        -------
-        MetaflowData, optional
-            Container of all artifacts produced by this task
-        """
-        ...
-    @property
-    def successful(self) -> bool:
-        """
-        Indicates whether or not the run completed successfully.
-
-        A run is successful if its 'end' step is successful.
-
-        Returns
-        -------
-        bool
-            True if the run completed successfully and False otherwise
-        """
-        ...
-    @property
-    def finished(self) -> bool:
-        """
-        Indicates whether or not the run completed.
-
-        A run completed if its 'end' step completed.
-
-        Returns
-        -------
-        bool
-            True if the run completed and False otherwise
-        """
-        ...
-    @property
-    def finished_at(self) -> typing.Optional[datetime.datetime]:
-        """
-        Returns the datetime object of when the run finished (successfully or not).
-
-        The completion time of a run is the same as the completion time of its 'end' step.
-        If the 'end' step has not completed, returns None.
-
-        Returns
-        -------
-        datetime, optional
-            Datetime of when the run finished
-        """
-        ...
-    @property
-    def end_task(self) -> typing.Optional[metaflow.client.core.Task]:
-        """
-        Returns the Task corresponding to the 'end' step.
-
-        This returns None if the end step does not yet exist.
-
-        Returns
-        -------
-        Task, optional
-            The 'end' task
-        """
-        ...
-    def add_tag(self, tag: str):
-        """
-        Add a tag to this `Run`.
-
-        Note that if the tag is already a system tag, it is not added as a user tag,
-        and no error is thrown.
-
-        Parameters
-        ----------
-        tag : str
-            Tag to add.
-        """
-        ...
-    def add_tags(self, tags: typing.Iterable[str]):
-        """
-        Add one or more tags to this `Run`.
-
-        Note that if any tag is already a system tag, it is not added as a user tag
-        and no error is thrown.
-
-        Parameters
-        ----------
-        tags : Iterable[str]
-            Tags to add.
-        """
-        ...
-    def remove_tag(self, tag: str):
-        """
-        Remove one tag from this `Run`.
-
-        Removing a system tag is an error. Removing a non-existent
-        user tag is a no-op.
-
-        Parameters
-        ----------
-        tag : str
-            Tag to remove.
-        """
-        ...
-    def remove_tags(self, tags: typing.Iterable[str]):
-        """
-        Remove one or more tags from this `Run`.
-
-        Removing a system tag will result in an error. Removing a non-existent
-        user tag is a no-op.
-
-        Parameters
-        ----------
-        tags : Iterable[str]
-            Tags to remove.
-        """
-        ...
-    def replace_tag(self, tag_to_remove: str, tag_to_add: str):
-        """
-        Remove a tag and add a tag atomically. Removal is done first.
-        The rules for `Run.add_tag` and `Run.remove_tag` also apply here.
-
-        Parameters
-        ----------
-        tag_to_remove : str
-            Tag to remove.
-        tag_to_add : str
-            Tag to add.
-        """
-        ...
-    def replace_tags(self, tags_to_remove: typing.Iterable[str], tags_to_add: typing.Iterable[str]):
-        """
-        Remove and add tags atomically; the removal is done first.
-        The rules for `Run.add_tag` and `Run.remove_tag` also apply here.
-
-        Parameters
-        ----------
-        tags_to_remove : Iterable[str]
-            Tags to remove.
-        tags_to_add : Iterable[str]
-            Tags to add.
-        """
-        ...
-    def __iter__(self) -> typing.Iterator[metaflow.client.core.Step]:
-        """
-        Iterate over all children Step of this Run
-
-        Yields
-        ------
-        Step
-            A Step in this Run
-        """
-        ...
-    def __getitem__(self, name: str) -> metaflow.client.core.Step:
-        """
-        Returns the Step object with the step name 'name'
-
-        Parameters
-        ----------
-        name : str
-            Step name
-
-        Returns
-        -------
-        Step
-            Step for this step name in this Run
-
-        Raises
-        ------
-        KeyError
-            If the name does not identify a valid Step object
-        """
-        ...
-    def __getstate__(self):
-        ...
-    def __setstate__(self, state):
-        ...
-    @property
-    def trigger(self) -> typing.Optional[metaflow.events.Trigger]:
-        """
-        Returns a container of events that triggered this run.
-
-        This returns None if the run was not triggered by any events.
-
-        Returns
-        -------
-        Trigger, optional
-            Container of triggering events
-        """
-        ...
-    ...
-
```
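A sketch of the `Run` API above, combining data access with the tag-mutation rules (the pathspec, artifact name, and tags are hypothetical; note that `replace_tag` removes before it adds):

```python
from metaflow import Run

run = Run("HelloFlow/42")            # hypothetical 'flow/run_id' pathspec
if run.successful:
    print(run.data.my_var)           # shorthand for run['end'].task.data

run.add_tags(["reviewed", "q3"])     # adding an existing system tag is ignored
run.replace_tag("q3", "q4")          # atomic: removal first, then addition
run.remove_tag("reviewed")           # removing a missing user tag is a no-op
```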
```diff
-class Step(metaflow.client.core.MetaflowObject, metaclass=type):
-    """
-    A `Step` represents a user-defined step, that is, a method annotated with the `@step` decorator.
-
-    It contains `Task` objects associated with the step, that is, all executions of the
-    `Step`. The step may contain multiple `Task`s in the case of a foreach step.
-
-    Attributes
-    ----------
-    task : Task
-        The first `Task` object in this step. This is a shortcut for retrieving the only
-        task contained in a non-foreach step.
-    finished_at : datetime
-        Time when the latest `Task` of this step finished. Note that in the case of foreaches,
-        this time may change during execution of the step.
-    environment_info : Dict[str, Any]
-        Information about the execution environment.
-    """
-    @property
-    def task(self) -> typing.Optional[metaflow.client.core.Task]:
-        """
-        Returns a Task object belonging to this step.
-
-        This is useful when the step only contains one task (a linear step for example).
-
-        Returns
-        -------
-        Task
-            A task in the step
-        """
-        ...
-    def tasks(self, *tags: str) -> typing.Iterable[metaflow.client.core.Task]:
-        """
-        [Legacy function - do not use]
-
-        Returns an iterator over all `Task` objects in the step. This is an alias
-        to iterating the object itself, i.e.
-        ```
-        list(Step(...)) == list(Step(...).tasks())
-        ```
-
-        Parameters
-        ----------
-        tags : str
-            No op (legacy functionality)
-
-        Yields
-        ------
-        Task
-            `Task` objects in this step.
-        """
-        ...
-    @property
-    def control_task(self) -> typing.Optional[metaflow.client.core.Task]:
-        """
-        [Unpublished API - use with caution!]
-
-        Returns a Control Task object belonging to this step.
-        This is useful when the step only contains one control task.
-
-        Returns
-        -------
-        Task
-            A control task in the step
-        """
-        ...
-    def control_tasks(self, *tags: str) -> typing.Iterator[metaflow.client.core.Task]:
-        """
-        [Unpublished API - use with caution!]
-
-        Returns an iterator over all the control tasks in the step.
-        An optional filter is available that allows you to filter on tags. The
-        control tasks returned if the filter is specified will contain all the
-        tags specified.
-
-        Parameters
-        ----------
-        tags : str
-            Tags to match
-
-        Yields
-        ------
-        Task
-            Control Task objects for this step
-        """
-        ...
-    def __iter__(self) -> typing.Iterator[metaflow.client.core.Task]:
-        """
-        Iterate over all children Task of this Step
-
-        Yields
-        ------
-        Task
-            A Task in this Step
-        """
-        ...
-    def __getitem__(self, task_id: str) -> metaflow.client.core.Task:
-        """
-        Returns the Task object with the task ID 'task_id'
-
-        Parameters
-        ----------
-        task_id : str
-            Task ID
-
-        Returns
-        -------
-        Task
-            Task for this task ID in this Step
-
-        Raises
-        ------
-        KeyError
-            If the task_id does not identify a valid Task object
-        """
-        ...
-    def __getstate__(self):
-        ...
-    def __setstate__(self, state):
-        ...
-    @property
-    def finished_at(self) -> typing.Optional[datetime.datetime]:
-        """
-        Returns the datetime object of when the step finished (successfully or not).
-
-        A step is considered finished when all the tasks that belong to it have
-        finished. This call will return None if the step has not finished.
-
-        Returns
-        -------
-        datetime
-            Datetime of when the step finished
-        """
-        ...
-    @property
-    def environment_info(self) -> typing.Optional[typing.Dict[str, typing.Any]]:
-        """
-        Returns information about the environment that was used to execute this step. As an
-        example, if the Conda environment is selected, this will return information about the
-        dependencies that were used in the environment.
-
-        This environment information is only available for steps that have tasks
-        for which the code package has been saved.
-
-        Returns
-        -------
-        Dict[str, Any], optional
-            Dictionary describing the environment
-        """
-        ...
-    ...
-
```
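A sketch of the `Step` container above (pathspec hypothetical); for a foreach step, iteration yields one `Task` per split:

```python
from metaflow import Step

step = Step("HelloFlow/42/train")     # hypothetical 'flow/run_id/step' pathspec
print(step.task)                      # the single task of a non-foreach step

for task in step:                     # foreach steps contain many tasks
    print(task.index, task.finished)  # index is None outside a foreach
```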
```diff
-class Task(metaflow.client.core.MetaflowObject, metaclass=type):
-    """
-    A `Task` represents an execution of a `Step`.
-
-    It contains all `DataArtifact` objects produced by the task as
-    well as metadata related to execution.
-
-    Note that the `@retry` decorator may cause multiple attempts of
-    the task to be present. Usually you want the latest attempt, which
-    is what instantiating a `Task` object returns by default. If
-    you need to e.g. retrieve logs from a failed attempt, you can
-    explicitly get information about a specific attempt by using the
-    following syntax when creating a task:
-
-    `Task('flow/run/step/task', attempt=<attempt>)`
-
-    where `attempt=0` corresponds to the first attempt etc.
-
-    Attributes
-    ----------
-    metadata : List[Metadata]
-        List of all metadata events associated with the task.
-    metadata_dict : Dict[str, str]
-        A condensed version of `metadata`: A dictionary where keys
-        are names of metadata events and values the latest corresponding event.
-    data : MetaflowData
-        Container of all data artifacts produced by this task. Note that this
-        call downloads all data locally, so it can be slower than accessing
-        artifacts individually. See `MetaflowData` for more information.
-    artifacts : MetaflowArtifacts
-        Container of `DataArtifact` objects produced by this task.
-    successful : bool
-        True if the task completed successfully.
-    finished : bool
-        True if the task completed.
-    exception : object
-        Exception raised by this task if there was one.
-    finished_at : datetime
-        Time this task finished.
-    runtime_name : str
-        Runtime this task was executed on.
-    stdout : str
-        Standard output for the task execution.
-    stderr : str
-        Standard error output for the task execution.
-    code : MetaflowCode
-        Code package for this task (if present). See `MetaflowCode`.
-    environment_info : Dict[str, str]
-        Information about the execution environment.
-    """
-    def __init__(self, *args, **kwargs):
-        ...
-    @property
-    def metadata(self) -> typing.List[metaflow.client.core.Metadata]:
-        """
-        Metadata events produced by this task across all attempts of the task
-        *except* if you selected a specific task attempt.
-
-        Note that Metadata is different from tags.
-
-        Returns
-        -------
-        List[Metadata]
-            Metadata produced by this task
-        """
-        ...
-    @property
-    def metadata_dict(self) -> typing.Dict[str, str]:
-        """
-        Dictionary mapping metadata names (keys) and their associated values.
-
-        Note that unlike the metadata() method, this call will only return the latest
-        metadata for a given name. For example, if a task executes multiple times (retries),
-        the same metadata name will be generated multiple times (one for each execution of the
-        task). The metadata() method returns all those metadata elements whereas this call will
-        return the metadata associated with the latest execution of the task.
-
-        Returns
-        -------
-        Dict[str, str]
-            Dictionary mapping metadata name with value
-        """
-        ...
-    @property
-    def index(self) -> typing.Optional[int]:
-        """
-        Returns the index of the innermost foreach loop if this task is run inside at least
-        one foreach.
-
-        The index is what distinguishes the various tasks inside a given step.
-        This call returns None if this task was not run in a foreach loop.
-
-        Returns
-        -------
-        int, optional
-            Index in the innermost loop for this task
-        """
-        ...
-    @property
-    def data(self) -> metaflow.client.core.MetaflowData:
-        """
-        Returns a container of data artifacts produced by this task.
-
-        You can access data produced by this task as follows:
-        ```
-        print(task.data.my_var)
-        ```
-
-        Returns
-        -------
-        MetaflowData
-            Container of all artifacts produced by this task
-        """
-        ...
-    @property
-    def artifacts(self) -> typing.NamedTuple:
-        """
-        Returns a container of DataArtifacts produced by this task.
-
-        You can access each DataArtifact by name like so:
-        ```
-        print(task.artifacts.my_var)
-        ```
-        This method differs from data() because it returns DataArtifact objects
-        (which contain additional metadata) as opposed to just the data.
-
-        Returns
-        -------
-        MetaflowArtifacts
-            Container of all DataArtifacts produced by this task
-        """
-        ...
-    @property
-    def successful(self) -> bool:
-        """
-        Indicates whether or not the task completed successfully.
-
-        This information is always about the latest task to have completed (in case
-        of retries).
-
-        Returns
-        -------
-        bool
-            True if the task completed successfully and False otherwise
-        """
-        ...
-    @property
-    def finished(self) -> bool:
-        """
-        Indicates whether or not the task completed.
-
-        This information is always about the latest task to have completed (in case
-        of retries).
-
-        Returns
-        -------
-        bool
-            True if the task completed and False otherwise
-        """
-        ...
-    @property
-    def exception(self) -> typing.Optional[typing.Any]:
-        """
-        Returns the exception that caused the task to fail, if any.
-
-        This information is always about the latest task to have completed (in case
-        of retries). If successful() returns False and finished() returns True,
-        this method can help determine what went wrong.
-
-        Returns
-        -------
-        object
-            Exception raised by the task or None if not applicable
-        """
-        ...
-    @property
-    def finished_at(self) -> typing.Optional[datetime.datetime]:
-        """
-        Returns the datetime object of when the task finished (successfully or not).
-
-        This information is always about the latest task to have completed (in case
-        of retries). This call will return None if the task is not finished.
-
-        Returns
-        -------
-        datetime
-            Datetime of when the task finished
-        """
-        ...
-    @property
-    def runtime_name(self) -> typing.Optional[str]:
-        """
-        Returns the name of the runtime this task executed on.
-
-        Returns
-        -------
-        str
-            Name of the runtime this task executed on
-        """
-        ...
-    @property
-    def stdout(self) -> str:
-        """
-        Returns the full standard out of this task.
-
-        If you specify a specific attempt for this task, it will return the
-        standard out for that attempt. If you do not specify an attempt,
-        this will return the current standard out for the latest *started*
-        attempt of the task. In both cases, multiple calls to this
-        method will return the most up-to-date log (so if an attempt is not
-        done, each call will fetch the latest log).
-
-        Returns
-        -------
-        str
-            Standard output of this task
-        """
-        ...
-    @property
-    def stdout_size(self) -> int:
-        """
-        Returns the size of the stdout log of this task.
-
-        Similar to `stdout`, the size returned is the latest size of the log
-        (so for a running attempt, this value will increase as the task produces
-        more output).
-
-        Returns
-        -------
-        int
-            Size of the stdout log content (in bytes)
-        """
-        ...
-    @property
-    def stderr(self) -> str:
-        """
-        Returns the full standard error of this task.
-
-        If you specify a specific attempt for this task, it will return the
-        standard error for that attempt. If you do not specify an attempt,
-        this will return the current standard error for the latest *started*
-        attempt. In both cases, multiple calls to this
-        method will return the most up-to-date log (so if an attempt is not
-        done, each call will fetch the latest log).
-
-        Returns
-        -------
-        str
-            Standard error of this task
-        """
-        ...
-    @property
-    def stderr_size(self) -> int:
-        """
-        Returns the size of the stderr log of this task.
-
-        Similar to `stderr`, the size returned is the latest size of the log
-        (so for a running attempt, this value will increase as the task produces
-        more output).
-
-        Returns
-        -------
-        int
-            Size of the stderr log content (in bytes)
-        """
-        ...
-    @property
-    def current_attempt(self) -> int:
-        """
-        Get the relevant attempt for this Task.
-
-        Returns the specific attempt used when
-        initializing the instance, or the latest *started* attempt for the Task.
-
-        Returns
-        -------
-        int
-            attempt id for this task object
-        """
-        ...
-    @property
-    def code(self) -> typing.Optional[metaflow.client.core.MetaflowCode]:
-        """
-        Returns the MetaflowCode object for this task, if present.
-
-        Not all tasks save their code so this call may return None in those cases.
-
-        Returns
-        -------
-        MetaflowCode
-            Code package for this task
-        """
-        ...
-    @property
-    def environment_info(self) -> typing.Dict[str, typing.Any]:
-        """
-        Returns information about the environment that was used to execute this task. As an
-        example, if the Conda environment is selected, this will return information about the
-        dependencies that were used in the environment.
-
-        This environment information is only available for tasks that have a code package.
-
-        Returns
-        -------
-        Dict
-            Dictionary describing the environment
-        """
-        ...
-    def loglines(self, stream: str, as_unicode: bool = True, meta_dict: typing.Optional[typing.Dict[str, typing.Any]] = None) -> typing.Iterator[typing.Tuple[datetime.datetime, str]]:
-        """
-        Return an iterator over (utc_timestamp, logline) tuples.
-
-        Parameters
-        ----------
-        stream : str
-            Either 'stdout' or 'stderr'.
-        as_unicode : bool, default: True
-            If as_unicode=False, each logline is returned as a byte object. Otherwise,
-            it is returned as a (unicode) string.
-
-        Yields
-        ------
-        Tuple[datetime, str]
-            Tuple of timestamp, logline pairs.
-        """
-        ...
-    def __iter__(self) -> typing.Iterator[metaflow.client.core.DataArtifact]:
-        """
-        Iterate over all children DataArtifact of this Task
-
-        Yields
-        ------
-        DataArtifact
-            A DataArtifact in this Task
-        """
-        ...
-    def __getitem__(self, name: str) -> metaflow.client.core.DataArtifact:
-        """
-        Returns the DataArtifact object with the artifact name 'name'
-
-        Parameters
-        ----------
-        name : str
-            Data artifact name
-
-        Returns
-        -------
-        DataArtifact
-            DataArtifact for this artifact name in this task
-
-        Raises
-        ------
-        KeyError
-            If the name does not identify a valid DataArtifact object
-        """
-        ...
-    def __getstate__(self):
-        ...
-    def __setstate__(self, state):
-        ...
-    ...
-
```
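A sketch of the `Task` API above, including the attempt-pinning syntax the docstring describes (pathspec hypothetical):

```python
from metaflow import Task

# Default is the latest attempt; pin attempt=0 to inspect the first try
# of a task that @retry re-executed.
task = Task("HelloFlow/42/train/1234", attempt=0)  # hypothetical pathspec

if task.finished and not task.successful:
    print(task.exception)             # what went wrong in this attempt
    print(task.stderr)                # full stderr of this attempt

# Timestamped log lines instead of one large string:
for ts, line in task.loglines("stdout"):
    print(ts.isoformat(), line)
```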
```diff
-class DataArtifact(metaflow.client.core.MetaflowObject, metaclass=type):
-    """
-    A single data artifact and associated metadata. Note that this object does
-    not contain other objects as it is the leaf object in the hierarchy.
-
-    Attributes
-    ----------
-    data : object
-        The data contained in this artifact, that is, the object produced during
-        execution of this run.
-    sha : string
-        A unique ID of this artifact.
-    finished_at : datetime
-        Corresponds roughly to the `Task.finished_at` time of the parent `Task`.
-        An alias for `DataArtifact.created_at`.
-    """
-    @property
-    def data(self) -> typing.Any:
-        """
-        Unpickled representation of the data contained in this artifact.
-
-        Returns
-        -------
-        object
-            Object contained in this artifact
-        """
-        ...
-    @property
-    def size(self) -> int:
-        """
-        Returns the size (in bytes) of the pickled object representing this
-        DataArtifact
-
-        Returns
-        -------
-        int
-            size of the pickled representation of data artifact (in bytes)
-        """
-        ...
-    @property
-    def sha(self) -> str:
-        """
-        Unique identifier for this artifact.
-
-        This is a unique hash of the artifact (historically SHA1 hash)
-
-        Returns
-        -------
-        str
-            Hash of this artifact
-        """
-        ...
-    @property
-    def finished_at(self) -> datetime.datetime:
-        """
-        Creation time for this artifact.
-
-        Alias for created_at.
-
-        Returns
-        -------
-        datetime
-            Creation time
-        """
-        ...
-    def __getstate__(self):
-        ...
-    def __setstate__(self, state):
-        ...
-    ...
-
```
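A sketch of the leaf `DataArtifact` object above (the pathspec and artifact name are hypothetical); checking `size` is cheap, while `data` unpickles the actual object:

```python
from metaflow import DataArtifact

art = DataArtifact("HelloFlow/42/train/1234/model")  # hypothetical pathspec
print(art.sha)     # content hash identifying this artifact
print(art.size)    # pickled size in bytes, without loading the object
model = art.data   # unpickled object produced by the run
```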
```diff
-class Runner(object, metaclass=type):
-    """
-    Metaflow's Runner API that presents a programmatic interface
-    to run flows and perform other operations either synchronously or asynchronously.
-    The class expects a path to the flow file along with optional arguments
-    that match top-level options on the command-line.
-
-    This class works as a context manager, calling `cleanup()` to remove
-    temporary files at exit.
-
-    Example:
-    ```python
-    with Runner('slowflow.py', pylint=False) as runner:
-        result = runner.run(alpha=5, tags=["abc", "def"], max_workers=5)
-        print(result.run.finished)
-    ```
-
-    Parameters
-    ----------
-    flow_file : str
-        Path to the flow file to run
-    show_output : bool, default True
-        Show the 'stdout' and 'stderr' to the console by default.
-        Only applicable for synchronous 'run' and 'resume' functions.
-    profile : Optional[str], default None
-        Metaflow profile to use to run this run. If not specified, the default
-        profile is used (or the one already set using `METAFLOW_PROFILE`)
-    env : Optional[Dict], default None
-        Additional environment variables to set for the Run. This overrides the
-        environment set for this process.
-    cwd : Optional[str], default None
-        The directory to run the subprocess in; if not specified, the current
-        directory is used.
-    file_read_timeout : int, default 3600
-        The timeout until which we try to read the runner attribute file.
-    **kwargs : Any
-        Additional arguments that you would pass to `python myflow.py` before
-        the `run` command.
-    """
-    def __init__(self, flow_file: str, show_output: bool = True, profile: typing.Optional[str] = None, env: typing.Optional[typing.Dict] = None, cwd: typing.Optional[str] = None, file_read_timeout: int = 3600, **kwargs):
-        ...
-    def __enter__(self) -> metaflow.runner.metaflow_runner.Runner:
-        ...
-    def __aenter__(self) -> metaflow.runner.metaflow_runner.Runner:
-        ...
-    def _Runner__get_executing_run(self, tfp_runner_attribute, command_obj):
-        ...
-    def run(self, **kwargs) -> metaflow.runner.metaflow_runner.ExecutingRun:
-        """
-        Blocking execution of the run. This method will wait until
-        the run has completed execution.
-
-        Parameters
-        ----------
-        **kwargs : Any
-            Additional arguments that you would pass to `python myflow.py` after
-            the `run` command, in particular, any parameters accepted by the flow.
-
-        Returns
-        -------
-        ExecutingRun
-            ExecutingRun containing the results of the run.
-        """
-        ...
-    def resume(self, **kwargs):
-        """
-        Blocking resume execution of the run.
-        This method will wait until the resumed run has completed execution.
-
-        Parameters
-        ----------
-        **kwargs : Any
-            Additional arguments that you would pass to `python ./myflow.py` after
-            the `resume` command.
-
-        Returns
-        -------
-        ExecutingRun
-            ExecutingRun containing the results of the resumed run.
-        """
-        ...
-    def async_run(self, **kwargs) -> metaflow.runner.metaflow_runner.ExecutingRun:
-        """
-        Non-blocking execution of the run. This method will return as soon as the
-        run has launched.
-
-        Note that this method is asynchronous and needs to be `await`ed.
-
-        Parameters
-        ----------
-        **kwargs : Any
-            Additional arguments that you would pass to `python myflow.py` after
-            the `run` command, in particular, any parameters accepted by the flow.
-
-        Returns
-        -------
-        ExecutingRun
-            ExecutingRun representing the run that was started.
-        """
-        ...
-    def async_resume(self, **kwargs):
-        """
-        Non-blocking resume execution of the run.
-        This method will return as soon as the resume has launched.
-
-        Note that this method is asynchronous and needs to be `await`ed.
-
-        Parameters
-        ----------
-        **kwargs : Any
-            Additional arguments that you would pass to `python myflow.py` after
-            the `resume` command.
-
-        Returns
-        -------
-        ExecutingRun
-            ExecutingRun representing the resumed run that was started.
-        """
-        ...
-    def __exit__(self, exc_type, exc_value, traceback):
-        ...
-    def __aexit__(self, exc_type, exc_value, traceback):
-        ...
-    def cleanup(self):
-        """
-        Delete any temporary files created during execution.
-        """
-        ...
-    ...
-
```
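A sketch of both the blocking and the `await`-based `Runner` entry points above. The flow file and parameter are hypothetical, and the final `wait()` call assumes `ExecutingRun` exposes an awaitable `wait()`:

```python
import asyncio

from metaflow import Runner

# Blocking: the context manager calls cleanup() on exit.
with Runner("myflow.py", pylint=False) as runner:   # hypothetical flow file
    result = runner.run(alpha=5, max_workers=4)     # 'alpha' is a flow param
    print(result.run.finished)

# Non-blocking: async_run returns as soon as the subprocess launches.
async def main():
    async with Runner("myflow.py") as runner:
        executing = await runner.async_run(alpha=5)
        await executing.wait()  # assumption: ExecutingRun.wait() is awaitable

asyncio.run(main())
```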
```diff
-class NBRunner(object, metaclass=type):
-    """
-    A wrapper over `Runner` for executing flows defined in a Jupyter
-    notebook cell.
-
-    Instantiate this class on the last line of a notebook cell where
-    a `flow` is defined. In contrast to `Runner`, this class is not
-    meant to be used in a context manager. Instead, use a blocking helper
-    function like `nbrun` (which calls `cleanup()` internally) or call
-    `cleanup()` explicitly when using non-blocking APIs.
-
-    ```python
-    run = NBRunner(FlowName).nbrun()
-    ```
-
-    Parameters
-    ----------
-    flow : FlowSpec
-        Flow defined in the same cell
-    show_output : bool, default True
-        Show the 'stdout' and 'stderr' to the console by default.
-        Only applicable for synchronous 'run' and 'resume' functions.
-    profile : Optional[str], default None
-        Metaflow profile to use to run this run. If not specified, the default
-        profile is used (or the one already set using `METAFLOW_PROFILE`)
-    env : Optional[Dict], default None
-        Additional environment variables to set for the Run. This overrides the
-        environment set for this process.
-    base_dir : Optional[str], default None
-        The directory to run the subprocess in; if not specified, the current
-        working directory is used.
-    file_read_timeout : int, default 3600
-        The timeout until which we try to read the runner attribute file.
-    **kwargs : Any
-        Additional arguments that you would pass to `python myflow.py` before
-        the `run` command.
-    """
-    def __init__(self, flow, show_output: bool = True, profile: typing.Optional[str] = None, env: typing.Optional[typing.Dict] = None, base_dir: typing.Optional[str] = None, file_read_timeout: int = 3600, **kwargs):
-        ...
-    def nbrun(self, **kwargs):
-        """
-        Blocking execution of the run. This method will wait until
-        the run has completed execution.
-
-        Note that in contrast to `run`, this method returns a
-        `metaflow.Run` object directly and calls `cleanup()` internally
-        to support a common notebook pattern of executing a flow and
-        retrieving its results immediately.
-
-        Parameters
-        ----------
-        **kwargs : Any
-            Additional arguments that you would pass to `python myflow.py` after
-            the `run` command, in particular, any parameters accepted by the flow.
-
-        Returns
-        -------
-        Run
-            A `metaflow.Run` object representing the finished run.
-        """
-        ...
-    def nbresume(self, **kwargs):
-        """
-        Blocking resuming of a run. This method will wait until
-        the resumed run has completed execution.
-
-        Note that in contrast to `resume`, this method returns a
-        `metaflow.Run` object directly and calls `cleanup()` internally
-        to support a common notebook pattern of executing a flow and
-        retrieving its results immediately.
-
-        Parameters
-        ----------
-        **kwargs : Any
-            Additional arguments that you would pass to `python myflow.py` after
-            the `resume` command.
-
-        Returns
-        -------
-        Run
-            A `metaflow.Run` object representing the resumed run.
-        """
-        ...
-    def run(self, **kwargs):
-        """
-        Runs the flow.
-        """
-        ...
-    def resume(self, **kwargs):
-        """
-        Resumes the flow.
-        """
-        ...
-    def async_run(self, **kwargs):
-        """
-        Non-blocking execution of the run. This method will return as soon as the
-        run has launched. This method is equivalent to `Runner.async_run`.
-
-        Note that this method is asynchronous and needs to be `await`ed.
-
-        Parameters
-        ----------
-        **kwargs : Any
-            Additional arguments that you would pass to `python myflow.py` after
-            the `run` command, in particular, any parameters accepted by the flow.
-
-        Returns
-        -------
-        ExecutingRun
-            ExecutingRun representing the run that was started.
-        """
-        ...
-    def async_resume(self, **kwargs):
-        """
-        Non-blocking resume of the run. This method will return as soon as the
-        resume has launched. This method is equivalent to `Runner.async_resume`.
-
-        Note that this method is asynchronous and needs to be `await`ed.
-
-        Parameters
-        ----------
-        **kwargs : Any
-            Additional arguments that you would pass to `python myflow.py` after
-            the `resume` command, in particular, any parameters accepted by the flow.
-
-        Returns
-        -------
-        ExecutingRun
-            ExecutingRun representing the run that was started.
-        """
-        ...
-    def cleanup(self):
-        """
-        Delete any temporary files created during execution.
-
-        Call this method after using `async_run` or `async_resume`. You don't
-        have to call this after `nbrun` or `nbresume`.
-        """
-        ...
-    ...
-
```
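A sketch of the notebook-cell pattern the `NBRunner` docstring above describes; `MyFlow` stands for a `FlowSpec` defined earlier in the same (hypothetical) cell:

```python
from metaflow import NBRunner

# Last line of the cell that defines MyFlow:
run = NBRunner(MyFlow).nbrun(alpha=5)  # blocking; calls cleanup() itself
print(run.successful)

# With async_run()/async_resume(), call cleanup() explicitly afterwards.
```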
```diff
-class Deployer(object, metaclass=type):
-    """
-    Use the `Deployer` class to configure and access one of the production
-    orchestrators supported by Metaflow.
-
-    Parameters
-    ----------
-    flow_file : str
-        Path to the flow file to deploy.
-    show_output : bool, default True
-        Show the 'stdout' and 'stderr' to the console by default.
-    profile : Optional[str], default None
-        Metaflow profile to use for the deployment. If not specified, the default
-        profile is used.
-    env : Optional[Dict[str, str]], default None
-        Additional environment variables to set for the deployment.
-    cwd : Optional[str], default None
-        The directory to run the subprocess in; if not specified, the current
-        directory is used.
-    file_read_timeout : int, default 3600
-        The timeout until which we try to read the deployer attribute file.
-    **kwargs : Any
-        Additional arguments that you would pass to `python myflow.py` before
-        the deployment command.
-    """
-    def __init__(self, flow_file: str, show_output: bool = True, profile: typing.Optional[str] = None, env: typing.Optional[typing.Dict] = None, cwd: typing.Optional[str] = None, file_read_timeout: int = 3600, **kwargs):
-        ...
-    def _Deployer__make_function(self, deployer_class):
-        """
-        Create a function for the given deployer class.
-
-        Parameters
-        ----------
-        deployer_class : Type[DeployerImpl]
-            Deployer implementation class.
-
-        Returns
-        -------
-        Callable
-            Function that initializes and returns an instance of the deployer class.
-        """
-        ...
-    ...
-
```
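A sketch of `Deployer` above, mirroring the `NBDeployer` example that follows: the orchestrator-specific method (here `argo_workflows`) is generated dynamically via `_Deployer__make_function`, and the flow file, deployment name, and parameter are hypothetical:

```python
from metaflow import Deployer

deployer = Deployer("myflow.py")                    # hypothetical flow file
deployed = deployer.argo_workflows(name="hello").create()
run_obj = deployed.trigger(alpha=300)               # 'alpha' is a flow param
print(run_obj.status)
run_obj.terminate()
```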
```diff
-class NBDeployer(object, metaclass=type):
-    """
-    A wrapper over `Deployer` for deploying flows defined in a Jupyter
-    notebook cell.
-
-    Instantiate this class on the last line of a notebook cell where
-    a `flow` is defined. In contrast to `Deployer`, this class is not
-    meant to be used in a context manager.
-
-    ```python
-    deployer = NBDeployer(FlowName)
-    ar = deployer.argo_workflows(name="madhur")
-    ar_obj = ar.create()
-    result = ar_obj.trigger(alpha=300)
-    print(result.status)
-    print(result.run)
-    result.terminate()
-    ```
-
-    Parameters
-    ----------
-    flow : FlowSpec
-        Flow defined in the same cell
-    show_output : bool, default True
-        Show the 'stdout' and 'stderr' to the console by default.
-    profile : Optional[str], default None
-        Metaflow profile to use to deploy this run. If not specified, the default
-        profile is used (or the one already set using `METAFLOW_PROFILE`)
-    env : Optional[Dict[str, str]], default None
-        Additional environment variables to set. This overrides the
-        environment set for this process.
-    base_dir : Optional[str], default None
-        The directory to run the subprocess in; if not specified, the current
-        working directory is used.
-    **kwargs : Any
-        Additional arguments that you would pass to `python myflow.py` i.e. options
-        listed in `python myflow.py --help`
-    """
-    def __init__(self, flow, show_output: bool = True, profile: typing.Optional[str] = None, env: typing.Optional[typing.Dict] = None, base_dir: typing.Optional[str] = None, file_read_timeout: int = 3600, **kwargs):
-        ...
-    def cleanup(self):
-        """
-        Delete any temporary files created during execution.
-        """
-        ...
     ...

```