ob-metaflow 2.12.30.2__py2.py3-none-any.whl → 2.13.6.1__py2.py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of ob-metaflow might be problematic.
- metaflow/__init__.py +3 -0
- metaflow/cards.py +1 -0
- metaflow/cli.py +185 -717
- metaflow/cli_args.py +17 -0
- metaflow/cli_components/__init__.py +0 -0
- metaflow/cli_components/dump_cmd.py +96 -0
- metaflow/cli_components/init_cmd.py +51 -0
- metaflow/cli_components/run_cmds.py +362 -0
- metaflow/cli_components/step_cmd.py +176 -0
- metaflow/cli_components/utils.py +140 -0
- metaflow/cmd/develop/stub_generator.py +9 -2
- metaflow/datastore/flow_datastore.py +2 -2
- metaflow/decorators.py +63 -2
- metaflow/exception.py +8 -2
- metaflow/extension_support/plugins.py +42 -27
- metaflow/flowspec.py +176 -23
- metaflow/graph.py +28 -27
- metaflow/includefile.py +50 -22
- metaflow/lint.py +35 -20
- metaflow/metadata_provider/heartbeat.py +23 -8
- metaflow/metaflow_config.py +10 -1
- metaflow/multicore_utils.py +31 -14
- metaflow/package.py +17 -3
- metaflow/parameters.py +97 -25
- metaflow/plugins/__init__.py +22 -0
- metaflow/plugins/airflow/airflow.py +18 -17
- metaflow/plugins/airflow/airflow_cli.py +1 -0
- metaflow/plugins/argo/argo_client.py +0 -2
- metaflow/plugins/argo/argo_workflows.py +195 -132
- metaflow/plugins/argo/argo_workflows_cli.py +1 -1
- metaflow/plugins/argo/argo_workflows_decorator.py +2 -4
- metaflow/plugins/argo/argo_workflows_deployer_objects.py +51 -9
- metaflow/plugins/argo/jobset_input_paths.py +0 -1
- metaflow/plugins/aws/aws_utils.py +6 -1
- metaflow/plugins/aws/batch/batch_client.py +1 -3
- metaflow/plugins/aws/batch/batch_decorator.py +13 -13
- metaflow/plugins/aws/secrets_manager/aws_secrets_manager_secrets_provider.py +13 -10
- metaflow/plugins/aws/step_functions/dynamo_db_client.py +0 -3
- metaflow/plugins/aws/step_functions/production_token.py +1 -1
- metaflow/plugins/aws/step_functions/step_functions.py +33 -1
- metaflow/plugins/aws/step_functions/step_functions_cli.py +1 -1
- metaflow/plugins/aws/step_functions/step_functions_decorator.py +0 -1
- metaflow/plugins/aws/step_functions/step_functions_deployer_objects.py +7 -9
- metaflow/plugins/cards/card_cli.py +7 -2
- metaflow/plugins/cards/card_creator.py +1 -0
- metaflow/plugins/cards/card_decorator.py +79 -8
- metaflow/plugins/cards/card_modules/basic.py +56 -5
- metaflow/plugins/cards/card_modules/card.py +16 -1
- metaflow/plugins/cards/card_modules/components.py +64 -16
- metaflow/plugins/cards/card_modules/main.js +27 -25
- metaflow/plugins/cards/card_modules/test_cards.py +4 -4
- metaflow/plugins/cards/component_serializer.py +1 -1
- metaflow/plugins/datatools/s3/s3.py +12 -4
- metaflow/plugins/datatools/s3/s3op.py +3 -3
- metaflow/plugins/events_decorator.py +338 -186
- metaflow/plugins/kubernetes/kube_utils.py +84 -1
- metaflow/plugins/kubernetes/kubernetes.py +40 -92
- metaflow/plugins/kubernetes/kubernetes_cli.py +32 -7
- metaflow/plugins/kubernetes/kubernetes_decorator.py +76 -4
- metaflow/plugins/kubernetes/kubernetes_job.py +23 -20
- metaflow/plugins/kubernetes/kubernetes_jobsets.py +41 -20
- metaflow/plugins/kubernetes/spot_metadata_cli.py +69 -0
- metaflow/plugins/kubernetes/spot_monitor_sidecar.py +109 -0
- metaflow/plugins/parallel_decorator.py +4 -1
- metaflow/plugins/project_decorator.py +33 -5
- metaflow/plugins/pypi/bootstrap.py +249 -81
- metaflow/plugins/pypi/conda_decorator.py +20 -10
- metaflow/plugins/pypi/conda_environment.py +83 -27
- metaflow/plugins/pypi/micromamba.py +82 -37
- metaflow/plugins/pypi/pip.py +9 -6
- metaflow/plugins/pypi/pypi_decorator.py +11 -9
- metaflow/plugins/pypi/utils.py +4 -2
- metaflow/plugins/timeout_decorator.py +2 -2
- metaflow/runner/click_api.py +240 -50
- metaflow/runner/deployer.py +1 -1
- metaflow/runner/deployer_impl.py +12 -11
- metaflow/runner/metaflow_runner.py +68 -34
- metaflow/runner/nbdeploy.py +2 -0
- metaflow/runner/nbrun.py +1 -1
- metaflow/runner/subprocess_manager.py +61 -10
- metaflow/runner/utils.py +208 -44
- metaflow/runtime.py +216 -112
- metaflow/sidecar/sidecar_worker.py +1 -1
- metaflow/tracing/tracing_modules.py +4 -1
- metaflow/user_configs/__init__.py +0 -0
- metaflow/user_configs/config_decorators.py +563 -0
- metaflow/user_configs/config_options.py +548 -0
- metaflow/user_configs/config_parameters.py +436 -0
- metaflow/util.py +22 -0
- metaflow/version.py +1 -1
- {ob_metaflow-2.12.30.2.dist-info → ob_metaflow-2.13.6.1.dist-info}/METADATA +12 -3
- {ob_metaflow-2.12.30.2.dist-info → ob_metaflow-2.13.6.1.dist-info}/RECORD +96 -84
- {ob_metaflow-2.12.30.2.dist-info → ob_metaflow-2.13.6.1.dist-info}/WHEEL +1 -1
- {ob_metaflow-2.12.30.2.dist-info → ob_metaflow-2.13.6.1.dist-info}/LICENSE +0 -0
- {ob_metaflow-2.12.30.2.dist-info → ob_metaflow-2.13.6.1.dist-info}/entry_points.txt +0 -0
- {ob_metaflow-2.12.30.2.dist-info → ob_metaflow-2.13.6.1.dist-info}/top_level.txt +0 -0
metaflow/plugins/argo/argo_workflows_deployer_objects.py

@@ -1,5 +1,6 @@
 import sys
 import json
+import time
 import tempfile
 from typing import ClassVar, Optional

@@ -10,7 +11,7 @@ from metaflow.metaflow_config import KUBERNETES_NAMESPACE
 from metaflow.plugins.argo.argo_workflows import ArgoWorkflows
 from metaflow.runner.deployer import Deployer, DeployedFlow, TriggeredRun

-from metaflow.runner.utils import get_lower_level_group, handle_timeout
+from metaflow.runner.utils import get_lower_level_group, handle_timeout, temporary_fifo


 def generate_fake_flow_file_contents(
@@ -97,6 +98,7 @@ class ArgoWorkflowsTriggeredRun(TriggeredRun):
         )

         command_obj = self.deployer.spm.get(pid)
+        command_obj.sync_wait()
         return command_obj.process.returncode == 0

     def unsuspend(self, **kwargs) -> bool:
@@ -131,6 +133,7 @@ class ArgoWorkflowsTriggeredRun(TriggeredRun):
         )

         command_obj = self.deployer.spm.get(pid)
+        command_obj.sync_wait()
         return command_obj.process.returncode == 0

     def terminate(self, **kwargs) -> bool:
@@ -165,8 +168,50 @@ class ArgoWorkflowsTriggeredRun(TriggeredRun):
         )

         command_obj = self.deployer.spm.get(pid)
+        command_obj.sync_wait()
         return command_obj.process.returncode == 0

+    def wait_for_completion(self, timeout: Optional[int] = None):
+        """
+        Wait for the workflow to complete or timeout.
+
+        Parameters
+        ----------
+        timeout : int, optional, default None
+            Maximum time in seconds to wait for workflow completion.
+            If None, waits indefinitely.
+
+        Raises
+        ------
+        TimeoutError
+            If the workflow does not complete within the specified timeout period.
+        """
+        start_time = time.time()
+        check_interval = 5
+        while self.is_running:
+            if timeout is not None and (time.time() - start_time) > timeout:
+                raise TimeoutError(
+                    "Workflow did not complete within specified timeout."
+                )
+            time.sleep(check_interval)
+
+    @property
+    def is_running(self):
+        """
+        Check if the workflow is currently running.
+
+        Returns
+        -------
+        bool
+            True if the workflow status is either 'Pending' or 'Running',
+            False otherwise.
+        """
+        workflow_status = self.status
+        # full list of all states present here:
+        # https://github.com/argoproj/argo-workflows/blob/main/pkg/apis/workflow/v1alpha1/workflow_types.go#L54
+        # we only consider non-terminal states to determine if the workflow has not finished
+        return workflow_status is not None and workflow_status in ["Pending", "Running"]
+
     @property
     def status(self) -> Optional[str]:
         """
@@ -319,6 +364,7 @@ class ArgoWorkflowsDeployedFlow(DeployedFlow):
         )

         command_obj = self.deployer.spm.get(pid)
+        command_obj.sync_wait()
         return command_obj.process.returncode == 0

     def trigger(self, **kwargs) -> ArgoWorkflowsTriggeredRun:
@@ -341,18 +387,14 @@ class ArgoWorkflowsDeployedFlow(DeployedFlow):
         Exception
             If there is an error during the trigger process.
         """
-        with tempfile.TemporaryDirectory() as temp_dir:
-            tfp_runner_attribute = tempfile.NamedTemporaryFile(
-                dir=temp_dir, delete=False
-            )
-
+        with temporary_fifo() as (attribute_file_path, attribute_file_fd):
             # every subclass needs to have `self.deployer_kwargs`
             command = get_lower_level_group(
                 self.deployer.api,
                 self.deployer.top_level_kwargs,
                 self.deployer.TYPE,
                 self.deployer.deployer_kwargs,
-            ).trigger(deployer_attribute_file=tfp_runner_attribute.name, **kwargs)
+            ).trigger(deployer_attribute_file=attribute_file_path, **kwargs)

             pid = self.deployer.spm.run_command(
                 [sys.executable, *command],
@@ -363,9 +405,9 @@ class ArgoWorkflowsDeployedFlow(DeployedFlow):

             command_obj = self.deployer.spm.get(pid)
             content = handle_timeout(
-                tfp_runner_attribute, command_obj, self.deployer.file_read_timeout
+                attribute_file_fd, command_obj, self.deployer.file_read_timeout
             )
-
+            command_obj.sync_wait()
             if command_obj.process.returncode == 0:
                 return ArgoWorkflowsTriggeredRun(
                     deployer=self.deployer, content=content
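The `wait_for_completion` and `sync_wait` additions above change how callers block on a triggered Argo run: `sync_wait` ensures the subprocess is actually reaped before its return code is read, and `wait_for_completion` polls the workflow status every 5 seconds. A minimal usage sketch, assuming the standard `Deployer` entry point; the flow file name and timeout value are illustrative:

from metaflow import Deployer

# Deploy and trigger a flow on Argo Workflows (flow file name is illustrative).
deployed = Deployer("hello_flow.py").argo_workflows().create()
run = deployed.trigger()

try:
    # New in this release: block until the workflow leaves Pending/Running,
    # raising TimeoutError after 10 minutes.
    run.wait_for_completion(timeout=600)
except TimeoutError:
    run.terminate()

print(run.status)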
metaflow/plugins/aws/aws_utils.py

@@ -1,5 +1,4 @@
 import re
-import requests

 from metaflow.exception import MetaflowException
 from metaflow.metaflow_config import MAX_MEMORY_PER_TASK, MAX_CPU_PER_TASK
@@ -31,6 +30,10 @@ def get_ec2_instance_metadata():
     - ec2-region
     - ec2-availability-zone
     """
+
+    # TODO: Remove dependency on requests
+    import requests
+
     meta = {}
     # Capture AWS instance identity metadata. This is best-effort only since
     # access to this end-point might be blocked on AWS and not available
@@ -160,6 +163,8 @@ def compute_resource_attributes(decos, compute_deco, step_name, resource_defaults):
                 # Here we don't have ints, so we compare the value and raise
                 # an exception if not equal
                 if my_val != v:
+                    # TODO: Throw a better exception since the user has no
+                    # knowledge of 'compute' decorator
                     raise MetaflowException(
                         "'resources' and compute decorator have conflicting "
                         "values for '%s'. Please use consistent values or "
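Both `requests` changes above follow the same deferred-import pattern: drop the module-level import and import inside the function that needs it, so the module stays importable without the dependency. A minimal sketch of the pattern with an illustrative body (the real function also collects availability-zone and instance-type fields):

def get_ec2_instance_metadata():
    # Deferred import: the module no longer needs requests at import time,
    # and code paths that never call this function never need it at all.
    import requests

    meta = {}
    try:
        # Best-effort query of the EC2 instance identity document; the
        # endpoint may be blocked or absent, in which case we return {}.
        doc = requests.get(
            "http://169.254.169.254/latest/dynamic/instance-identity/document",
            timeout=1,
        ).json()
        meta["ec2-instance-id"] = doc.get("instanceId")
        meta["ec2-region"] = doc.get("region")
    except Exception:
        pass
    return meta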
metaflow/plugins/aws/batch/batch_decorator.py

@@ -1,34 +1,30 @@
 import os
-import sys
 import platform
-import requests
+import sys
 import time

-from metaflow import util
 from metaflow import R, current
-
 from metaflow.decorators import StepDecorator
-from metaflow.plugins.resources_decorator import ResourcesDecorator
-from metaflow.plugins.timeout_decorator import get_run_time_limit_for_task
 from metaflow.metadata_provider import MetaDatum
 from metaflow.metadata_provider.util import sync_local_metadata_to_datastore
 from metaflow.metaflow_config import (
-    ECS_S3_ACCESS_IAM_ROLE,
-    BATCH_JOB_QUEUE,
     BATCH_CONTAINER_IMAGE,
     BATCH_CONTAINER_REGISTRY,
-    ECS_FARGATE_EXECUTION_ROLE,
+    BATCH_JOB_QUEUE,
     DATASTORE_LOCAL_DIR,
+    ECS_FARGATE_EXECUTION_ROLE,
+    ECS_S3_ACCESS_IAM_ROLE,
 )
+from metaflow.plugins.timeout_decorator import get_run_time_limit_for_task
 from metaflow.sidecar import Sidecar
 from metaflow.unbounded_foreach import UBF_CONTROL

-from .batch import BatchException
 from ..aws_utils import (
     compute_resource_attributes,
     get_docker_registry,
     get_ec2_instance_metadata,
 )
+from .batch import BatchException


 class BatchDecorator(StepDecorator):
@@ -138,8 +134,8 @@ class BatchDecorator(StepDecorator):
     supports_conda_environment = True
     target_platform = "linux-64"

-    def __init__(self, attributes=None, statically_defined=False):
-        super(BatchDecorator, self).__init__(attributes, statically_defined)
+    def init(self):
+        super(BatchDecorator, self).init()

         # If no docker image is explicitly specified, impute a default image.
         if not self.attributes["image"]:
@@ -280,6 +276,10 @@ class BatchDecorator(StepDecorator):
         # Metaflow would be running the container agent compatible with
         # version V4.
         # https://docs.aws.amazon.com/AmazonECS/latest/developerguide/task-metadata-endpoint.html
+
+        # TODO: Remove dependency on requests
+        import requests
+
         try:
             logs_meta = (
                 requests.get(url=os.environ["ECS_CONTAINER_METADATA_URI_V4"])
@@ -386,7 +386,7 @@ class BatchDecorator(StepDecorator):
                             len(flow._control_mapper_tasks),
                         )
                     )
-                except Exception as e:
+                except Exception:
                     pass
             raise Exception(
                 "Batch secondary workers did not finish in %s seconds" % TIMEOUT
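The `__init__` to `init()` change above mirrors the broader decorator-lifecycle rework in this release (see `metaflow/decorators.py` in the file list): attribute-dependent setup moves out of the constructor into a hook that runs after all decorators and config values exist. A hedged sketch of the idea; the base-class mechanics are assumed, not taken from the diff:

class StepDecorator:
    def __init__(self, attributes=None, statically_defined=False):
        # Construction stays cheap: just record the raw attributes.
        self.attributes = attributes or {}

    def init(self):
        # Called later, once every decorator is constructed and config-driven
        # attribute values (new in this release) have been resolved.
        pass


class BatchDecorator(StepDecorator):
    def init(self):
        super().init()
        # Defaults can now safely depend on fully resolved attributes.
        if not self.attributes.get("image"):
            self.attributes["image"] = "python:3.11"  # illustrative default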
metaflow/plugins/aws/secrets_manager/aws_secrets_manager_secrets_provider.py

@@ -50,24 +50,27 @@ class AwsSecretsManagerSecretsProvider(SecretsProvider):
         The secret payload from AWS is EITHER a string OR a binary blob.

         If the secret contains a string payload ("SecretString"):
-        - if the `
+        - if the `json` option is True (default):
           {SecretString} will be parsed as a JSON. If successfully parsed, AND the JSON contains a
           top-level object, each entry K/V in the object will also be converted to an entry in the result. V will
           always be casted to a string (if not already a string).
-        - If `
-          {SecretString} will be returned as a single entry in the result,
+        - If `json` option is False:
+          {SecretString} will be returned as a single entry in the result, where the key is either:
+            - the `secret_id`, OR
+            - the value set by `options={"env_var_name": custom_env_var_name}`.

-        Otherwise, the secret contains a binary blob payload ("SecretBinary")
-        - The result
+        Otherwise, if the secret contains a binary blob payload ("SecretBinary"):
+        - The result dict contains '{SecretName}': '{SecretBinary}', where {SecretBinary} is a base64-encoded string.

-        All keys in the result are sanitized to be more valid environment variable names. This is done on a best
+        All keys in the result are sanitized to be more valid environment variable names. This is done on a best-effort
         basis. Further validation is expected to be done by the invoking @secrets decorator itself.

-        :param secret_id: ARN or friendly name of the secret
-        :param options:
-        :param role: AWS IAM Role ARN to assume before reading the secret
-        :return:
+        :param secret_id: ARN or friendly name of the secret.
+        :param options: Dictionary of additional options. E.g., `options={"env_var_name": custom_env_var_name}`.
+        :param role: AWS IAM Role ARN to assume before reading the secret.
+        :return: Dictionary of environment variables. All keys and values are strings.
         """
+
         import botocore
         from metaflow.plugins.aws.aws_client import get_aws_client

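A usage sketch matching the clarified docstring above, assuming the dict form of `@secrets` sources; the ARN and env var name are placeholders:

from metaflow import FlowSpec, step, secrets


class SecretFlow(FlowSpec):
    @secrets(
        sources=[
            {
                "type": "aws-secrets-manager",
                # Placeholder ARN; a friendly name also works per the docstring.
                "id": "arn:aws:secretsmanager:us-west-2:123456789012:secret:db-creds",
                # json=False surfaces SecretString under a single custom key.
                "options": {"env_var_name": "DB_CREDS", "json": False},
            }
        ]
    )
    @step
    def start(self):
        import os

        print("got secret of length", len(os.environ["DB_CREDS"]))
        self.next(self.end)

    @step
    def end(self):
        pass


if __name__ == "__main__":
    SecretFlow()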
metaflow/plugins/aws/step_functions/step_functions.py

@@ -18,6 +18,7 @@ from metaflow.metaflow_config import (
     SFN_S3_DISTRIBUTED_MAP_OUTPUT_PATH,
 )
 from metaflow.parameters import deploy_time_eval
+from metaflow.user_configs.config_options import ConfigInput
 from metaflow.util import dict_to_cli_options, to_pascalcase

 from ..batch.batch import Batch
@@ -71,6 +72,7 @@ class StepFunctions(object):
         self.username = username
         self.max_workers = max_workers
         self.workflow_timeout = workflow_timeout
+        self.config_parameters = self._process_config_parameters()

         # https://aws.amazon.com/blogs/aws/step-functions-distributed-map-a-serverless-solution-for-large-scale-parallel-data-processing/
         self.use_distributed_map = use_distributed_map
@@ -234,7 +236,7 @@ class StepFunctions(object):
             return parameters.get("metaflow.owner"), parameters.get(
                 "metaflow.production_token"
             )
-        except KeyError as e:
+        except KeyError:
             raise StepFunctionsException(
                 "An existing non-metaflow "
                 "workflow with the same name as "
@@ -485,6 +487,10 @@ class StepFunctions(object):
                     "case-insensitive." % param.name
                 )
             seen.add(norm)
+            # NOTE: We skip config parameters as these do not have dynamic values,
+            # and need to be treated differently.
+            if param.IS_CONFIG_PARAMETER:
+                continue

             is_required = param.kwargs.get("required", False)
             # Throw an exception if a schedule is set for a flow with required
@@ -501,6 +507,27 @@ class StepFunctions(object):
             parameters.append(dict(name=param.name, value=value))
         return parameters

+    def _process_config_parameters(self):
+        parameters = []
+        seen = set()
+        for var, param in self.flow._get_parameters():
+            if not param.IS_CONFIG_PARAMETER:
+                continue
+            # Throw an exception if the parameter is specified twice.
+            norm = param.name.lower()
+            if norm in seen:
+                raise MetaflowException(
+                    "Parameter *%s* is specified twice. "
+                    "Note that parameter names are "
+                    "case-insensitive." % param.name
+                )
+            seen.add(norm)
+
+            parameters.append(
+                dict(name=param.name, kv_name=ConfigInput.make_key_name(param.name))
+            )
+        return parameters
+
     def _batch(self, node):
         attrs = {
             # metaflow.user is only used for setting the AWS Job Name.
@@ -747,6 +774,11 @@ class StepFunctions(object):
         metaflow_version["production_token"] = self.production_token
         env["METAFLOW_VERSION"] = json.dumps(metaflow_version)

+        # map config values
+        cfg_env = {param["name"]: param["kv_name"] for param in self.config_parameters}
+        if cfg_env:
+            env["METAFLOW_FLOW_CONFIG_VALUE"] = json.dumps(cfg_env)
+
         # Set AWS DynamoDb Table Name for state tracking for for-eaches.
         # There are three instances when metaflow runtime directly interacts
         # with AWS DynamoDB.
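To make the new config plumbing concrete: `_process_config_parameters` collects each config parameter's name and datastore key, and the mapping is JSON-encoded into the `METAFLOW_FLOW_CONFIG_VALUE` task environment variable. A standalone illustration of that last step; the `kv.` key format is hypothetical (the real key comes from `ConfigInput.make_key_name`):

import json

# Shape produced by _process_config_parameters (kv_name format is hypothetical).
config_parameters = [
    {"name": "training", "kv_name": "kv.training"},
    {"name": "deployment", "kv_name": "kv.deployment"},
]

env = {}
cfg_env = {param["name"]: param["kv_name"] for param in config_parameters}
if cfg_env:
    env["METAFLOW_FLOW_CONFIG_VALUE"] = json.dumps(cfg_env)

print(env["METAFLOW_FLOW_CONFIG_VALUE"])
# {"training": "kv.training", "deployment": "kv.deployment"}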
metaflow/plugins/aws/step_functions/step_functions_cli.py

@@ -4,7 +4,6 @@ import re
 from hashlib import sha1

 from metaflow import JSONType, current, decorators, parameters
-from metaflow.client.core import get_metadata
 from metaflow._vendor import click
 from metaflow.exception import MetaflowException, MetaflowInternalError
 from metaflow.metaflow_config import (
@@ -326,6 +325,7 @@ def make_flow(

     # Attach AWS Batch decorator to the flow
     decorators._attach_decorators(obj.flow, [BatchDecorator.name])
+    decorators._init(obj.flow)
     decorators._init_step_decorators(
         obj.flow, obj.graph, obj.environment, obj.flow_datastore, obj.logger
     )
metaflow/plugins/aws/step_functions/step_functions_deployer_objects.py

@@ -1,12 +1,11 @@
 import sys
 import json
-import tempfile
 from typing import ClassVar, Optional, List

 from metaflow.plugins.aws.step_functions.step_functions import StepFunctions
 from metaflow.runner.deployer import DeployedFlow, TriggeredRun

-from metaflow.runner.utils import get_lower_level_group, handle_timeout
+from metaflow.runner.utils import get_lower_level_group, handle_timeout, temporary_fifo


 class StepFunctionsTriggeredRun(TriggeredRun):
@@ -46,6 +45,7 @@ class StepFunctionsTriggeredRun(TriggeredRun):
         )

         command_obj = self.deployer.spm.get(pid)
+        command_obj.sync_wait()
         return command_obj.process.returncode == 0


@@ -174,6 +174,7 @@ class StepFunctionsDeployedFlow(DeployedFlow):
         )

         command_obj = self.deployer.spm.get(pid)
+        command_obj.sync_wait()
         return command_obj.process.returncode == 0

     def trigger(self, **kwargs) -> StepFunctionsTriggeredRun:
@@ -196,18 +197,14 @@ class StepFunctionsDeployedFlow(DeployedFlow):
         Exception
             If there is an error during the trigger process.
         """
-        with tempfile.TemporaryDirectory() as temp_dir:
-            tfp_runner_attribute = tempfile.NamedTemporaryFile(
-                dir=temp_dir, delete=False
-            )
-
+        with temporary_fifo() as (attribute_file_path, attribute_file_fd):
             # every subclass needs to have `self.deployer_kwargs`
             command = get_lower_level_group(
                 self.deployer.api,
                 self.deployer.top_level_kwargs,
                 self.deployer.TYPE,
                 self.deployer.deployer_kwargs,
-            ).trigger(deployer_attribute_file=tfp_runner_attribute.name, **kwargs)
+            ).trigger(deployer_attribute_file=attribute_file_path, **kwargs)

             pid = self.deployer.spm.run_command(
                 [sys.executable, *command],
@@ -218,9 +215,10 @@ class StepFunctionsDeployedFlow(DeployedFlow):

             command_obj = self.deployer.spm.get(pid)
             content = handle_timeout(
-                tfp_runner_attribute, command_obj, self.deployer.file_read_timeout
+                attribute_file_fd, command_obj, self.deployer.file_read_timeout
             )

+            command_obj.sync_wait()
             if command_obj.process.returncode == 0:
                 return StepFunctionsTriggeredRun(
                     deployer=self.deployer, content=content
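Both deployer-objects files above swap `tempfile.NamedTemporaryFile` for `temporary_fifo` when receiving the attribute file from the subprocess. A minimal sketch of what such a context manager could look like, matching the `(path, fd)` call sites; the real implementation lives in `metaflow/runner/utils.py` and may differ:

import os
import tempfile
from contextlib import contextmanager


@contextmanager
def temporary_fifo():
    # Create a named pipe in a private directory and open the read end
    # non-blocking, so the subprocess (the writer) can attach later.
    tmp_dir = tempfile.mkdtemp()
    path = os.path.join(tmp_dir, "fifo")
    os.mkfifo(path)
    fd = os.open(path, os.O_RDONLY | os.O_NONBLOCK)
    try:
        yield path, fd
    finally:
        os.close(fd)
        os.unlink(path)
        os.rmdir(tmp_dir)

Unlike a temp file, a FIFO never buffers the payload on disk, and the reader can poll the descriptor for actual data arrival instead of watching a file grow.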
metaflow/plugins/cards/card_cli.py

@@ -691,10 +691,15 @@ def create(
     try:
         if options is not None:
             mf_card = filtered_card(
-                options=options, components=component_arr, graph=graph_dict
+                options=options,
+                components=component_arr,
+                graph=graph_dict,
+                flow=ctx.obj.flow,
             )
         else:
-            mf_card = filtered_card(components=component_arr, graph=graph_dict)
+            mf_card = filtered_card(
+                components=component_arr, graph=graph_dict, flow=ctx.obj.flow
+            )
     except TypeError as e:
         if render_error_card:
             mf_card = None
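The `create` call sites above now pass `flow` (alongside `components` and `graph`) to card constructors. A sketch of a custom card accepting the widened signature; base-class specifics beyond `type` and `render` are assumed:

from metaflow.cards import MetaflowCard


class MyCard(MetaflowCard):
    type = "my_card"

    def __init__(self, options=None, components=None, graph=None, flow=None, **kwargs):
        # Accept (and tolerate) the full set of kwargs the CLI now passes;
        # TypeError here falls back to the error card per the except clause above.
        super().__init__()
        self._components = components or []
        self._graph = graph
        self._flow = flow

    def render(self, task):
        return "<html><body>%d components</body></html>" % len(self._components)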
metaflow/plugins/cards/card_decorator.py

@@ -1,13 +1,16 @@
+import json
+import os
+import re
+import tempfile
+
 from metaflow.decorators import StepDecorator
 from metaflow.metaflow_current import current
+from metaflow.user_configs.config_options import ConfigInput
+from metaflow.user_configs.config_parameters import dump_config_values
 from metaflow.util import to_unicode
+
 from .component_serializer import CardComponentCollector, get_card_class
 from .card_creator import CardCreator
-
-
-# from metaflow import get_metadata
-import re
-
 from .exception import CARD_ID_PATTERN, TYPE_CHECK_REGEX

 ASYNC_TIMEOUT = 30
@@ -73,6 +76,12 @@ class CardDecorator(StepDecorator):

     card_creator = None

+    _config_values = None
+
+    _config_file_name = None
+
+    task_finished_decos = 0
+
     def __init__(self, *args, **kwargs):
         super(CardDecorator, self).__init__(*args, **kwargs)
         self._task_datastore = None
@@ -103,6 +112,25 @@ class CardDecorator(StepDecorator):
     def _increment_step_counter(cls):
         cls.step_counter += 1

+    @classmethod
+    def _increment_completed_counter(cls):
+        cls.task_finished_decos += 1
+
+    @classmethod
+    def _set_config_values(cls, config_values):
+        cls._config_values = config_values
+
+    @classmethod
+    def _set_config_file_name(cls, flow):
+        # Only create a config file from the very first card decorator.
+        if cls._config_values and not cls._config_file_name:
+            with tempfile.NamedTemporaryFile(
+                mode="w", encoding="utf-8", delete=False
+            ) as config_file:
+                config_value = dump_config_values(flow)
+                json.dump(config_value, config_file)
+                cls._config_file_name = config_file.name
+
     def step_init(
         self, flow, graph, step_name, decorators, environment, flow_datastore, logger
     ):
@@ -111,6 +139,16 @@ class CardDecorator(StepDecorator):
         self._logger = logger
         self.card_options = None

+        # We check for configuration options. We do this here before they are
+        # converted to properties.
+        self._set_config_values(
+            [
+                (config.name, ConfigInput.make_key_name(config.name))
+                for _, config in flow._get_parameters()
+                if config.IS_CONFIG_PARAMETER
+            ]
+        )
+
         self.card_options = self.attributes["options"]

         evt_name = "step-init"
@@ -146,6 +184,14 @@ class CardDecorator(StepDecorator):
         self._task_datastore = task_datastore
         self._metadata = metadata

+        # If we have configs, we need to dump them to a file so we can re-use them
+        # when calling the card creation subprocess.
+        # Since a step can contain multiple card decorators, and all the card creation processes
+        # will reference the same config file (because of how the CardCreator is created (only single class instance)),
+        # we need to ensure that a single config file is being referenced for all card create commands.
+        # This config file will be removed when the last card decorator has finished creating its card.
+        self._set_config_file_name(flow)
+
         card_type = self.attributes["type"]
         card_class = get_card_class(card_type)

@@ -179,7 +225,7 @@ class CardDecorator(StepDecorator):
         # we need to ensure that `current.card` has `CardComponentCollector` instantiated only once.
         if not self._is_event_registered("pre-step"):
             self._register_event("pre-step")
-            self._set_card_creator(CardCreator(self._create_top_level_args()))
+            self._set_card_creator(CardCreator(self._create_top_level_args(flow)))

         current._update_env(
             {"card": CardComponentCollector(self._logger, self.card_creator)}
@@ -223,6 +269,8 @@ class CardDecorator(StepDecorator):
         self.card_creator.create(mode="render", final=True, **create_options)
         self.card_creator.create(mode="refresh", final=True, **create_options)

+        self._cleanup(step_name)
+
     @staticmethod
     def _options(mapping):
         for k, v in mapping.items():
@@ -232,9 +280,13 @@ class CardDecorator(StepDecorator):
                 for value in v:
                     yield "--%s" % k
                     if not isinstance(value, bool):
-                        yield to_unicode(value)
+                        if isinstance(value, tuple):
+                            for val in value:
+                                yield to_unicode(val)
+                        else:
+                            yield to_unicode(value)

-    def _create_top_level_args(self):
+    def _create_top_level_args(self, flow):
         top_level_options = {
             "quiet": True,
             "metadata": self._metadata.TYPE,
@@ -247,4 +299,23 @@ class CardDecorator(StepDecorator):
             # We don't provide --with as all execution is taking place in
             # the context of the main process
         }
+        if self._config_values:
+            top_level_options["config-value"] = self._config_values
+            top_level_options["local-config-file"] = self._config_file_name
+
         return list(self._options(top_level_options))
+
+    def task_exception(
+        self, exception, step_name, flow, graph, retry_count, max_user_code_retries
+    ):
+        self._cleanup(step_name)
+
+    def _cleanup(self, step_name):
+        self._increment_completed_counter()
+        if self.task_finished_decos == self.total_decos_on_step[step_name]:
+            # Unlink the config file if it exists
+            if self._config_file_name:
+                try:
+                    os.unlink(self._config_file_name)
+                except Exception as e:
+                    pass