ob-metaflow 2.11.13.1__py2.py3-none-any.whl → 2.19.7.1rc0__py2.py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- metaflow/R.py +10 -7
- metaflow/__init__.py +40 -25
- metaflow/_vendor/imghdr/__init__.py +186 -0
- metaflow/_vendor/importlib_metadata/__init__.py +1063 -0
- metaflow/_vendor/importlib_metadata/_adapters.py +68 -0
- metaflow/_vendor/importlib_metadata/_collections.py +30 -0
- metaflow/_vendor/importlib_metadata/_compat.py +71 -0
- metaflow/_vendor/importlib_metadata/_functools.py +104 -0
- metaflow/_vendor/importlib_metadata/_itertools.py +73 -0
- metaflow/_vendor/importlib_metadata/_meta.py +48 -0
- metaflow/_vendor/importlib_metadata/_text.py +99 -0
- metaflow/_vendor/importlib_metadata/py.typed +0 -0
- metaflow/_vendor/typeguard/__init__.py +48 -0
- metaflow/_vendor/typeguard/_checkers.py +1070 -0
- metaflow/_vendor/typeguard/_config.py +108 -0
- metaflow/_vendor/typeguard/_decorators.py +233 -0
- metaflow/_vendor/typeguard/_exceptions.py +42 -0
- metaflow/_vendor/typeguard/_functions.py +308 -0
- metaflow/_vendor/typeguard/_importhook.py +213 -0
- metaflow/_vendor/typeguard/_memo.py +48 -0
- metaflow/_vendor/typeguard/_pytest_plugin.py +127 -0
- metaflow/_vendor/typeguard/_suppression.py +86 -0
- metaflow/_vendor/typeguard/_transformer.py +1229 -0
- metaflow/_vendor/typeguard/_union_transformer.py +55 -0
- metaflow/_vendor/typeguard/_utils.py +173 -0
- metaflow/_vendor/typeguard/py.typed +0 -0
- metaflow/_vendor/typing_extensions.py +3641 -0
- metaflow/_vendor/v3_7/importlib_metadata/__init__.py +1063 -0
- metaflow/_vendor/v3_7/importlib_metadata/_adapters.py +68 -0
- metaflow/_vendor/v3_7/importlib_metadata/_collections.py +30 -0
- metaflow/_vendor/v3_7/importlib_metadata/_compat.py +71 -0
- metaflow/_vendor/v3_7/importlib_metadata/_functools.py +104 -0
- metaflow/_vendor/v3_7/importlib_metadata/_itertools.py +73 -0
- metaflow/_vendor/v3_7/importlib_metadata/_meta.py +48 -0
- metaflow/_vendor/v3_7/importlib_metadata/_text.py +99 -0
- metaflow/_vendor/v3_7/importlib_metadata/py.typed +0 -0
- metaflow/_vendor/v3_7/typeguard/__init__.py +48 -0
- metaflow/_vendor/v3_7/typeguard/_checkers.py +906 -0
- metaflow/_vendor/v3_7/typeguard/_config.py +108 -0
- metaflow/_vendor/v3_7/typeguard/_decorators.py +237 -0
- metaflow/_vendor/v3_7/typeguard/_exceptions.py +42 -0
- metaflow/_vendor/v3_7/typeguard/_functions.py +310 -0
- metaflow/_vendor/v3_7/typeguard/_importhook.py +213 -0
- metaflow/_vendor/v3_7/typeguard/_memo.py +48 -0
- metaflow/_vendor/v3_7/typeguard/_pytest_plugin.py +100 -0
- metaflow/_vendor/v3_7/typeguard/_suppression.py +88 -0
- metaflow/_vendor/v3_7/typeguard/_transformer.py +1207 -0
- metaflow/_vendor/v3_7/typeguard/_union_transformer.py +54 -0
- metaflow/_vendor/v3_7/typeguard/_utils.py +169 -0
- metaflow/_vendor/v3_7/typeguard/py.typed +0 -0
- metaflow/_vendor/v3_7/typing_extensions.py +3072 -0
- metaflow/_vendor/yaml/__init__.py +427 -0
- metaflow/_vendor/yaml/composer.py +139 -0
- metaflow/_vendor/yaml/constructor.py +748 -0
- metaflow/_vendor/yaml/cyaml.py +101 -0
- metaflow/_vendor/yaml/dumper.py +62 -0
- metaflow/_vendor/yaml/emitter.py +1137 -0
- metaflow/_vendor/yaml/error.py +75 -0
- metaflow/_vendor/yaml/events.py +86 -0
- metaflow/_vendor/yaml/loader.py +63 -0
- metaflow/_vendor/yaml/nodes.py +49 -0
- metaflow/_vendor/yaml/parser.py +589 -0
- metaflow/_vendor/yaml/reader.py +185 -0
- metaflow/_vendor/yaml/representer.py +389 -0
- metaflow/_vendor/yaml/resolver.py +227 -0
- metaflow/_vendor/yaml/scanner.py +1435 -0
- metaflow/_vendor/yaml/serializer.py +111 -0
- metaflow/_vendor/yaml/tokens.py +104 -0
- metaflow/cards.py +5 -0
- metaflow/cli.py +331 -785
- metaflow/cli_args.py +17 -0
- metaflow/cli_components/__init__.py +0 -0
- metaflow/cli_components/dump_cmd.py +96 -0
- metaflow/cli_components/init_cmd.py +52 -0
- metaflow/cli_components/run_cmds.py +546 -0
- metaflow/cli_components/step_cmd.py +334 -0
- metaflow/cli_components/utils.py +140 -0
- metaflow/client/__init__.py +1 -0
- metaflow/client/core.py +467 -73
- metaflow/client/filecache.py +75 -35
- metaflow/clone_util.py +7 -1
- metaflow/cmd/code/__init__.py +231 -0
- metaflow/cmd/develop/stub_generator.py +756 -288
- metaflow/cmd/develop/stubs.py +12 -28
- metaflow/cmd/main_cli.py +6 -4
- metaflow/cmd/make_wrapper.py +78 -0
- metaflow/datastore/__init__.py +1 -0
- metaflow/datastore/content_addressed_store.py +41 -10
- metaflow/datastore/datastore_set.py +11 -2
- metaflow/datastore/flow_datastore.py +156 -10
- metaflow/datastore/spin_datastore.py +91 -0
- metaflow/datastore/task_datastore.py +154 -39
- metaflow/debug.py +5 -0
- metaflow/decorators.py +404 -78
- metaflow/exception.py +8 -2
- metaflow/extension_support/__init__.py +527 -376
- metaflow/extension_support/_empty_file.py +2 -2
- metaflow/extension_support/plugins.py +49 -31
- metaflow/flowspec.py +482 -33
- metaflow/graph.py +210 -42
- metaflow/includefile.py +84 -40
- metaflow/lint.py +141 -22
- metaflow/meta_files.py +13 -0
- metaflow/{metadata → metadata_provider}/heartbeat.py +24 -8
- metaflow/{metadata → metadata_provider}/metadata.py +86 -1
- metaflow/metaflow_config.py +175 -28
- metaflow/metaflow_config_funcs.py +51 -3
- metaflow/metaflow_current.py +4 -10
- metaflow/metaflow_environment.py +139 -53
- metaflow/metaflow_git.py +115 -0
- metaflow/metaflow_profile.py +18 -0
- metaflow/metaflow_version.py +150 -66
- metaflow/mflog/__init__.py +4 -3
- metaflow/mflog/save_logs.py +2 -2
- metaflow/multicore_utils.py +31 -14
- metaflow/package/__init__.py +673 -0
- metaflow/packaging_sys/__init__.py +880 -0
- metaflow/packaging_sys/backend.py +128 -0
- metaflow/packaging_sys/distribution_support.py +153 -0
- metaflow/packaging_sys/tar_backend.py +99 -0
- metaflow/packaging_sys/utils.py +54 -0
- metaflow/packaging_sys/v1.py +527 -0
- metaflow/parameters.py +149 -28
- metaflow/plugins/__init__.py +74 -5
- metaflow/plugins/airflow/airflow.py +40 -25
- metaflow/plugins/airflow/airflow_cli.py +22 -5
- metaflow/plugins/airflow/airflow_decorator.py +1 -1
- metaflow/plugins/airflow/airflow_utils.py +5 -3
- metaflow/plugins/airflow/sensors/base_sensor.py +4 -4
- metaflow/plugins/airflow/sensors/external_task_sensor.py +2 -2
- metaflow/plugins/airflow/sensors/s3_sensor.py +2 -2
- metaflow/plugins/argo/argo_client.py +78 -33
- metaflow/plugins/argo/argo_events.py +6 -6
- metaflow/plugins/argo/argo_workflows.py +2410 -527
- metaflow/plugins/argo/argo_workflows_cli.py +571 -121
- metaflow/plugins/argo/argo_workflows_decorator.py +43 -12
- metaflow/plugins/argo/argo_workflows_deployer.py +106 -0
- metaflow/plugins/argo/argo_workflows_deployer_objects.py +453 -0
- metaflow/plugins/argo/capture_error.py +73 -0
- metaflow/plugins/argo/conditional_input_paths.py +35 -0
- metaflow/plugins/argo/exit_hooks.py +209 -0
- metaflow/plugins/argo/jobset_input_paths.py +15 -0
- metaflow/plugins/argo/param_val.py +19 -0
- metaflow/plugins/aws/aws_client.py +10 -3
- metaflow/plugins/aws/aws_utils.py +55 -2
- metaflow/plugins/aws/batch/batch.py +72 -5
- metaflow/plugins/aws/batch/batch_cli.py +33 -10
- metaflow/plugins/aws/batch/batch_client.py +4 -3
- metaflow/plugins/aws/batch/batch_decorator.py +102 -35
- metaflow/plugins/aws/secrets_manager/aws_secrets_manager_secrets_provider.py +13 -10
- metaflow/plugins/aws/step_functions/dynamo_db_client.py +0 -3
- metaflow/plugins/aws/step_functions/production_token.py +1 -1
- metaflow/plugins/aws/step_functions/step_functions.py +65 -8
- metaflow/plugins/aws/step_functions/step_functions_cli.py +101 -7
- metaflow/plugins/aws/step_functions/step_functions_decorator.py +1 -2
- metaflow/plugins/aws/step_functions/step_functions_deployer.py +97 -0
- metaflow/plugins/aws/step_functions/step_functions_deployer_objects.py +264 -0
- metaflow/plugins/azure/azure_exceptions.py +1 -1
- metaflow/plugins/azure/azure_secret_manager_secrets_provider.py +240 -0
- metaflow/plugins/azure/azure_tail.py +1 -1
- metaflow/plugins/azure/includefile_support.py +2 -0
- metaflow/plugins/cards/card_cli.py +66 -30
- metaflow/plugins/cards/card_creator.py +25 -1
- metaflow/plugins/cards/card_datastore.py +21 -49
- metaflow/plugins/cards/card_decorator.py +132 -8
- metaflow/plugins/cards/card_modules/basic.py +112 -17
- metaflow/plugins/cards/card_modules/bundle.css +1 -1
- metaflow/plugins/cards/card_modules/card.py +16 -1
- metaflow/plugins/cards/card_modules/chevron/renderer.py +1 -1
- metaflow/plugins/cards/card_modules/components.py +665 -28
- metaflow/plugins/cards/card_modules/convert_to_native_type.py +36 -7
- metaflow/plugins/cards/card_modules/json_viewer.py +232 -0
- metaflow/plugins/cards/card_modules/main.css +1 -0
- metaflow/plugins/cards/card_modules/main.js +68 -49
- metaflow/plugins/cards/card_modules/renderer_tools.py +1 -0
- metaflow/plugins/cards/card_modules/test_cards.py +26 -12
- metaflow/plugins/cards/card_server.py +39 -14
- metaflow/plugins/cards/component_serializer.py +2 -9
- metaflow/plugins/cards/metadata.py +22 -0
- metaflow/plugins/catch_decorator.py +9 -0
- metaflow/plugins/datastores/azure_storage.py +10 -1
- metaflow/plugins/datastores/gs_storage.py +6 -2
- metaflow/plugins/datastores/local_storage.py +12 -6
- metaflow/plugins/datastores/spin_storage.py +12 -0
- metaflow/plugins/datatools/local.py +2 -0
- metaflow/plugins/datatools/s3/s3.py +126 -75
- metaflow/plugins/datatools/s3/s3op.py +254 -121
- metaflow/plugins/env_escape/__init__.py +3 -3
- metaflow/plugins/env_escape/client_modules.py +102 -72
- metaflow/plugins/env_escape/server.py +7 -0
- metaflow/plugins/env_escape/stub.py +24 -5
- metaflow/plugins/events_decorator.py +343 -185
- metaflow/plugins/exit_hook/__init__.py +0 -0
- metaflow/plugins/exit_hook/exit_hook_decorator.py +46 -0
- metaflow/plugins/exit_hook/exit_hook_script.py +52 -0
- metaflow/plugins/gcp/__init__.py +1 -1
- metaflow/plugins/gcp/gcp_secret_manager_secrets_provider.py +11 -6
- metaflow/plugins/gcp/gs_tail.py +10 -6
- metaflow/plugins/gcp/includefile_support.py +3 -0
- metaflow/plugins/kubernetes/kube_utils.py +108 -0
- metaflow/plugins/kubernetes/kubernetes.py +411 -130
- metaflow/plugins/kubernetes/kubernetes_cli.py +168 -36
- metaflow/plugins/kubernetes/kubernetes_client.py +104 -2
- metaflow/plugins/kubernetes/kubernetes_decorator.py +246 -88
- metaflow/plugins/kubernetes/kubernetes_job.py +253 -581
- metaflow/plugins/kubernetes/kubernetes_jobsets.py +1071 -0
- metaflow/plugins/kubernetes/spot_metadata_cli.py +69 -0
- metaflow/plugins/kubernetes/spot_monitor_sidecar.py +109 -0
- metaflow/plugins/logs_cli.py +359 -0
- metaflow/plugins/{metadata → metadata_providers}/local.py +144 -84
- metaflow/plugins/{metadata → metadata_providers}/service.py +103 -26
- metaflow/plugins/metadata_providers/spin.py +16 -0
- metaflow/plugins/package_cli.py +36 -24
- metaflow/plugins/parallel_decorator.py +128 -11
- metaflow/plugins/parsers.py +16 -0
- metaflow/plugins/project_decorator.py +51 -5
- metaflow/plugins/pypi/bootstrap.py +357 -105
- metaflow/plugins/pypi/conda_decorator.py +82 -81
- metaflow/plugins/pypi/conda_environment.py +187 -52
- metaflow/plugins/pypi/micromamba.py +157 -47
- metaflow/plugins/pypi/parsers.py +268 -0
- metaflow/plugins/pypi/pip.py +88 -13
- metaflow/plugins/pypi/pypi_decorator.py +37 -1
- metaflow/plugins/pypi/utils.py +48 -2
- metaflow/plugins/resources_decorator.py +2 -2
- metaflow/plugins/secrets/__init__.py +3 -0
- metaflow/plugins/secrets/secrets_decorator.py +26 -181
- metaflow/plugins/secrets/secrets_func.py +49 -0
- metaflow/plugins/secrets/secrets_spec.py +101 -0
- metaflow/plugins/secrets/utils.py +74 -0
- metaflow/plugins/tag_cli.py +4 -7
- metaflow/plugins/test_unbounded_foreach_decorator.py +41 -6
- metaflow/plugins/timeout_decorator.py +3 -3
- metaflow/plugins/uv/__init__.py +0 -0
- metaflow/plugins/uv/bootstrap.py +128 -0
- metaflow/plugins/uv/uv_environment.py +72 -0
- metaflow/procpoll.py +1 -1
- metaflow/pylint_wrapper.py +5 -1
- metaflow/runner/__init__.py +0 -0
- metaflow/runner/click_api.py +717 -0
- metaflow/runner/deployer.py +470 -0
- metaflow/runner/deployer_impl.py +201 -0
- metaflow/runner/metaflow_runner.py +714 -0
- metaflow/runner/nbdeploy.py +132 -0
- metaflow/runner/nbrun.py +225 -0
- metaflow/runner/subprocess_manager.py +650 -0
- metaflow/runner/utils.py +335 -0
- metaflow/runtime.py +1078 -260
- metaflow/sidecar/sidecar_worker.py +1 -1
- metaflow/system/__init__.py +5 -0
- metaflow/system/system_logger.py +85 -0
- metaflow/system/system_monitor.py +108 -0
- metaflow/system/system_utils.py +19 -0
- metaflow/task.py +521 -225
- metaflow/tracing/__init__.py +7 -7
- metaflow/tracing/span_exporter.py +31 -38
- metaflow/tracing/tracing_modules.py +38 -43
- metaflow/tuple_util.py +27 -0
- metaflow/user_configs/__init__.py +0 -0
- metaflow/user_configs/config_options.py +563 -0
- metaflow/user_configs/config_parameters.py +598 -0
- metaflow/user_decorators/__init__.py +0 -0
- metaflow/user_decorators/common.py +144 -0
- metaflow/user_decorators/mutable_flow.py +512 -0
- metaflow/user_decorators/mutable_step.py +424 -0
- metaflow/user_decorators/user_flow_decorator.py +264 -0
- metaflow/user_decorators/user_step_decorator.py +749 -0
- metaflow/util.py +243 -27
- metaflow/vendor.py +23 -7
- metaflow/version.py +1 -1
- ob_metaflow-2.19.7.1rc0.data/data/share/metaflow/devtools/Makefile +355 -0
- ob_metaflow-2.19.7.1rc0.data/data/share/metaflow/devtools/Tiltfile +726 -0
- ob_metaflow-2.19.7.1rc0.data/data/share/metaflow/devtools/pick_services.sh +105 -0
- ob_metaflow-2.19.7.1rc0.dist-info/METADATA +87 -0
- ob_metaflow-2.19.7.1rc0.dist-info/RECORD +445 -0
- {ob_metaflow-2.11.13.1.dist-info → ob_metaflow-2.19.7.1rc0.dist-info}/WHEEL +1 -1
- {ob_metaflow-2.11.13.1.dist-info → ob_metaflow-2.19.7.1rc0.dist-info}/entry_points.txt +1 -0
- metaflow/_vendor/v3_5/__init__.py +0 -1
- metaflow/_vendor/v3_5/importlib_metadata/__init__.py +0 -644
- metaflow/_vendor/v3_5/importlib_metadata/_compat.py +0 -152
- metaflow/package.py +0 -188
- ob_metaflow-2.11.13.1.dist-info/METADATA +0 -85
- ob_metaflow-2.11.13.1.dist-info/RECORD +0 -308
- /metaflow/_vendor/{v3_5/zipp.py → zipp.py} +0 -0
- /metaflow/{metadata → metadata_provider}/__init__.py +0 -0
- /metaflow/{metadata → metadata_provider}/util.py +0 -0
- /metaflow/plugins/{metadata → metadata_providers}/__init__.py +0 -0
- {ob_metaflow-2.11.13.1.dist-info → ob_metaflow-2.19.7.1rc0.dist-info/licenses}/LICENSE +0 -0
- {ob_metaflow-2.11.13.1.dist-info → ob_metaflow-2.19.7.1rc0.dist-info}/top_level.txt +0 -0
|
@@ -7,11 +7,12 @@ import traceback
|
|
|
7
7
|
from metaflow import util
|
|
8
8
|
from metaflow import R
|
|
9
9
|
from metaflow.exception import CommandException, METAFLOW_EXIT_DISALLOW_RETRY
|
|
10
|
-
from metaflow.metadata.util import sync_local_metadata_from_datastore
|
|
10
|
+
from metaflow.metadata_provider.util import sync_local_metadata_from_datastore
|
|
11
11
|
from metaflow.metaflow_config import DATASTORE_LOCAL_DIR
|
|
12
12
|
from metaflow.mflog import TASK_LOG_SOURCE
|
|
13
|
-
|
|
13
|
+
from metaflow.unbounded_foreach import UBF_CONTROL, UBF_TASK
|
|
14
14
|
from .batch import Batch, BatchKilledException
|
|
15
|
+
from ..aws_utils import validate_aws_tag
|
|
15
16
|
|
|
16
17
|
|
|
17
18
|
@click.group()
|
|
@@ -47,7 +48,7 @@ def _execute_cmd(func, flow_name, run_id, user, my_runs, echo):
|
|
|
47
48
|
func(flow_name, run_id, user, echo)
|
|
48
49
|
|
|
49
50
|
|
|
50
|
-
@batch.command(help="List unfinished AWS Batch tasks of this flow")
|
|
51
|
+
@batch.command("list", help="List unfinished AWS Batch tasks of this flow")
|
|
51
52
|
@click.option(
|
|
52
53
|
"--my-runs",
|
|
53
54
|
default=False,
|
|
@@ -61,7 +62,7 @@ def _execute_cmd(func, flow_name, run_id, user, my_runs, echo):
|
|
|
61
62
|
help="List unfinished tasks corresponding to the run id.",
|
|
62
63
|
)
|
|
63
64
|
@click.pass_context
|
|
64
|
-
def list(ctx, run_id, user, my_runs):
|
|
65
|
+
def _list(ctx, run_id, user, my_runs):
|
|
65
66
|
batch = Batch(ctx.obj.metadata, ctx.obj.environment)
|
|
66
67
|
_execute_cmd(
|
|
67
68
|
batch.list_jobs, ctx.obj.flow.name, run_id, user, my_runs, ctx.obj.echo
|
|
@@ -100,6 +101,7 @@ def kill(ctx, run_id, user, my_runs):
|
|
|
100
101
|
"Metaflow."
|
|
101
102
|
)
|
|
102
103
|
@click.argument("step-name")
|
|
104
|
+
@click.argument("code-package-metadata")
|
|
103
105
|
@click.argument("code-package-sha")
|
|
104
106
|
@click.argument("code-package-url")
|
|
105
107
|
@click.option("--executable", help="Executable requirement for AWS Batch.")
|
|
@@ -146,12 +148,21 @@ def kill(ctx, run_id, user, my_runs):
|
|
|
146
148
|
help="Activate designated number of elastic fabric adapter devices. "
|
|
147
149
|
"EFA driver must be installed and instance type compatible with EFA",
|
|
148
150
|
)
|
|
151
|
+
@click.option(
|
|
152
|
+
"--aws-batch-tag",
|
|
153
|
+
"aws_batch_tags",
|
|
154
|
+
multiple=True,
|
|
155
|
+
default=None,
|
|
156
|
+
help="AWS tags. Format: key=value, multiple allowed",
|
|
157
|
+
)
|
|
149
158
|
@click.option("--use-tmpfs", is_flag=True, help="tmpfs requirement for AWS Batch.")
|
|
150
159
|
@click.option("--tmpfs-tempdir", is_flag=True, help="tmpfs requirement for AWS Batch.")
|
|
151
160
|
@click.option("--tmpfs-size", help="tmpfs requirement for AWS Batch.")
|
|
152
161
|
@click.option("--tmpfs-path", help="tmpfs requirement for AWS Batch.")
|
|
153
|
-
#
|
|
154
|
-
@click.option(
|
|
162
|
+
# NOTE: ubf-context is not explicitly used, but @parallel decorator tries to pass this so keep it for now
|
|
163
|
+
@click.option(
|
|
164
|
+
"--ubf-context", default=None, type=click.Choice(["none", UBF_CONTROL, UBF_TASK])
|
|
165
|
+
)
|
|
155
166
|
@click.option("--host-volumes", multiple=True)
|
|
156
167
|
@click.option("--efs-volumes", multiple=True)
|
|
157
168
|
@click.option(
|
|
@@ -183,6 +194,7 @@ def kill(ctx, run_id, user, my_runs):
|
|
|
183
194
|
def step(
|
|
184
195
|
ctx,
|
|
185
196
|
step_name,
|
|
197
|
+
code_package_metadata,
|
|
186
198
|
code_package_sha,
|
|
187
199
|
code_package_url,
|
|
188
200
|
executable=None,
|
|
@@ -199,6 +211,7 @@ def step(
|
|
|
199
211
|
swappiness=None,
|
|
200
212
|
inferentia=None,
|
|
201
213
|
efa=None,
|
|
214
|
+
aws_batch_tags=None,
|
|
202
215
|
use_tmpfs=None,
|
|
203
216
|
tmpfs_tempdir=None,
|
|
204
217
|
tmpfs_size=None,
|
|
@@ -271,11 +284,19 @@ def step(
|
|
|
271
284
|
"metaflow_version"
|
|
272
285
|
]
|
|
273
286
|
|
|
287
|
+
env = {"METAFLOW_FLOW_FILENAME": os.path.basename(sys.argv[0])}
|
|
288
|
+
|
|
289
|
+
if aws_batch_tags is not None:
|
|
290
|
+
# We do not need to validate these again,
|
|
291
|
+
# as they come supplied by the batch decorator which already performed validation.
|
|
292
|
+
batch_tags = {}
|
|
293
|
+
for item in list(aws_batch_tags):
|
|
294
|
+
key, value = item.split("=")
|
|
295
|
+
batch_tags[key] = value
|
|
296
|
+
|
|
274
297
|
env_deco = [deco for deco in node.decorators if deco.name == "environment"]
|
|
275
298
|
if env_deco:
|
|
276
|
-
env
|
|
277
|
-
else:
|
|
278
|
-
env = {}
|
|
299
|
+
env.update(env_deco[0].attributes["vars"])
|
|
279
300
|
|
|
280
301
|
# Add the environment variables related to the input-paths argument
|
|
281
302
|
if split_vars:
|
|
@@ -315,6 +336,7 @@ def step(
|
|
|
315
336
|
step_name,
|
|
316
337
|
step_cli,
|
|
317
338
|
task_spec,
|
|
339
|
+
code_package_metadata,
|
|
318
340
|
code_package_sha,
|
|
319
341
|
code_package_url,
|
|
320
342
|
ctx.obj.flow_datastore.TYPE,
|
|
@@ -336,6 +358,7 @@ def step(
|
|
|
336
358
|
host_volumes=host_volumes,
|
|
337
359
|
efs_volumes=efs_volumes,
|
|
338
360
|
use_tmpfs=use_tmpfs,
|
|
361
|
+
aws_batch_tags=batch_tags,
|
|
339
362
|
tmpfs_tempdir=tmpfs_tempdir,
|
|
340
363
|
tmpfs_size=tmpfs_size,
|
|
341
364
|
tmpfs_path=tmpfs_path,
|
|
@@ -344,7 +367,7 @@ def step(
|
|
|
344
367
|
log_options=log_options,
|
|
345
368
|
num_parallel=num_parallel,
|
|
346
369
|
)
|
|
347
|
-
except Exception as e:
|
|
370
|
+
except Exception:
|
|
348
371
|
traceback.print_exc()
|
|
349
372
|
_sync_metadata()
|
|
350
373
|
sys.exit(METAFLOW_EXIT_DISALLOW_RETRY)
|
|
@@ -1,9 +1,7 @@
|
|
|
1
1
|
# -*- coding: utf-8 -*-
|
|
2
|
-
from collections import defaultdict
|
|
2
|
+
from collections import defaultdict
|
|
3
3
|
import copy
|
|
4
4
|
import random
|
|
5
|
-
import select
|
|
6
|
-
import sys
|
|
7
5
|
import time
|
|
8
6
|
import hashlib
|
|
9
7
|
|
|
@@ -89,6 +87,9 @@ class BatchJob(object):
|
|
|
89
87
|
# Multinode
|
|
90
88
|
if getattr(self, "num_parallel", 0) >= 1:
|
|
91
89
|
num_nodes = self.num_parallel
|
|
90
|
+
# We need this task-id set so that all the nodes are aware of the control
|
|
91
|
+
# task's task-id. These "MF_" variables populate the `current.parallel` namedtuple
|
|
92
|
+
self.environment_variable("MF_PARALLEL_CONTROL_TASK_ID", self._task_id)
|
|
92
93
|
main_task_override = copy.deepcopy(self.payload["containerOverrides"])
|
|
93
94
|
|
|
94
95
|
# main
|
|
@@ -1,34 +1,33 @@
|
|
|
1
1
|
import os
|
|
2
|
-
import sys
|
|
3
2
|
import platform
|
|
4
|
-
import requests
|
|
3
|
+
import sys
|
|
5
4
|
import time
|
|
6
5
|
|
|
7
|
-
from metaflow import util
|
|
8
6
|
from metaflow import R, current
|
|
9
|
-
|
|
10
7
|
from metaflow.decorators import StepDecorator
|
|
11
|
-
from metaflow.
|
|
12
|
-
from metaflow.
|
|
13
|
-
from metaflow.metadata import MetaDatum
|
|
14
|
-
from metaflow.metadata.util import sync_local_metadata_to_datastore
|
|
8
|
+
from metaflow.metadata_provider import MetaDatum
|
|
9
|
+
from metaflow.metadata_provider.util import sync_local_metadata_to_datastore
|
|
15
10
|
from metaflow.metaflow_config import (
|
|
16
|
-
ECS_S3_ACCESS_IAM_ROLE,
|
|
17
|
-
BATCH_JOB_QUEUE,
|
|
18
11
|
BATCH_CONTAINER_IMAGE,
|
|
19
12
|
BATCH_CONTAINER_REGISTRY,
|
|
20
|
-
|
|
13
|
+
BATCH_DEFAULT_TAGS,
|
|
14
|
+
BATCH_JOB_QUEUE,
|
|
21
15
|
DATASTORE_LOCAL_DIR,
|
|
16
|
+
ECS_FARGATE_EXECUTION_ROLE,
|
|
17
|
+
ECS_S3_ACCESS_IAM_ROLE,
|
|
18
|
+
FEAT_ALWAYS_UPLOAD_CODE_PACKAGE,
|
|
22
19
|
)
|
|
20
|
+
from metaflow.plugins.timeout_decorator import get_run_time_limit_for_task
|
|
23
21
|
from metaflow.sidecar import Sidecar
|
|
24
22
|
from metaflow.unbounded_foreach import UBF_CONTROL
|
|
25
23
|
|
|
26
|
-
from .batch import BatchException
|
|
27
24
|
from ..aws_utils import (
|
|
28
25
|
compute_resource_attributes,
|
|
29
26
|
get_docker_registry,
|
|
30
27
|
get_ec2_instance_metadata,
|
|
28
|
+
validate_aws_tag,
|
|
31
29
|
)
|
|
30
|
+
from .batch import BatchException
|
|
32
31
|
|
|
33
32
|
|
|
34
33
|
class BatchDecorator(StepDecorator):
|
|
@@ -71,6 +70,9 @@ class BatchDecorator(StepDecorator):
|
|
|
71
70
|
A swappiness value of 0 causes swapping not to happen unless absolutely
|
|
72
71
|
necessary. A swappiness value of 100 causes pages to be swapped very
|
|
73
72
|
aggressively. Accepted values are whole numbers between 0 and 100.
|
|
73
|
+
aws_batch_tags: Dict[str, str], optional, default None
|
|
74
|
+
Sets arbitrary AWS tags on the AWS Batch compute environment.
|
|
75
|
+
Set as string key-value pairs.
|
|
74
76
|
use_tmpfs : bool, default False
|
|
75
77
|
This enables an explicit tmpfs mount for this step. Note that tmpfs is
|
|
76
78
|
not available on Fargate compute environments
|
|
@@ -88,15 +90,15 @@ class BatchDecorator(StepDecorator):
|
|
|
88
90
|
Alias for inferentia. Use only one of the two.
|
|
89
91
|
efa : int, default 0
|
|
90
92
|
Number of elastic fabric adapter network devices to attach to container
|
|
91
|
-
ephemeral_storage: int, default None
|
|
92
|
-
The total amount, in GiB, of ephemeral storage to set for the task
|
|
93
|
+
ephemeral_storage : int, default None
|
|
94
|
+
The total amount, in GiB, of ephemeral storage to set for the task, 21-200GiB.
|
|
93
95
|
This is only relevant for Fargate compute environments
|
|
94
96
|
log_driver: str, optional, default None
|
|
95
97
|
The log driver to use for the Amazon ECS container.
|
|
96
98
|
log_options: List[str], optional, default None
|
|
97
99
|
List of strings containing options for the chosen log driver. The configurable values
|
|
98
100
|
depend on the `log driver` chosen. Validation of these options is not supported yet.
|
|
99
|
-
Example
|
|
101
|
+
Example: [`awslogs-group:aws/batch/job`]
|
|
100
102
|
"""
|
|
101
103
|
|
|
102
104
|
name = "batch"
|
|
@@ -117,25 +119,30 @@ class BatchDecorator(StepDecorator):
|
|
|
117
119
|
"host_volumes": None,
|
|
118
120
|
"efs_volumes": None,
|
|
119
121
|
"use_tmpfs": False,
|
|
122
|
+
"aws_batch_tags": None,
|
|
120
123
|
"tmpfs_tempdir": True,
|
|
121
124
|
"tmpfs_size": None,
|
|
122
125
|
"tmpfs_path": "/metaflow_temp",
|
|
123
126
|
"ephemeral_storage": None,
|
|
124
127
|
"log_driver": None,
|
|
125
128
|
"log_options": None,
|
|
129
|
+
"executable": None,
|
|
126
130
|
}
|
|
127
131
|
resource_defaults = {
|
|
128
132
|
"cpu": "1",
|
|
129
133
|
"gpu": "0",
|
|
130
134
|
"memory": "4096",
|
|
131
135
|
}
|
|
136
|
+
package_metadata = None
|
|
132
137
|
package_url = None
|
|
133
138
|
package_sha = None
|
|
134
139
|
run_time_limit = None
|
|
135
140
|
|
|
136
|
-
|
|
137
|
-
|
|
141
|
+
# Conda environment support
|
|
142
|
+
supports_conda_environment = True
|
|
143
|
+
target_platform = "linux-64"
|
|
138
144
|
|
|
145
|
+
def init(self):
|
|
139
146
|
# If no docker image is explicitly specified, impute a default image.
|
|
140
147
|
if not self.attributes["image"]:
|
|
141
148
|
# If metaflow-config specifies a docker image, just use that.
|
|
@@ -174,6 +181,29 @@ class BatchDecorator(StepDecorator):
|
|
|
174
181
|
if self.attributes["trainium"] is not None:
|
|
175
182
|
self.attributes["inferentia"] = self.attributes["trainium"]
|
|
176
183
|
|
|
184
|
+
if not isinstance(BATCH_DEFAULT_TAGS, dict) and not all(
|
|
185
|
+
isinstance(k, str) and isinstance(v, str)
|
|
186
|
+
for k, v in BATCH_DEFAULT_TAGS.items()
|
|
187
|
+
):
|
|
188
|
+
raise BatchException(
|
|
189
|
+
"BATCH_DEFAULT_TAGS environment variable must be Dict[str, str]"
|
|
190
|
+
)
|
|
191
|
+
|
|
192
|
+
if self.attributes["aws_batch_tags"] is not None:
|
|
193
|
+
if not isinstance(self.attributes["aws_batch_tags"], dict) and not all(
|
|
194
|
+
isinstance(k, str) and isinstance(v, str)
|
|
195
|
+
for k, v in self.attributes["aws_batch_tags"].items()
|
|
196
|
+
):
|
|
197
|
+
raise BatchException("aws_batch_tags must be Dict[str, str]")
|
|
198
|
+
else:
|
|
199
|
+
self.attributes["aws_batch_tags"] = {}
|
|
200
|
+
|
|
201
|
+
if BATCH_DEFAULT_TAGS:
|
|
202
|
+
self.attributes["aws_batch_tags"] = {
|
|
203
|
+
**BATCH_DEFAULT_TAGS,
|
|
204
|
+
**self.attributes["aws_batch_tags"],
|
|
205
|
+
}
|
|
206
|
+
|
|
177
207
|
# clean up the alias attribute so it is not passed on.
|
|
178
208
|
self.attributes.pop("trainium", None)
|
|
179
209
|
|
|
@@ -206,6 +236,11 @@ class BatchDecorator(StepDecorator):
|
|
|
206
236
|
if self.attributes["tmpfs_path"] and self.attributes["tmpfs_path"][0] != "/":
|
|
207
237
|
raise BatchException("'tmpfs_path' needs to be an absolute path")
|
|
208
238
|
|
|
239
|
+
# Validate Batch tags
|
|
240
|
+
if self.attributes["aws_batch_tags"]:
|
|
241
|
+
for key, val in self.attributes["aws_batch_tags"].items():
|
|
242
|
+
validate_aws_tag(key, val)
|
|
243
|
+
|
|
209
244
|
def runtime_init(self, flow, graph, package, run_id):
|
|
210
245
|
# Set some more internal state.
|
|
211
246
|
self.flow = flow
|
|
@@ -227,10 +262,20 @@ class BatchDecorator(StepDecorator):
|
|
|
227
262
|
# to execute on AWS Batch anymore. We can execute possible fallback
|
|
228
263
|
# code locally.
|
|
229
264
|
cli_args.commands = ["batch", "step"]
|
|
265
|
+
cli_args.command_args.append(self.package_metadata)
|
|
230
266
|
cli_args.command_args.append(self.package_sha)
|
|
231
267
|
cli_args.command_args.append(self.package_url)
|
|
232
|
-
|
|
268
|
+
# skip certain keys as CLI arguments
|
|
269
|
+
_skip_keys = ["aws_batch_tags"]
|
|
270
|
+
cli_args.command_options.update(
|
|
271
|
+
{k: v for k, v in self.attributes.items() if k not in _skip_keys}
|
|
272
|
+
)
|
|
233
273
|
cli_args.command_options["run-time-limit"] = self.run_time_limit
|
|
274
|
+
|
|
275
|
+
# Pass the supplied AWS batch tags to the step CLI cmd
|
|
276
|
+
cli_args.command_options["aws-batch-tag"] = [
|
|
277
|
+
"%s=%s" % (k, v) for k, v in self.attributes["aws_batch_tags"].items()
|
|
278
|
+
]
|
|
234
279
|
if not R.use_r():
|
|
235
280
|
cli_args.entrypoint[0] = sys.executable
|
|
236
281
|
|
|
@@ -261,8 +306,8 @@ class BatchDecorator(StepDecorator):
|
|
|
261
306
|
# metadata. A rudimentary way to detect non-local execution is to
|
|
262
307
|
# check for the existence of AWS_BATCH_JOB_ID environment variable.
|
|
263
308
|
|
|
309
|
+
meta = {}
|
|
264
310
|
if "AWS_BATCH_JOB_ID" in os.environ:
|
|
265
|
-
meta = {}
|
|
266
311
|
meta["aws-batch-job-id"] = os.environ["AWS_BATCH_JOB_ID"]
|
|
267
312
|
meta["aws-batch-job-attempt"] = os.environ["AWS_BATCH_JOB_ATTEMPT"]
|
|
268
313
|
meta["aws-batch-ce-name"] = os.environ["AWS_BATCH_CE_NAME"]
|
|
@@ -275,6 +320,10 @@ class BatchDecorator(StepDecorator):
|
|
|
275
320
|
# Metaflow would be running the container agent compatible with
|
|
276
321
|
# version V4.
|
|
277
322
|
# https://docs.aws.amazon.com/AmazonECS/latest/developerguide/task-metadata-endpoint.html
|
|
323
|
+
|
|
324
|
+
# TODO: Remove dependency on requests
|
|
325
|
+
import requests
|
|
326
|
+
|
|
278
327
|
try:
|
|
279
328
|
logs_meta = (
|
|
280
329
|
requests.get(url=os.environ["ECS_CONTAINER_METADATA_URI_V4"])
|
|
@@ -290,21 +339,16 @@ class BatchDecorator(StepDecorator):
|
|
|
290
339
|
instance_meta = get_ec2_instance_metadata()
|
|
291
340
|
meta.update(instance_meta)
|
|
292
341
|
|
|
293
|
-
entries = [
|
|
294
|
-
MetaDatum(
|
|
295
|
-
field=k,
|
|
296
|
-
value=v,
|
|
297
|
-
type=k,
|
|
298
|
-
tags=["attempt_id:{0}".format(retry_count)],
|
|
299
|
-
)
|
|
300
|
-
for k, v in meta.items()
|
|
301
|
-
]
|
|
302
|
-
# Register book-keeping metadata for debugging.
|
|
303
|
-
metadata.register_metadata(run_id, step_name, task_id, entries)
|
|
304
|
-
|
|
305
342
|
self._save_logs_sidecar = Sidecar("save_logs_periodically")
|
|
306
343
|
self._save_logs_sidecar.start()
|
|
307
344
|
|
|
345
|
+
# Start spot termination monitor sidecar.
|
|
346
|
+
current._update_env(
|
|
347
|
+
{"spot_termination_notice": "/tmp/spot_termination_notice"}
|
|
348
|
+
)
|
|
349
|
+
self._spot_monitor_sidecar = Sidecar("spot_termination_monitor")
|
|
350
|
+
self._spot_monitor_sidecar.start()
|
|
351
|
+
|
|
308
352
|
num_parallel = int(os.environ.get("AWS_BATCH_JOB_NUM_NODES", 0))
|
|
309
353
|
if num_parallel >= 1 and ubf_context == UBF_CONTROL:
|
|
310
354
|
# UBF handling for multinode case
|
|
@@ -322,6 +366,21 @@ class BatchDecorator(StepDecorator):
|
|
|
322
366
|
|
|
323
367
|
if num_parallel >= 1:
|
|
324
368
|
_setup_multinode_environment()
|
|
369
|
+
# current.parallel.node_index will be correctly available over here.
|
|
370
|
+
meta.update({"parallel-node-index": current.parallel.node_index})
|
|
371
|
+
|
|
372
|
+
if len(meta) > 0:
|
|
373
|
+
entries = [
|
|
374
|
+
MetaDatum(
|
|
375
|
+
field=k,
|
|
376
|
+
value=v,
|
|
377
|
+
type=k,
|
|
378
|
+
tags=["attempt_id:{0}".format(retry_count)],
|
|
379
|
+
)
|
|
380
|
+
for k, v in meta.items()
|
|
381
|
+
]
|
|
382
|
+
# Register book-keeping metadata for debugging.
|
|
383
|
+
metadata.register_metadata(run_id, step_name, task_id, entries)
|
|
325
384
|
|
|
326
385
|
def task_finished(
|
|
327
386
|
self, step_name, flow, graph, is_task_ok, retry_count, max_retries
|
|
@@ -342,6 +401,7 @@ class BatchDecorator(StepDecorator):
|
|
|
342
401
|
|
|
343
402
|
try:
|
|
344
403
|
self._save_logs_sidecar.terminate()
|
|
404
|
+
self._spot_monitor_sidecar.terminate()
|
|
345
405
|
except:
|
|
346
406
|
# Best effort kill
|
|
347
407
|
pass
|
|
@@ -378,7 +438,7 @@ class BatchDecorator(StepDecorator):
|
|
|
378
438
|
len(flow._control_mapper_tasks),
|
|
379
439
|
)
|
|
380
440
|
)
|
|
381
|
-
except Exception as e:
|
|
441
|
+
except Exception:
|
|
382
442
|
pass
|
|
383
443
|
raise Exception(
|
|
384
444
|
"Batch secondary workers did not finish in %s seconds" % TIMEOUT
|
|
@@ -387,9 +447,16 @@ class BatchDecorator(StepDecorator):
|
|
|
387
447
|
@classmethod
|
|
388
448
|
def _save_package_once(cls, flow_datastore, package):
|
|
389
449
|
if cls.package_url is None:
|
|
390
|
-
|
|
391
|
-
|
|
392
|
-
|
|
450
|
+
if not FEAT_ALWAYS_UPLOAD_CODE_PACKAGE:
|
|
451
|
+
cls.package_url, cls.package_sha = flow_datastore.save_data(
|
|
452
|
+
[package.blob], len_hint=1
|
|
453
|
+
)[0]
|
|
454
|
+
cls.package_metadata = package.package_metadata
|
|
455
|
+
else:
|
|
456
|
+
# Blocks until the package is uploaded
|
|
457
|
+
cls.package_url = package.package_url()
|
|
458
|
+
cls.package_sha = package.package_sha()
|
|
459
|
+
cls.package_metadata = package.package_metadata
|
|
393
460
|
|
|
394
461
|
|
|
395
462
|
def _setup_multinode_environment():
|
|
@@ -50,24 +50,27 @@ class AwsSecretsManagerSecretsProvider(SecretsProvider):
|
|
|
50
50
|
The secret payload from AWS is EITHER a string OR a binary blob.
|
|
51
51
|
|
|
52
52
|
If the secret contains a string payload ("SecretString"):
|
|
53
|
-
- if the `
|
|
53
|
+
- if the `json` option is True (default):
|
|
54
54
|
{SecretString} will be parsed as a JSON. If successfully parsed, AND the JSON contains a
|
|
55
55
|
top-level object, each entry K/V in the object will also be converted to an entry in the result. V will
|
|
56
56
|
always be casted to a string (if not already a string).
|
|
57
|
-
- If `
|
|
58
|
-
{SecretString} will be returned as a single entry in the result,
|
|
57
|
+
- If `json` option is False:
|
|
58
|
+
{SecretString} will be returned as a single entry in the result, where the key is either:
|
|
59
|
+
- the `secret_id`, OR
|
|
60
|
+
- the value set by `options={"env_var_name": custom_env_var_name}`.
|
|
59
61
|
|
|
60
|
-
Otherwise, the secret contains a binary blob payload ("SecretBinary")
|
|
61
|
-
- The result
|
|
62
|
+
Otherwise, if the secret contains a binary blob payload ("SecretBinary"):
|
|
63
|
+
- The result dict contains '{SecretName}': '{SecretBinary}', where {SecretBinary} is a base64-encoded string.
|
|
62
64
|
|
|
63
|
-
All keys in the result are sanitized to be more valid environment variable names. This is done on a best
|
|
65
|
+
All keys in the result are sanitized to be more valid environment variable names. This is done on a best-effort
|
|
64
66
|
basis. Further validation is expected to be done by the invoking @secrets decorator itself.
|
|
65
67
|
|
|
66
|
-
:param secret_id: ARN or friendly name of the secret
|
|
67
|
-
:param options:
|
|
68
|
-
:param role: AWS IAM Role ARN to assume before reading the secret
|
|
69
|
-
:return:
|
|
68
|
+
:param secret_id: ARN or friendly name of the secret.
|
|
69
|
+
:param options: Dictionary of additional options. E.g., `options={"env_var_name": custom_env_var_name}`.
|
|
70
|
+
:param role: AWS IAM Role ARN to assume before reading the secret.
|
|
71
|
+
:return: Dictionary of environment variables. All keys and values are strings.
|
|
70
72
|
"""
|
|
73
|
+
|
|
71
74
|
import botocore
|
|
72
75
|
from metaflow.plugins.aws.aws_client import get_aws_client
|
|
73
76
|
|