ob-metaflow 2.11.13.1__py2.py3-none-any.whl → 2.19.7.1rc0__py2.py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- metaflow/R.py +10 -7
- metaflow/__init__.py +40 -25
- metaflow/_vendor/imghdr/__init__.py +186 -0
- metaflow/_vendor/importlib_metadata/__init__.py +1063 -0
- metaflow/_vendor/importlib_metadata/_adapters.py +68 -0
- metaflow/_vendor/importlib_metadata/_collections.py +30 -0
- metaflow/_vendor/importlib_metadata/_compat.py +71 -0
- metaflow/_vendor/importlib_metadata/_functools.py +104 -0
- metaflow/_vendor/importlib_metadata/_itertools.py +73 -0
- metaflow/_vendor/importlib_metadata/_meta.py +48 -0
- metaflow/_vendor/importlib_metadata/_text.py +99 -0
- metaflow/_vendor/importlib_metadata/py.typed +0 -0
- metaflow/_vendor/typeguard/__init__.py +48 -0
- metaflow/_vendor/typeguard/_checkers.py +1070 -0
- metaflow/_vendor/typeguard/_config.py +108 -0
- metaflow/_vendor/typeguard/_decorators.py +233 -0
- metaflow/_vendor/typeguard/_exceptions.py +42 -0
- metaflow/_vendor/typeguard/_functions.py +308 -0
- metaflow/_vendor/typeguard/_importhook.py +213 -0
- metaflow/_vendor/typeguard/_memo.py +48 -0
- metaflow/_vendor/typeguard/_pytest_plugin.py +127 -0
- metaflow/_vendor/typeguard/_suppression.py +86 -0
- metaflow/_vendor/typeguard/_transformer.py +1229 -0
- metaflow/_vendor/typeguard/_union_transformer.py +55 -0
- metaflow/_vendor/typeguard/_utils.py +173 -0
- metaflow/_vendor/typeguard/py.typed +0 -0
- metaflow/_vendor/typing_extensions.py +3641 -0
- metaflow/_vendor/v3_7/importlib_metadata/__init__.py +1063 -0
- metaflow/_vendor/v3_7/importlib_metadata/_adapters.py +68 -0
- metaflow/_vendor/v3_7/importlib_metadata/_collections.py +30 -0
- metaflow/_vendor/v3_7/importlib_metadata/_compat.py +71 -0
- metaflow/_vendor/v3_7/importlib_metadata/_functools.py +104 -0
- metaflow/_vendor/v3_7/importlib_metadata/_itertools.py +73 -0
- metaflow/_vendor/v3_7/importlib_metadata/_meta.py +48 -0
- metaflow/_vendor/v3_7/importlib_metadata/_text.py +99 -0
- metaflow/_vendor/v3_7/importlib_metadata/py.typed +0 -0
- metaflow/_vendor/v3_7/typeguard/__init__.py +48 -0
- metaflow/_vendor/v3_7/typeguard/_checkers.py +906 -0
- metaflow/_vendor/v3_7/typeguard/_config.py +108 -0
- metaflow/_vendor/v3_7/typeguard/_decorators.py +237 -0
- metaflow/_vendor/v3_7/typeguard/_exceptions.py +42 -0
- metaflow/_vendor/v3_7/typeguard/_functions.py +310 -0
- metaflow/_vendor/v3_7/typeguard/_importhook.py +213 -0
- metaflow/_vendor/v3_7/typeguard/_memo.py +48 -0
- metaflow/_vendor/v3_7/typeguard/_pytest_plugin.py +100 -0
- metaflow/_vendor/v3_7/typeguard/_suppression.py +88 -0
- metaflow/_vendor/v3_7/typeguard/_transformer.py +1207 -0
- metaflow/_vendor/v3_7/typeguard/_union_transformer.py +54 -0
- metaflow/_vendor/v3_7/typeguard/_utils.py +169 -0
- metaflow/_vendor/v3_7/typeguard/py.typed +0 -0
- metaflow/_vendor/v3_7/typing_extensions.py +3072 -0
- metaflow/_vendor/yaml/__init__.py +427 -0
- metaflow/_vendor/yaml/composer.py +139 -0
- metaflow/_vendor/yaml/constructor.py +748 -0
- metaflow/_vendor/yaml/cyaml.py +101 -0
- metaflow/_vendor/yaml/dumper.py +62 -0
- metaflow/_vendor/yaml/emitter.py +1137 -0
- metaflow/_vendor/yaml/error.py +75 -0
- metaflow/_vendor/yaml/events.py +86 -0
- metaflow/_vendor/yaml/loader.py +63 -0
- metaflow/_vendor/yaml/nodes.py +49 -0
- metaflow/_vendor/yaml/parser.py +589 -0
- metaflow/_vendor/yaml/reader.py +185 -0
- metaflow/_vendor/yaml/representer.py +389 -0
- metaflow/_vendor/yaml/resolver.py +227 -0
- metaflow/_vendor/yaml/scanner.py +1435 -0
- metaflow/_vendor/yaml/serializer.py +111 -0
- metaflow/_vendor/yaml/tokens.py +104 -0
- metaflow/cards.py +5 -0
- metaflow/cli.py +331 -785
- metaflow/cli_args.py +17 -0
- metaflow/cli_components/__init__.py +0 -0
- metaflow/cli_components/dump_cmd.py +96 -0
- metaflow/cli_components/init_cmd.py +52 -0
- metaflow/cli_components/run_cmds.py +546 -0
- metaflow/cli_components/step_cmd.py +334 -0
- metaflow/cli_components/utils.py +140 -0
- metaflow/client/__init__.py +1 -0
- metaflow/client/core.py +467 -73
- metaflow/client/filecache.py +75 -35
- metaflow/clone_util.py +7 -1
- metaflow/cmd/code/__init__.py +231 -0
- metaflow/cmd/develop/stub_generator.py +756 -288
- metaflow/cmd/develop/stubs.py +12 -28
- metaflow/cmd/main_cli.py +6 -4
- metaflow/cmd/make_wrapper.py +78 -0
- metaflow/datastore/__init__.py +1 -0
- metaflow/datastore/content_addressed_store.py +41 -10
- metaflow/datastore/datastore_set.py +11 -2
- metaflow/datastore/flow_datastore.py +156 -10
- metaflow/datastore/spin_datastore.py +91 -0
- metaflow/datastore/task_datastore.py +154 -39
- metaflow/debug.py +5 -0
- metaflow/decorators.py +404 -78
- metaflow/exception.py +8 -2
- metaflow/extension_support/__init__.py +527 -376
- metaflow/extension_support/_empty_file.py +2 -2
- metaflow/extension_support/plugins.py +49 -31
- metaflow/flowspec.py +482 -33
- metaflow/graph.py +210 -42
- metaflow/includefile.py +84 -40
- metaflow/lint.py +141 -22
- metaflow/meta_files.py +13 -0
- metaflow/{metadata → metadata_provider}/heartbeat.py +24 -8
- metaflow/{metadata → metadata_provider}/metadata.py +86 -1
- metaflow/metaflow_config.py +175 -28
- metaflow/metaflow_config_funcs.py +51 -3
- metaflow/metaflow_current.py +4 -10
- metaflow/metaflow_environment.py +139 -53
- metaflow/metaflow_git.py +115 -0
- metaflow/metaflow_profile.py +18 -0
- metaflow/metaflow_version.py +150 -66
- metaflow/mflog/__init__.py +4 -3
- metaflow/mflog/save_logs.py +2 -2
- metaflow/multicore_utils.py +31 -14
- metaflow/package/__init__.py +673 -0
- metaflow/packaging_sys/__init__.py +880 -0
- metaflow/packaging_sys/backend.py +128 -0
- metaflow/packaging_sys/distribution_support.py +153 -0
- metaflow/packaging_sys/tar_backend.py +99 -0
- metaflow/packaging_sys/utils.py +54 -0
- metaflow/packaging_sys/v1.py +527 -0
- metaflow/parameters.py +149 -28
- metaflow/plugins/__init__.py +74 -5
- metaflow/plugins/airflow/airflow.py +40 -25
- metaflow/plugins/airflow/airflow_cli.py +22 -5
- metaflow/plugins/airflow/airflow_decorator.py +1 -1
- metaflow/plugins/airflow/airflow_utils.py +5 -3
- metaflow/plugins/airflow/sensors/base_sensor.py +4 -4
- metaflow/plugins/airflow/sensors/external_task_sensor.py +2 -2
- metaflow/plugins/airflow/sensors/s3_sensor.py +2 -2
- metaflow/plugins/argo/argo_client.py +78 -33
- metaflow/plugins/argo/argo_events.py +6 -6
- metaflow/plugins/argo/argo_workflows.py +2410 -527
- metaflow/plugins/argo/argo_workflows_cli.py +571 -121
- metaflow/plugins/argo/argo_workflows_decorator.py +43 -12
- metaflow/plugins/argo/argo_workflows_deployer.py +106 -0
- metaflow/plugins/argo/argo_workflows_deployer_objects.py +453 -0
- metaflow/plugins/argo/capture_error.py +73 -0
- metaflow/plugins/argo/conditional_input_paths.py +35 -0
- metaflow/plugins/argo/exit_hooks.py +209 -0
- metaflow/plugins/argo/jobset_input_paths.py +15 -0
- metaflow/plugins/argo/param_val.py +19 -0
- metaflow/plugins/aws/aws_client.py +10 -3
- metaflow/plugins/aws/aws_utils.py +55 -2
- metaflow/plugins/aws/batch/batch.py +72 -5
- metaflow/plugins/aws/batch/batch_cli.py +33 -10
- metaflow/plugins/aws/batch/batch_client.py +4 -3
- metaflow/plugins/aws/batch/batch_decorator.py +102 -35
- metaflow/plugins/aws/secrets_manager/aws_secrets_manager_secrets_provider.py +13 -10
- metaflow/plugins/aws/step_functions/dynamo_db_client.py +0 -3
- metaflow/plugins/aws/step_functions/production_token.py +1 -1
- metaflow/plugins/aws/step_functions/step_functions.py +65 -8
- metaflow/plugins/aws/step_functions/step_functions_cli.py +101 -7
- metaflow/plugins/aws/step_functions/step_functions_decorator.py +1 -2
- metaflow/plugins/aws/step_functions/step_functions_deployer.py +97 -0
- metaflow/plugins/aws/step_functions/step_functions_deployer_objects.py +264 -0
- metaflow/plugins/azure/azure_exceptions.py +1 -1
- metaflow/plugins/azure/azure_secret_manager_secrets_provider.py +240 -0
- metaflow/plugins/azure/azure_tail.py +1 -1
- metaflow/plugins/azure/includefile_support.py +2 -0
- metaflow/plugins/cards/card_cli.py +66 -30
- metaflow/plugins/cards/card_creator.py +25 -1
- metaflow/plugins/cards/card_datastore.py +21 -49
- metaflow/plugins/cards/card_decorator.py +132 -8
- metaflow/plugins/cards/card_modules/basic.py +112 -17
- metaflow/plugins/cards/card_modules/bundle.css +1 -1
- metaflow/plugins/cards/card_modules/card.py +16 -1
- metaflow/plugins/cards/card_modules/chevron/renderer.py +1 -1
- metaflow/plugins/cards/card_modules/components.py +665 -28
- metaflow/plugins/cards/card_modules/convert_to_native_type.py +36 -7
- metaflow/plugins/cards/card_modules/json_viewer.py +232 -0
- metaflow/plugins/cards/card_modules/main.css +1 -0
- metaflow/plugins/cards/card_modules/main.js +68 -49
- metaflow/plugins/cards/card_modules/renderer_tools.py +1 -0
- metaflow/plugins/cards/card_modules/test_cards.py +26 -12
- metaflow/plugins/cards/card_server.py +39 -14
- metaflow/plugins/cards/component_serializer.py +2 -9
- metaflow/plugins/cards/metadata.py +22 -0
- metaflow/plugins/catch_decorator.py +9 -0
- metaflow/plugins/datastores/azure_storage.py +10 -1
- metaflow/plugins/datastores/gs_storage.py +6 -2
- metaflow/plugins/datastores/local_storage.py +12 -6
- metaflow/plugins/datastores/spin_storage.py +12 -0
- metaflow/plugins/datatools/local.py +2 -0
- metaflow/plugins/datatools/s3/s3.py +126 -75
- metaflow/plugins/datatools/s3/s3op.py +254 -121
- metaflow/plugins/env_escape/__init__.py +3 -3
- metaflow/plugins/env_escape/client_modules.py +102 -72
- metaflow/plugins/env_escape/server.py +7 -0
- metaflow/plugins/env_escape/stub.py +24 -5
- metaflow/plugins/events_decorator.py +343 -185
- metaflow/plugins/exit_hook/__init__.py +0 -0
- metaflow/plugins/exit_hook/exit_hook_decorator.py +46 -0
- metaflow/plugins/exit_hook/exit_hook_script.py +52 -0
- metaflow/plugins/gcp/__init__.py +1 -1
- metaflow/plugins/gcp/gcp_secret_manager_secrets_provider.py +11 -6
- metaflow/plugins/gcp/gs_tail.py +10 -6
- metaflow/plugins/gcp/includefile_support.py +3 -0
- metaflow/plugins/kubernetes/kube_utils.py +108 -0
- metaflow/plugins/kubernetes/kubernetes.py +411 -130
- metaflow/plugins/kubernetes/kubernetes_cli.py +168 -36
- metaflow/plugins/kubernetes/kubernetes_client.py +104 -2
- metaflow/plugins/kubernetes/kubernetes_decorator.py +246 -88
- metaflow/plugins/kubernetes/kubernetes_job.py +253 -581
- metaflow/plugins/kubernetes/kubernetes_jobsets.py +1071 -0
- metaflow/plugins/kubernetes/spot_metadata_cli.py +69 -0
- metaflow/plugins/kubernetes/spot_monitor_sidecar.py +109 -0
- metaflow/plugins/logs_cli.py +359 -0
- metaflow/plugins/{metadata → metadata_providers}/local.py +144 -84
- metaflow/plugins/{metadata → metadata_providers}/service.py +103 -26
- metaflow/plugins/metadata_providers/spin.py +16 -0
- metaflow/plugins/package_cli.py +36 -24
- metaflow/plugins/parallel_decorator.py +128 -11
- metaflow/plugins/parsers.py +16 -0
- metaflow/plugins/project_decorator.py +51 -5
- metaflow/plugins/pypi/bootstrap.py +357 -105
- metaflow/plugins/pypi/conda_decorator.py +82 -81
- metaflow/plugins/pypi/conda_environment.py +187 -52
- metaflow/plugins/pypi/micromamba.py +157 -47
- metaflow/plugins/pypi/parsers.py +268 -0
- metaflow/plugins/pypi/pip.py +88 -13
- metaflow/plugins/pypi/pypi_decorator.py +37 -1
- metaflow/plugins/pypi/utils.py +48 -2
- metaflow/plugins/resources_decorator.py +2 -2
- metaflow/plugins/secrets/__init__.py +3 -0
- metaflow/plugins/secrets/secrets_decorator.py +26 -181
- metaflow/plugins/secrets/secrets_func.py +49 -0
- metaflow/plugins/secrets/secrets_spec.py +101 -0
- metaflow/plugins/secrets/utils.py +74 -0
- metaflow/plugins/tag_cli.py +4 -7
- metaflow/plugins/test_unbounded_foreach_decorator.py +41 -6
- metaflow/plugins/timeout_decorator.py +3 -3
- metaflow/plugins/uv/__init__.py +0 -0
- metaflow/plugins/uv/bootstrap.py +128 -0
- metaflow/plugins/uv/uv_environment.py +72 -0
- metaflow/procpoll.py +1 -1
- metaflow/pylint_wrapper.py +5 -1
- metaflow/runner/__init__.py +0 -0
- metaflow/runner/click_api.py +717 -0
- metaflow/runner/deployer.py +470 -0
- metaflow/runner/deployer_impl.py +201 -0
- metaflow/runner/metaflow_runner.py +714 -0
- metaflow/runner/nbdeploy.py +132 -0
- metaflow/runner/nbrun.py +225 -0
- metaflow/runner/subprocess_manager.py +650 -0
- metaflow/runner/utils.py +335 -0
- metaflow/runtime.py +1078 -260
- metaflow/sidecar/sidecar_worker.py +1 -1
- metaflow/system/__init__.py +5 -0
- metaflow/system/system_logger.py +85 -0
- metaflow/system/system_monitor.py +108 -0
- metaflow/system/system_utils.py +19 -0
- metaflow/task.py +521 -225
- metaflow/tracing/__init__.py +7 -7
- metaflow/tracing/span_exporter.py +31 -38
- metaflow/tracing/tracing_modules.py +38 -43
- metaflow/tuple_util.py +27 -0
- metaflow/user_configs/__init__.py +0 -0
- metaflow/user_configs/config_options.py +563 -0
- metaflow/user_configs/config_parameters.py +598 -0
- metaflow/user_decorators/__init__.py +0 -0
- metaflow/user_decorators/common.py +144 -0
- metaflow/user_decorators/mutable_flow.py +512 -0
- metaflow/user_decorators/mutable_step.py +424 -0
- metaflow/user_decorators/user_flow_decorator.py +264 -0
- metaflow/user_decorators/user_step_decorator.py +749 -0
- metaflow/util.py +243 -27
- metaflow/vendor.py +23 -7
- metaflow/version.py +1 -1
- ob_metaflow-2.19.7.1rc0.data/data/share/metaflow/devtools/Makefile +355 -0
- ob_metaflow-2.19.7.1rc0.data/data/share/metaflow/devtools/Tiltfile +726 -0
- ob_metaflow-2.19.7.1rc0.data/data/share/metaflow/devtools/pick_services.sh +105 -0
- ob_metaflow-2.19.7.1rc0.dist-info/METADATA +87 -0
- ob_metaflow-2.19.7.1rc0.dist-info/RECORD +445 -0
- {ob_metaflow-2.11.13.1.dist-info → ob_metaflow-2.19.7.1rc0.dist-info}/WHEEL +1 -1
- {ob_metaflow-2.11.13.1.dist-info → ob_metaflow-2.19.7.1rc0.dist-info}/entry_points.txt +1 -0
- metaflow/_vendor/v3_5/__init__.py +0 -1
- metaflow/_vendor/v3_5/importlib_metadata/__init__.py +0 -644
- metaflow/_vendor/v3_5/importlib_metadata/_compat.py +0 -152
- metaflow/package.py +0 -188
- ob_metaflow-2.11.13.1.dist-info/METADATA +0 -85
- ob_metaflow-2.11.13.1.dist-info/RECORD +0 -308
- /metaflow/_vendor/{v3_5/zipp.py → zipp.py} +0 -0
- /metaflow/{metadata → metadata_provider}/__init__.py +0 -0
- /metaflow/{metadata → metadata_provider}/util.py +0 -0
- /metaflow/plugins/{metadata → metadata_providers}/__init__.py +0 -0
- {ob_metaflow-2.11.13.1.dist-info → ob_metaflow-2.19.7.1rc0.dist-info/licenses}/LICENSE +0 -0
- {ob_metaflow-2.11.13.1.dist-info → ob_metaflow-2.19.7.1rc0.dist-info}/top_level.txt +0 -0
|
@@ -3,16 +3,25 @@ import sys
|
|
|
3
3
|
import time
|
|
4
4
|
import traceback
|
|
5
5
|
|
|
6
|
+
from metaflow.plugins.kubernetes.kube_utils import (
|
|
7
|
+
parse_cli_options,
|
|
8
|
+
parse_kube_keyvalue_list,
|
|
9
|
+
)
|
|
10
|
+
from metaflow.plugins.kubernetes.kubernetes_client import KubernetesClient
|
|
11
|
+
import metaflow.tracing as tracing
|
|
6
12
|
from metaflow import JSONTypeClass, util
|
|
7
13
|
from metaflow._vendor import click
|
|
8
|
-
from metaflow.exception import METAFLOW_EXIT_DISALLOW_RETRY,
|
|
9
|
-
from metaflow.
|
|
10
|
-
from metaflow.metaflow_config import DATASTORE_LOCAL_DIR
|
|
14
|
+
from metaflow.exception import METAFLOW_EXIT_DISALLOW_RETRY, MetaflowException
|
|
15
|
+
from metaflow.metadata_provider.util import sync_local_metadata_from_datastore
|
|
16
|
+
from metaflow.metaflow_config import DATASTORE_LOCAL_DIR
|
|
11
17
|
from metaflow.mflog import TASK_LOG_SOURCE
|
|
12
|
-
|
|
18
|
+
from metaflow.unbounded_foreach import UBF_CONTROL, UBF_TASK
|
|
13
19
|
|
|
14
|
-
from .kubernetes import
|
|
15
|
-
|
|
20
|
+
from .kubernetes import (
|
|
21
|
+
Kubernetes,
|
|
22
|
+
KubernetesException,
|
|
23
|
+
KubernetesKilledException,
|
|
24
|
+
)
|
|
16
25
|
|
|
17
26
|
|
|
18
27
|
@click.group()
|
|
@@ -25,13 +34,14 @@ def kubernetes():
|
|
|
25
34
|
pass
|
|
26
35
|
|
|
27
36
|
|
|
28
|
-
@tracing.cli_entrypoint("kubernetes/step")
|
|
29
37
|
@kubernetes.command(
|
|
30
38
|
help="Execute a single task on Kubernetes. This command calls the top-level step "
|
|
31
39
|
"command inside a Kubernetes pod with the given options. Typically you do not call "
|
|
32
40
|
"this command directly; it is used internally by Metaflow."
|
|
33
41
|
)
|
|
42
|
+
@tracing.cli("kubernetes/step")
|
|
34
43
|
@click.argument("step-name")
|
|
44
|
+
@click.argument("code-package-metadata")
|
|
35
45
|
@click.argument("code-package-sha")
|
|
36
46
|
@click.argument("code-package-url")
|
|
37
47
|
@click.option(
|
|
@@ -44,6 +54,12 @@ def kubernetes():
|
|
|
44
54
|
default=None,
|
|
45
55
|
help="Optional Docker Image Pull Policy for Kubernetes pod.",
|
|
46
56
|
)
|
|
57
|
+
@click.option(
|
|
58
|
+
"--image-pull-secrets",
|
|
59
|
+
default=None,
|
|
60
|
+
type=JSONTypeClass(),
|
|
61
|
+
multiple=False,
|
|
62
|
+
)
|
|
47
63
|
@click.option(
|
|
48
64
|
"--service-account",
|
|
49
65
|
help="IRSA requirement for Kubernetes pod.",
|
|
@@ -107,6 +123,23 @@ def kubernetes():
|
|
|
107
123
|
type=JSONTypeClass(),
|
|
108
124
|
multiple=False,
|
|
109
125
|
)
|
|
126
|
+
@click.option("--shared-memory", default=None, help="Size of shared memory in MiB")
|
|
127
|
+
@click.option("--port", default=None, help="Port number to expose from the container")
|
|
128
|
+
@click.option(
|
|
129
|
+
"--ubf-context", default=None, type=click.Choice([None, UBF_CONTROL, UBF_TASK])
|
|
130
|
+
)
|
|
131
|
+
@click.option(
|
|
132
|
+
"--num-parallel",
|
|
133
|
+
default=None,
|
|
134
|
+
type=int,
|
|
135
|
+
help="Number of parallel nodes to run as a multi-node job.",
|
|
136
|
+
)
|
|
137
|
+
@click.option(
|
|
138
|
+
"--qos",
|
|
139
|
+
default=None,
|
|
140
|
+
type=str,
|
|
141
|
+
help="Quality of Service class for the Kubernetes pod",
|
|
142
|
+
)
|
|
110
143
|
@click.option(
|
|
111
144
|
"--labels",
|
|
112
145
|
default=None,
|
|
@@ -119,24 +152,23 @@ def kubernetes():
|
|
|
119
152
|
type=JSONTypeClass(),
|
|
120
153
|
multiple=False,
|
|
121
154
|
)
|
|
122
|
-
@click.option("--ubf-context", default=None, type=click.Choice([None, "ubf_control"]))
|
|
123
155
|
@click.option(
|
|
124
|
-
"--
|
|
125
|
-
default=
|
|
126
|
-
type=
|
|
127
|
-
|
|
156
|
+
"--security-context",
|
|
157
|
+
default=None,
|
|
158
|
+
type=JSONTypeClass(),
|
|
159
|
+
multiple=False,
|
|
128
160
|
)
|
|
129
|
-
@click.option("--shared-memory", default=None, help="Size of shared memory in MiB")
|
|
130
|
-
@click.option("--port", default=None, help="Port number to expose from the container")
|
|
131
161
|
@click.pass_context
|
|
132
162
|
def step(
|
|
133
163
|
ctx,
|
|
134
164
|
step_name,
|
|
165
|
+
code_package_metadata,
|
|
135
166
|
code_package_sha,
|
|
136
167
|
code_package_url,
|
|
137
168
|
executable=None,
|
|
138
169
|
image=None,
|
|
139
170
|
image_pull_policy=None,
|
|
171
|
+
image_pull_secrets=None,
|
|
140
172
|
service_account=None,
|
|
141
173
|
secrets=None,
|
|
142
174
|
node_selector=None,
|
|
@@ -153,11 +185,13 @@ def step(
|
|
|
153
185
|
run_time_limit=None,
|
|
154
186
|
persistent_volume_claims=None,
|
|
155
187
|
tolerations=None,
|
|
156
|
-
labels=None,
|
|
157
|
-
annotations=None,
|
|
158
|
-
num_parallel=None,
|
|
159
188
|
shared_memory=None,
|
|
160
189
|
port=None,
|
|
190
|
+
num_parallel=None,
|
|
191
|
+
qos=None,
|
|
192
|
+
labels=None,
|
|
193
|
+
annotations=None,
|
|
194
|
+
security_context=None,
|
|
161
195
|
**kwargs
|
|
162
196
|
):
|
|
163
197
|
def echo(msg, stream="stderr", job_id=None, **kwargs):
|
|
@@ -172,7 +206,7 @@ def step(
|
|
|
172
206
|
executable = ctx.obj.environment.executable(step_name, executable)
|
|
173
207
|
|
|
174
208
|
# Set environment
|
|
175
|
-
env = {}
|
|
209
|
+
env = {"METAFLOW_FLOW_FILENAME": os.path.basename(sys.argv[0])}
|
|
176
210
|
env_deco = [deco for deco in node.decorators if deco.name == "environment"]
|
|
177
211
|
if env_deco:
|
|
178
212
|
env = env_deco[0].attributes["vars"]
|
|
@@ -189,6 +223,12 @@ def step(
|
|
|
189
223
|
kwargs["input_paths"] = "".join("${%s}" % s for s in split_vars.keys())
|
|
190
224
|
env.update(split_vars)
|
|
191
225
|
|
|
226
|
+
if num_parallel is not None and num_parallel <= 1:
|
|
227
|
+
raise KubernetesException(
|
|
228
|
+
"Using @parallel with `num_parallel` <= 1 is not supported with "
|
|
229
|
+
"@kubernetes. Please set the value of `num_parallel` to be greater than 1."
|
|
230
|
+
)
|
|
231
|
+
|
|
192
232
|
# Set retry policy.
|
|
193
233
|
retry_count = int(kwargs.get("retry_count", 0))
|
|
194
234
|
retry_deco = [deco for deco in node.decorators if deco.name == "retry"]
|
|
@@ -203,25 +243,37 @@ def step(
|
|
|
203
243
|
)
|
|
204
244
|
time.sleep(minutes_between_retries * 60)
|
|
205
245
|
|
|
206
|
-
|
|
207
|
-
|
|
208
|
-
|
|
209
|
-
|
|
210
|
-
|
|
246
|
+
# Explicitly Remove `ubf_context` from `kwargs` so that it's not passed as a commandline option
|
|
247
|
+
# If an underlying step command is executing a vanilla Kubernetes job, then it should never need
|
|
248
|
+
# to know about the UBF context.
|
|
249
|
+
# If it is a jobset which is executing a multi-node job, then the UBF context is set based on the
|
|
250
|
+
# `ubf_context` parameter passed to the jobset.
|
|
251
|
+
kwargs.pop("ubf_context", None)
|
|
252
|
+
# `task_id` is also need to be removed from `kwargs` as it needs to be dynamically
|
|
253
|
+
# set in the downstream code IF num_parallel is > 1
|
|
254
|
+
task_id = kwargs["task_id"]
|
|
255
|
+
if num_parallel:
|
|
256
|
+
kwargs.pop("task_id")
|
|
211
257
|
|
|
212
258
|
step_cli = "{entrypoint} {top_args} step {step} {step_args}".format(
|
|
213
259
|
entrypoint="%s -u %s" % (executable, os.path.basename(sys.argv[0])),
|
|
214
260
|
top_args=" ".join(util.dict_to_cli_options(ctx.parent.parent.params)),
|
|
215
261
|
step=step_name,
|
|
216
|
-
step_args=
|
|
262
|
+
step_args=" ".join(util.dict_to_cli_options(kwargs)),
|
|
217
263
|
)
|
|
264
|
+
# Since it is a parallel step there are some parts of the step_cli that need to be modified
|
|
265
|
+
# based on the type of worker in the JobSet. This is why we will create a placeholder string
|
|
266
|
+
# in the template which will be replaced based on the type of worker.
|
|
267
|
+
|
|
268
|
+
if num_parallel:
|
|
269
|
+
step_cli = "%s {METAFLOW_PARALLEL_STEP_CLI_OPTIONS_TEMPLATE}" % step_cli
|
|
218
270
|
|
|
219
271
|
# Set log tailing.
|
|
220
272
|
ds = ctx.obj.flow_datastore.get_task_datastore(
|
|
221
273
|
mode="w",
|
|
222
274
|
run_id=kwargs["run_id"],
|
|
223
275
|
step_name=step_name,
|
|
224
|
-
task_id=
|
|
276
|
+
task_id=task_id,
|
|
225
277
|
attempt=int(retry_count),
|
|
226
278
|
)
|
|
227
279
|
stdout_location = ds.get_log_location(TASK_LOG_SOURCE, "stdout")
|
|
@@ -235,14 +287,10 @@ def step(
|
|
|
235
287
|
sync_local_metadata_from_datastore(
|
|
236
288
|
DATASTORE_LOCAL_DIR,
|
|
237
289
|
ctx.obj.flow_datastore.get_task_datastore(
|
|
238
|
-
kwargs["run_id"], step_name,
|
|
290
|
+
kwargs["run_id"], step_name, task_id
|
|
239
291
|
),
|
|
240
292
|
)
|
|
241
293
|
|
|
242
|
-
attrs = {
|
|
243
|
-
"metaflow.task_id": kwargs["task_id"],
|
|
244
|
-
"requires_passwordless_ssh": any([getattr(deco, "requires_passwordless_ssh", False) for deco in node.decorators]),
|
|
245
|
-
}
|
|
246
294
|
try:
|
|
247
295
|
kubernetes = Kubernetes(
|
|
248
296
|
datastore=ctx.obj.flow_datastore,
|
|
@@ -255,15 +303,17 @@ def step(
|
|
|
255
303
|
flow_name=ctx.obj.flow.name,
|
|
256
304
|
run_id=kwargs["run_id"],
|
|
257
305
|
step_name=step_name,
|
|
258
|
-
task_id=
|
|
306
|
+
task_id=task_id,
|
|
259
307
|
attempt=str(retry_count),
|
|
260
308
|
user=util.get_username(),
|
|
309
|
+
code_package_metadata=code_package_metadata,
|
|
261
310
|
code_package_sha=code_package_sha,
|
|
262
311
|
code_package_url=code_package_url,
|
|
263
312
|
code_package_ds=ctx.obj.flow_datastore.TYPE,
|
|
264
313
|
step_cli=step_cli,
|
|
265
314
|
docker_image=image,
|
|
266
315
|
docker_image_pull_policy=image_pull_policy,
|
|
316
|
+
image_pull_secrets=image_pull_secrets,
|
|
267
317
|
service_account=service_account,
|
|
268
318
|
secrets=secrets,
|
|
269
319
|
node_selector=node_selector,
|
|
@@ -281,14 +331,15 @@ def step(
|
|
|
281
331
|
env=env,
|
|
282
332
|
persistent_volume_claims=persistent_volume_claims,
|
|
283
333
|
tolerations=tolerations,
|
|
284
|
-
labels=labels,
|
|
285
|
-
annotations=annotations,
|
|
286
|
-
num_parallel=num_parallel,
|
|
287
334
|
shared_memory=shared_memory,
|
|
288
335
|
port=port,
|
|
289
|
-
|
|
336
|
+
num_parallel=num_parallel,
|
|
337
|
+
qos=qos,
|
|
338
|
+
labels=labels,
|
|
339
|
+
annotations=annotations,
|
|
340
|
+
security_context=security_context,
|
|
290
341
|
)
|
|
291
|
-
except Exception
|
|
342
|
+
except Exception:
|
|
292
343
|
traceback.print_exc(chain=False)
|
|
293
344
|
_sync_metadata()
|
|
294
345
|
sys.exit(METAFLOW_EXIT_DISALLOW_RETRY)
|
|
@@ -300,3 +351,84 @@ def step(
|
|
|
300
351
|
sys.exit(METAFLOW_EXIT_DISALLOW_RETRY)
|
|
301
352
|
finally:
|
|
302
353
|
_sync_metadata()
|
|
354
|
+
|
|
355
|
+
|
|
356
|
+
@kubernetes.command(help="List unfinished Kubernetes tasks of this flow.")
|
|
357
|
+
@click.option(
|
|
358
|
+
"--my-runs",
|
|
359
|
+
default=False,
|
|
360
|
+
is_flag=True,
|
|
361
|
+
help="List all my unfinished tasks.",
|
|
362
|
+
)
|
|
363
|
+
@click.option("--user", default=None, help="List unfinished tasks for the given user.")
|
|
364
|
+
@click.option(
|
|
365
|
+
"--run-id",
|
|
366
|
+
default=None,
|
|
367
|
+
help="List unfinished tasks corresponding to the run id.",
|
|
368
|
+
)
|
|
369
|
+
@click.pass_obj
|
|
370
|
+
def list(obj, run_id, user, my_runs):
|
|
371
|
+
flow_name, run_id, user = parse_cli_options(
|
|
372
|
+
obj.flow.name, run_id, user, my_runs, obj.echo
|
|
373
|
+
)
|
|
374
|
+
kube_client = KubernetesClient()
|
|
375
|
+
pods = kube_client.list(obj.flow.name, run_id, user)
|
|
376
|
+
|
|
377
|
+
def format_timestamp(timestamp=None):
|
|
378
|
+
if timestamp is None:
|
|
379
|
+
return "-"
|
|
380
|
+
return timestamp.strftime("%Y-%m-%d %H:%M:%S")
|
|
381
|
+
|
|
382
|
+
for pod in pods:
|
|
383
|
+
obj.echo(
|
|
384
|
+
"Run: *{run_id}* "
|
|
385
|
+
"Pod: *{pod_id}* "
|
|
386
|
+
"Started At: {startedAt} "
|
|
387
|
+
"Status: *{status}*".format(
|
|
388
|
+
run_id=pod.metadata.annotations.get(
|
|
389
|
+
"metaflow/run_id",
|
|
390
|
+
pod.metadata.labels.get("workflows.argoproj.io/workflow"),
|
|
391
|
+
),
|
|
392
|
+
pod_id=pod.metadata.name,
|
|
393
|
+
startedAt=format_timestamp(pod.status.start_time),
|
|
394
|
+
status=pod.status.phase,
|
|
395
|
+
)
|
|
396
|
+
)
|
|
397
|
+
|
|
398
|
+
if not pods:
|
|
399
|
+
obj.echo("No active Kubernetes pods found.")
|
|
400
|
+
|
|
401
|
+
|
|
402
|
+
@kubernetes.command(
|
|
403
|
+
help="Terminate unfinished Kubernetes tasks of this flow. Killed pods may result in newer attempts when using @retry."
|
|
404
|
+
)
|
|
405
|
+
@click.option(
|
|
406
|
+
"--my-runs",
|
|
407
|
+
default=False,
|
|
408
|
+
is_flag=True,
|
|
409
|
+
help="Kill all my unfinished tasks.",
|
|
410
|
+
)
|
|
411
|
+
@click.option(
|
|
412
|
+
"--user",
|
|
413
|
+
default=None,
|
|
414
|
+
help="Terminate unfinished tasks for the given user.",
|
|
415
|
+
)
|
|
416
|
+
@click.option(
|
|
417
|
+
"--run-id",
|
|
418
|
+
default=None,
|
|
419
|
+
help="Terminate unfinished tasks corresponding to the run id.",
|
|
420
|
+
)
|
|
421
|
+
@click.pass_obj
|
|
422
|
+
def kill(obj, run_id, user, my_runs):
|
|
423
|
+
flow_name, run_id, user = parse_cli_options(
|
|
424
|
+
obj.flow.name, run_id, user, my_runs, obj.echo
|
|
425
|
+
)
|
|
426
|
+
|
|
427
|
+
if run_id is not None and run_id.startswith("argo-") or user == "argo-workflows":
|
|
428
|
+
raise MetaflowException(
|
|
429
|
+
"Killing pods launched by Argo Workflows is not supported. "
|
|
430
|
+
"Use *argo-workflows terminate* instead."
|
|
431
|
+
)
|
|
432
|
+
|
|
433
|
+
kube_client = KubernetesClient()
|
|
434
|
+
kube_client.kill_pods(flow_name, run_id, user, obj.echo)
|
|
@@ -1,11 +1,12 @@
|
|
|
1
|
+
from concurrent.futures import ThreadPoolExecutor
|
|
1
2
|
import os
|
|
2
3
|
import sys
|
|
3
4
|
import time
|
|
4
5
|
|
|
5
6
|
from metaflow.exception import MetaflowException
|
|
7
|
+
from metaflow.metaflow_config import KUBERNETES_NAMESPACE
|
|
6
8
|
|
|
7
|
-
from .kubernetes_job import KubernetesJob
|
|
8
|
-
|
|
9
|
+
from .kubernetes_job import KubernetesJob, KubernetesJobSet
|
|
9
10
|
|
|
10
11
|
CLIENT_REFRESH_INTERVAL_SECONDS = 300
|
|
11
12
|
|
|
@@ -29,6 +30,7 @@ class KubernetesClient(object):
|
|
|
29
30
|
% sys.executable
|
|
30
31
|
)
|
|
31
32
|
self._refresh_client()
|
|
33
|
+
self._namespace = KUBERNETES_NAMESPACE
|
|
32
34
|
|
|
33
35
|
def _refresh_client(self):
|
|
34
36
|
from kubernetes import client, config
|
|
@@ -61,5 +63,105 @@ class KubernetesClient(object):
|
|
|
61
63
|
|
|
62
64
|
return self._client
|
|
63
65
|
|
|
66
|
+
def _find_active_pods(self, flow_name, run_id=None, user=None):
|
|
67
|
+
def _request(_continue=None):
|
|
68
|
+
# handle paginated responses
|
|
69
|
+
return self._client.CoreV1Api().list_namespaced_pod(
|
|
70
|
+
namespace=self._namespace,
|
|
71
|
+
# limited selector support for K8S api. We want to cover multiple statuses: Running / Pending / Unknown
|
|
72
|
+
field_selector="status.phase!=Succeeded,status.phase!=Failed",
|
|
73
|
+
limit=1000,
|
|
74
|
+
_continue=_continue,
|
|
75
|
+
)
|
|
76
|
+
|
|
77
|
+
results = _request()
|
|
78
|
+
|
|
79
|
+
if run_id is not None:
|
|
80
|
+
# handle argo prefixes in run_id
|
|
81
|
+
run_id = run_id[run_id.startswith("argo-") and len("argo-") :]
|
|
82
|
+
|
|
83
|
+
while results.metadata._continue or results.items:
|
|
84
|
+
for pod in results.items:
|
|
85
|
+
match = (
|
|
86
|
+
# arbitrary pods might have no annotations at all.
|
|
87
|
+
pod.metadata.annotations
|
|
88
|
+
and pod.metadata.labels
|
|
89
|
+
and (
|
|
90
|
+
run_id is None
|
|
91
|
+
or (pod.metadata.annotations.get("metaflow/run_id") == run_id)
|
|
92
|
+
# we want to also match pods launched by argo-workflows
|
|
93
|
+
or (
|
|
94
|
+
pod.metadata.labels.get("workflows.argoproj.io/workflow")
|
|
95
|
+
== run_id
|
|
96
|
+
)
|
|
97
|
+
)
|
|
98
|
+
and (
|
|
99
|
+
user is None
|
|
100
|
+
or pod.metadata.annotations.get("metaflow/user") == user
|
|
101
|
+
)
|
|
102
|
+
and (
|
|
103
|
+
pod.metadata.annotations.get("metaflow/flow_name") == flow_name
|
|
104
|
+
)
|
|
105
|
+
)
|
|
106
|
+
if match:
|
|
107
|
+
yield pod
|
|
108
|
+
if not results.metadata._continue:
|
|
109
|
+
break
|
|
110
|
+
results = _request(results.metadata._continue)
|
|
111
|
+
|
|
112
|
+
def list(self, flow_name, run_id, user):
|
|
113
|
+
results = self._find_active_pods(flow_name, run_id, user)
|
|
114
|
+
|
|
115
|
+
return list(results)
|
|
116
|
+
|
|
117
|
+
def kill_pods(self, flow_name, run_id, user, echo):
|
|
118
|
+
from kubernetes.stream import stream
|
|
119
|
+
|
|
120
|
+
api_instance = self._client.CoreV1Api()
|
|
121
|
+
job_api = self._client.BatchV1Api()
|
|
122
|
+
pods = self._find_active_pods(flow_name, run_id, user)
|
|
123
|
+
|
|
124
|
+
def _kill_pod(pod):
|
|
125
|
+
echo("Killing Kubernetes pod %s\n" % pod.metadata.name)
|
|
126
|
+
try:
|
|
127
|
+
stream(
|
|
128
|
+
api_instance.connect_get_namespaced_pod_exec,
|
|
129
|
+
name=pod.metadata.name,
|
|
130
|
+
namespace=pod.metadata.namespace,
|
|
131
|
+
command=[
|
|
132
|
+
"/bin/sh",
|
|
133
|
+
"-c",
|
|
134
|
+
"/sbin/killall5",
|
|
135
|
+
],
|
|
136
|
+
stderr=True,
|
|
137
|
+
stdin=False,
|
|
138
|
+
stdout=True,
|
|
139
|
+
tty=False,
|
|
140
|
+
)
|
|
141
|
+
except Exception:
|
|
142
|
+
# best effort kill for pod can fail.
|
|
143
|
+
try:
|
|
144
|
+
job_name = pod.metadata.labels.get("job-name", None)
|
|
145
|
+
if job_name is None:
|
|
146
|
+
raise Exception("Could not determine job name")
|
|
147
|
+
|
|
148
|
+
job_api.patch_namespaced_job(
|
|
149
|
+
name=job_name,
|
|
150
|
+
namespace=pod.metadata.namespace,
|
|
151
|
+
field_manager="metaflow",
|
|
152
|
+
body={"spec": {"parallelism": 0}},
|
|
153
|
+
)
|
|
154
|
+
except Exception as e:
|
|
155
|
+
echo("failed to kill pod %s - %s" % (pod.metadata.name, str(e)))
|
|
156
|
+
|
|
157
|
+
with ThreadPoolExecutor() as executor:
|
|
158
|
+
operated_pods = list(executor.map(_kill_pod, pods))
|
|
159
|
+
|
|
160
|
+
if not operated_pods:
|
|
161
|
+
echo("No active Kubernetes pods found for run *%s*" % run_id)
|
|
162
|
+
|
|
163
|
+
def jobset(self, **kwargs):
|
|
164
|
+
return KubernetesJobSet(self, **kwargs)
|
|
165
|
+
|
|
64
166
|
def job(self, **kwargs):
|
|
65
167
|
return KubernetesJob(self, **kwargs)
|