ob-metaflow 2.15.13.1__py2.py3-none-any.whl → 2.19.7.1rc0__py2.py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- metaflow/__init__.py +10 -3
- metaflow/_vendor/imghdr/__init__.py +186 -0
- metaflow/_vendor/yaml/__init__.py +427 -0
- metaflow/_vendor/yaml/composer.py +139 -0
- metaflow/_vendor/yaml/constructor.py +748 -0
- metaflow/_vendor/yaml/cyaml.py +101 -0
- metaflow/_vendor/yaml/dumper.py +62 -0
- metaflow/_vendor/yaml/emitter.py +1137 -0
- metaflow/_vendor/yaml/error.py +75 -0
- metaflow/_vendor/yaml/events.py +86 -0
- metaflow/_vendor/yaml/loader.py +63 -0
- metaflow/_vendor/yaml/nodes.py +49 -0
- metaflow/_vendor/yaml/parser.py +589 -0
- metaflow/_vendor/yaml/reader.py +185 -0
- metaflow/_vendor/yaml/representer.py +389 -0
- metaflow/_vendor/yaml/resolver.py +227 -0
- metaflow/_vendor/yaml/scanner.py +1435 -0
- metaflow/_vendor/yaml/serializer.py +111 -0
- metaflow/_vendor/yaml/tokens.py +104 -0
- metaflow/cards.py +4 -0
- metaflow/cli.py +125 -21
- metaflow/cli_components/init_cmd.py +1 -0
- metaflow/cli_components/run_cmds.py +204 -40
- metaflow/cli_components/step_cmd.py +160 -4
- metaflow/client/__init__.py +1 -0
- metaflow/client/core.py +198 -130
- metaflow/client/filecache.py +59 -32
- metaflow/cmd/code/__init__.py +2 -1
- metaflow/cmd/develop/stub_generator.py +49 -18
- metaflow/cmd/develop/stubs.py +9 -27
- metaflow/cmd/make_wrapper.py +30 -0
- metaflow/datastore/__init__.py +1 -0
- metaflow/datastore/content_addressed_store.py +40 -9
- metaflow/datastore/datastore_set.py +10 -1
- metaflow/datastore/flow_datastore.py +124 -4
- metaflow/datastore/spin_datastore.py +91 -0
- metaflow/datastore/task_datastore.py +92 -6
- metaflow/debug.py +5 -0
- metaflow/decorators.py +331 -82
- metaflow/extension_support/__init__.py +414 -356
- metaflow/extension_support/_empty_file.py +2 -2
- metaflow/flowspec.py +322 -82
- metaflow/graph.py +178 -15
- metaflow/includefile.py +25 -3
- metaflow/lint.py +94 -3
- metaflow/meta_files.py +13 -0
- metaflow/metadata_provider/metadata.py +13 -2
- metaflow/metaflow_config.py +66 -4
- metaflow/metaflow_environment.py +91 -25
- metaflow/metaflow_profile.py +18 -0
- metaflow/metaflow_version.py +16 -1
- metaflow/package/__init__.py +673 -0
- metaflow/packaging_sys/__init__.py +880 -0
- metaflow/packaging_sys/backend.py +128 -0
- metaflow/packaging_sys/distribution_support.py +153 -0
- metaflow/packaging_sys/tar_backend.py +99 -0
- metaflow/packaging_sys/utils.py +54 -0
- metaflow/packaging_sys/v1.py +527 -0
- metaflow/parameters.py +6 -2
- metaflow/plugins/__init__.py +6 -0
- metaflow/plugins/airflow/airflow.py +11 -1
- metaflow/plugins/airflow/airflow_cli.py +16 -5
- metaflow/plugins/argo/argo_client.py +42 -20
- metaflow/plugins/argo/argo_events.py +6 -6
- metaflow/plugins/argo/argo_workflows.py +1023 -344
- metaflow/plugins/argo/argo_workflows_cli.py +396 -94
- metaflow/plugins/argo/argo_workflows_decorator.py +9 -0
- metaflow/plugins/argo/argo_workflows_deployer_objects.py +75 -49
- metaflow/plugins/argo/capture_error.py +5 -2
- metaflow/plugins/argo/conditional_input_paths.py +35 -0
- metaflow/plugins/argo/exit_hooks.py +209 -0
- metaflow/plugins/argo/param_val.py +19 -0
- metaflow/plugins/aws/aws_client.py +6 -0
- metaflow/plugins/aws/aws_utils.py +33 -1
- metaflow/plugins/aws/batch/batch.py +72 -5
- metaflow/plugins/aws/batch/batch_cli.py +24 -3
- metaflow/plugins/aws/batch/batch_decorator.py +57 -6
- metaflow/plugins/aws/step_functions/step_functions.py +28 -3
- metaflow/plugins/aws/step_functions/step_functions_cli.py +49 -4
- metaflow/plugins/aws/step_functions/step_functions_deployer.py +3 -0
- metaflow/plugins/aws/step_functions/step_functions_deployer_objects.py +30 -0
- metaflow/plugins/cards/card_cli.py +20 -1
- metaflow/plugins/cards/card_creator.py +24 -1
- metaflow/plugins/cards/card_datastore.py +21 -49
- metaflow/plugins/cards/card_decorator.py +58 -6
- metaflow/plugins/cards/card_modules/basic.py +38 -9
- metaflow/plugins/cards/card_modules/bundle.css +1 -1
- metaflow/plugins/cards/card_modules/chevron/renderer.py +1 -1
- metaflow/plugins/cards/card_modules/components.py +592 -3
- metaflow/plugins/cards/card_modules/convert_to_native_type.py +34 -5
- metaflow/plugins/cards/card_modules/json_viewer.py +232 -0
- metaflow/plugins/cards/card_modules/main.css +1 -0
- metaflow/plugins/cards/card_modules/main.js +56 -41
- metaflow/plugins/cards/card_modules/test_cards.py +22 -6
- metaflow/plugins/cards/component_serializer.py +1 -8
- metaflow/plugins/cards/metadata.py +22 -0
- metaflow/plugins/catch_decorator.py +9 -0
- metaflow/plugins/datastores/local_storage.py +12 -6
- metaflow/plugins/datastores/spin_storage.py +12 -0
- metaflow/plugins/datatools/s3/s3.py +49 -17
- metaflow/plugins/datatools/s3/s3op.py +113 -66
- metaflow/plugins/env_escape/client_modules.py +102 -72
- metaflow/plugins/events_decorator.py +127 -121
- metaflow/plugins/exit_hook/__init__.py +0 -0
- metaflow/plugins/exit_hook/exit_hook_decorator.py +46 -0
- metaflow/plugins/exit_hook/exit_hook_script.py +52 -0
- metaflow/plugins/kubernetes/kubernetes.py +12 -1
- metaflow/plugins/kubernetes/kubernetes_cli.py +11 -0
- metaflow/plugins/kubernetes/kubernetes_decorator.py +25 -6
- metaflow/plugins/kubernetes/kubernetes_job.py +12 -4
- metaflow/plugins/kubernetes/kubernetes_jobsets.py +31 -30
- metaflow/plugins/metadata_providers/local.py +76 -82
- metaflow/plugins/metadata_providers/service.py +13 -9
- metaflow/plugins/metadata_providers/spin.py +16 -0
- metaflow/plugins/package_cli.py +36 -24
- metaflow/plugins/parallel_decorator.py +11 -2
- metaflow/plugins/parsers.py +16 -0
- metaflow/plugins/pypi/bootstrap.py +7 -1
- metaflow/plugins/pypi/conda_decorator.py +41 -82
- metaflow/plugins/pypi/conda_environment.py +14 -6
- metaflow/plugins/pypi/micromamba.py +9 -1
- metaflow/plugins/pypi/pip.py +41 -5
- metaflow/plugins/pypi/pypi_decorator.py +4 -4
- metaflow/plugins/pypi/utils.py +22 -0
- metaflow/plugins/secrets/__init__.py +3 -0
- metaflow/plugins/secrets/secrets_decorator.py +14 -178
- metaflow/plugins/secrets/secrets_func.py +49 -0
- metaflow/plugins/secrets/secrets_spec.py +101 -0
- metaflow/plugins/secrets/utils.py +74 -0
- metaflow/plugins/test_unbounded_foreach_decorator.py +2 -2
- metaflow/plugins/timeout_decorator.py +0 -1
- metaflow/plugins/uv/bootstrap.py +29 -1
- metaflow/plugins/uv/uv_environment.py +5 -3
- metaflow/pylint_wrapper.py +5 -1
- metaflow/runner/click_api.py +79 -26
- metaflow/runner/deployer.py +208 -6
- metaflow/runner/deployer_impl.py +32 -12
- metaflow/runner/metaflow_runner.py +266 -33
- metaflow/runner/subprocess_manager.py +21 -1
- metaflow/runner/utils.py +27 -16
- metaflow/runtime.py +660 -66
- metaflow/task.py +255 -26
- metaflow/user_configs/config_options.py +33 -21
- metaflow/user_configs/config_parameters.py +220 -58
- metaflow/user_decorators/__init__.py +0 -0
- metaflow/user_decorators/common.py +144 -0
- metaflow/user_decorators/mutable_flow.py +512 -0
- metaflow/user_decorators/mutable_step.py +424 -0
- metaflow/user_decorators/user_flow_decorator.py +264 -0
- metaflow/user_decorators/user_step_decorator.py +749 -0
- metaflow/util.py +197 -7
- metaflow/vendor.py +23 -7
- metaflow/version.py +1 -1
- {ob_metaflow-2.15.13.1.data → ob_metaflow-2.19.7.1rc0.data}/data/share/metaflow/devtools/Makefile +13 -2
- {ob_metaflow-2.15.13.1.data → ob_metaflow-2.19.7.1rc0.data}/data/share/metaflow/devtools/Tiltfile +107 -7
- {ob_metaflow-2.15.13.1.data → ob_metaflow-2.19.7.1rc0.data}/data/share/metaflow/devtools/pick_services.sh +1 -0
- {ob_metaflow-2.15.13.1.dist-info → ob_metaflow-2.19.7.1rc0.dist-info}/METADATA +2 -3
- {ob_metaflow-2.15.13.1.dist-info → ob_metaflow-2.19.7.1rc0.dist-info}/RECORD +162 -121
- {ob_metaflow-2.15.13.1.dist-info → ob_metaflow-2.19.7.1rc0.dist-info}/WHEEL +1 -1
- metaflow/_vendor/v3_5/__init__.py +0 -1
- metaflow/_vendor/v3_5/importlib_metadata/__init__.py +0 -644
- metaflow/_vendor/v3_5/importlib_metadata/_compat.py +0 -152
- metaflow/_vendor/v3_5/zipp.py +0 -329
- metaflow/info_file.py +0 -25
- metaflow/package.py +0 -203
- metaflow/user_configs/config_decorators.py +0 -568
- {ob_metaflow-2.15.13.1.dist-info → ob_metaflow-2.19.7.1rc0.dist-info}/entry_points.txt +0 -0
- {ob_metaflow-2.15.13.1.dist-info → ob_metaflow-2.19.7.1rc0.dist-info}/licenses/LICENSE +0 -0
- {ob_metaflow-2.15.13.1.dist-info → ob_metaflow-2.19.7.1rc0.dist-info}/top_level.txt +0 -0

metaflow/plugins/cards/card_modules/test_cards.py

@@ -34,7 +34,7 @@ class TestPathSpecCard(MetaflowCard):
 class TestEditableCard(MetaflowCard):
     type = "test_editable_card"
 
-    seperator = "$&#!!@*"
+    separator = "$&#!!@*"
 
     ALLOW_USER_COMPONENTS = True
 
@@ -42,13 +42,13 @@ class TestEditableCard(MetaflowCard):
         self._components = components
 
     def render(self, task):
-        return self.seperator.join([str(comp) for comp in self._components])
+        return self.separator.join([str(comp) for comp in self._components])
 
 
 class TestEditableCard2(MetaflowCard):
     type = "test_editable_card_2"
 
-    seperator = "$&#!!@*"
+    separator = "$&#!!@*"
 
     ALLOW_USER_COMPONENTS = True
 
@@ -56,19 +56,19 @@ class TestEditableCard2(MetaflowCard):
         self._components = components
 
     def render(self, task):
-        return self.seperator.join([str(comp) for comp in self._components])
+        return self.separator.join([str(comp) for comp in self._components])
 
 
 class TestNonEditableCard(MetaflowCard):
     type = "test_non_editable_card"
 
-    seperator = "$&#!!@*"
+    separator = "$&#!!@*"
 
     def __init__(self, components=[], **kwargs):
         self._components = components
 
     def render(self, task):
-        return self.seperator.join([str(comp) for comp in self._components])
+        return self.separator.join([str(comp) for comp in self._components])
 
 
 class TestMockCard(MetaflowCard):
@@ -213,3 +213,19 @@ class TestRefreshComponentCard(MetaflowCard):
         if task.finished:
             return "final"
         return "runtime-%s" % _component_values_to_hash(data["components"])
+
+
+class TestImageCard(MetaflowCard):
+    """Card that renders a tiny PNG using ``TaskToDict.parse_image``."""
+
+    type = "test_image_card"
+
+    def render(self, task):
+        from .convert_to_native_type import TaskToDict
+        import base64
+
+        png_bytes = base64.b64decode(
+            "iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR4nGNgYGBgAAAABQABRDE8UwAAAABJRU5ErkJggg=="
+        )
+        img_src = TaskToDict().parse_image(png_bytes)
+        return f"<html><img src='{img_src}' /></html>"

metaflow/plugins/cards/component_serializer.py

@@ -57,15 +57,8 @@ class ComponentStore:
         The `_component_map` attribute is supposed to be a dictionary so that we can access the components by their ids.
         But we also want to maintain order in which components are inserted since all of these components are going to be visible on a UI.
         Since python3.6 dictionaries are ordered by default so we can use the default python `dict`.
-        For python3.5 and below we need to use an OrderedDict since `dict`'s are not ordered by default.
         """
-        python_version = int(
-            platform.python_version_tuple()[0] + platform.python_version_tuple()[1]
-        )
-        if python_version < 36:
-            self._component_map = OrderedDict()
-        else:
-            self._component_map = {}
+        self._component_map = {}
 
     def __init__(self, logger, card_type=None, components=None, user_set_id=None):
         self._logger = logger
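
Note on the hunk above: insertion order is guaranteed for the built-in dict since Python 3.7 (and holds in CPython 3.6 as an implementation detail), which is what lets the version check and OrderedDict fallback be dropped. A minimal standalone demonstration, separate from the diff:

d = {}
d["first_component"] = 1
d["second_component"] = 2
assert list(d) == ["first_component", "second_component"]
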

metaflow/plugins/cards/metadata.py (new file)

@@ -0,0 +1,22 @@
+import json
+from metaflow.metadata_provider import MetaDatum
+
+
+def _save_metadata(
+    metadata_provider,
+    run_id,
+    step_name,
+    task_id,
+    attempt_id,
+    card_uuid,
+    save_metadata,
+):
+    entries = [
+        MetaDatum(
+            field=card_uuid,
+            value=json.dumps(save_metadata),
+            type="card-info",
+            tags=["attempt_id:{0}".format(attempt_id)],
+        )
+    ]
+    metadata_provider.register_metadata(run_id, step_name, task_id, entries)
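
A hypothetical invocation of the `_save_metadata` helper added above; every argument value below is an illustrative placeholder, not something taken from the diff:

# `provider` is assumed to expose register_metadata(run_id, step_name, task_id, entries)
_save_metadata(
    provider,
    run_id="1754",
    step_name="start",
    task_id="42",
    attempt_id=0,
    card_uuid="a1b2c3d4",
    save_metadata={"type": "default", "id": None},
)
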

metaflow/plugins/catch_decorator.py

@@ -52,6 +52,15 @@ class CatchDecorator(StepDecorator):
                 "split steps." % step
             )
 
+        # Do not support catch on switch steps for now.
+        # When applying @catch to a switch step, we can not guarantee that the flow attribute used for the switching condition gets properly recorded.
+        if graph[step].type == "split-switch":
+            raise MetaflowException(
+                "@catch is defined for the step *%s* "
+                "but @catch is not supported in conditional "
+                "switch steps." % step
+            )
+
     def _print_exception(self, step, flow):
         self.logger(head="@catch caught an exception from %s" % flow, timestamp=False)
         for line in traceback.format_exc().splitlines():

metaflow/plugins/datastores/local_storage.py

@@ -1,24 +1,29 @@
 import json
 import os
 
-from metaflow.metaflow_config import DATASTORE_LOCAL_DIR, DATASTORE_SYSROOT_LOCAL
+from metaflow.metaflow_config import (
+    DATASTORE_LOCAL_DIR,
+    DATASTORE_SYSROOT_LOCAL,
+)
 from metaflow.datastore.datastore_storage import CloseAfterUse, DataStoreStorage
 
 
 class LocalStorage(DataStoreStorage):
     TYPE = "local"
     METADATA_DIR = "_meta"
+    DATASTORE_DIR = DATASTORE_LOCAL_DIR  # ".metaflow"
+    SYSROOT_VAR = DATASTORE_SYSROOT_LOCAL
 
     @classmethod
     def get_datastore_root_from_config(cls, echo, create_on_absent=True):
-        result = DATASTORE_SYSROOT_LOCAL
+        result = cls.SYSROOT_VAR
         if result is None:
             try:
                 # Python2
                 current_path = os.getcwdu()
             except:  # noqa E722
                 current_path = os.getcwd()
-            check_dir = os.path.join(current_path, DATASTORE_LOCAL_DIR)
+            check_dir = os.path.join(current_path, cls.DATASTORE_DIR)
             check_dir = os.path.realpath(check_dir)
             orig_path = check_dir
             top_level_reached = False
@@ -28,12 +33,13 @@ class LocalStorage(DataStoreStorage):
                     top_level_reached = True
                     break  # We are no longer making upward progress
                 current_path = new_path
-                check_dir = os.path.join(current_path, DATASTORE_LOCAL_DIR)
+                check_dir = os.path.join(current_path, cls.DATASTORE_DIR)
             if top_level_reached:
                 if create_on_absent:
                     # Could not find any directory to use so create a new one
                     echo(
-                        "Creating local datastore in current directory (%s)" % orig_path
+                        "Creating %s datastore in current directory (%s)"
+                        % (cls.TYPE, orig_path)
                     )
                     os.mkdir(orig_path)
                     result = orig_path
@@ -42,7 +48,7 @@ class LocalStorage(DataStoreStorage):
             else:
                 result = check_dir
         else:
-            result = os.path.join(result, DATASTORE_LOCAL_DIR)
+            result = os.path.join(result, cls.DATASTORE_DIR)
         return result
 
     @staticmethod

metaflow/plugins/datastores/spin_storage.py (new file)

@@ -0,0 +1,12 @@
+from metaflow.metaflow_config import (
+    DATASTORE_SPIN_LOCAL_DIR,
+    DATASTORE_SYSROOT_SPIN,
+)
+from metaflow.plugins.datastores.local_storage import LocalStorage
+
+
+class SpinStorage(LocalStorage):
+    TYPE = "spin"
+    METADATA_DIR = "_meta"
+    DATASTORE_DIR = DATASTORE_SPIN_LOCAL_DIR  # ".metaflow_spin"
+    SYSROOT_VAR = DATASTORE_SYSROOT_SPIN
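
The refactor above turns the datastore location into per-class configuration: `get_datastore_root_from_config` is written once against `cls.DATASTORE_DIR` / `cls.SYSROOT_VAR`, so `SpinStorage` only overrides attributes. A standalone sketch of the lookup pattern (simplified, not the diff's code):

class LocalStorageSketch:
    DATASTORE_DIR = ".metaflow"

    @classmethod
    def datastore_dir(cls):
        # Resolved through cls, so subclasses only override the attribute
        return cls.DATASTORE_DIR


class SpinStorageSketch(LocalStorageSketch):
    DATASTORE_DIR = ".metaflow_spin"


print(SpinStorageSketch.datastore_dir())  # -> '.metaflow_spin'
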

metaflow/plugins/datatools/s3/s3.py

@@ -18,6 +18,7 @@ from metaflow.metaflow_config import (
     DATATOOLS_S3ROOT,
     S3_RETRY_COUNT,
     S3_TRANSIENT_RETRY_COUNT,
+    S3_LOG_TRANSIENT_RETRIES,
     S3_SERVER_SIDE_ENCRYPTION,
     S3_WORKER_COUNT,
     TEMPDIR,
@@ -498,16 +499,18 @@ class S3(object):
 
     Parameters
     ----------
-    tmproot : str, default: '.'
+    tmproot : str, default '.'
         Where to store the temporary directory.
-    bucket : str, optional
+    bucket : str, optional, default None
         Override the bucket from `DATATOOLS_S3ROOT` when `run` is specified.
-    prefix : str, optional
+    prefix : str, optional, default None
         Override the path from `DATATOOLS_S3ROOT` when `run` is specified.
-    run : FlowSpec or Run, optional
+    run : FlowSpec or Run, optional, default None
         Derive path prefix from the current or a past run ID, e.g. S3(run=self).
-    s3root : str, optional
+    s3root : str, optional, default None
         If `run` is not specified, use this as the S3 prefix.
+    encryption : str, optional, default None
+        Server-side encryption to use when uploading objects to S3.
     """
 
     TYPE = "s3"
@@ -578,7 +581,13 @@ class S3(object):
         self._s3_inject_failures = kwargs.get(
             "inject_failure_rate", TEST_INJECT_RETRYABLE_FAILURES
         )
-        self._tmpdir = mkdtemp(dir=tmproot, prefix="metaflow.s3.")
+        # Storing tmproot, bucket, ... as members to allow easier reconstruction
+        # during JSON deserialization
+        self._tmproot = tmproot
+        self._bucket = bucket
+        self._prefix = prefix
+        self._run = run
+        self._tmpdir = mkdtemp(dir=self._tmproot, prefix="metaflow.s3.")
         self._encryption = encryption
 
     def __enter__(self) -> "S3":
@@ -629,7 +638,9 @@ class S3(object):
                 "Don't use absolute S3 URLs when the S3 client is "
                 "initialized with a prefix. URL: %s" % key
             )
-            return os.path.join(self._s3root, key)
+            # Strip leading slashes to ensure os.path.join works correctly
+            # os.path.join discards the first argument if the second starts with '/'
+            return os.path.join(self._s3root, key.lstrip("/"))
         else:
             return self._s3root
 
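
Why the `lstrip("/")` above matters: os.path.join drops every earlier component when a later one is absolute, so a key with a leading slash would silently discard the configured S3 root. A quick demonstration:

import os

print(os.path.join("s3://bucket/prefix", "/key"))  # -> '/key' (root lost)
print(os.path.join("s3://bucket/prefix", "key"))   # -> 's3://bucket/prefix/key'
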
@@ -1385,6 +1396,9 @@ class S3(object):
         except OSError as e:
             if e.errno == errno.ENOSPC:
                 raise MetaflowS3InsufficientDiskSpace(str(e))
+        except MetaflowException as ex:
+            # Re-raise Metaflow exceptions (including TimeoutException)
+            raise
         except Exception as ex:
             error = str(ex)
             if tmp:
@@ -1757,17 +1771,35 @@ class S3(object):
                 # due to a transient failure so we try again.
                 transient_retry_count += 1
                 total_ok_count += last_ok_count
-                print(
-                    "Transient S3 failure (attempt #%d) -- total success: %d, "
-                    "last attempt %d/%d -- remaining: %d"
-                    % (
-                        transient_retry_count,
-                        total_ok_count,
-                        last_ok_count,
-                        last_ok_count + last_retry_count,
-                        len(pending_retries),
+
+                if S3_LOG_TRANSIENT_RETRIES:
+                    # Extract transient error type from pending retry lines
+                    error_info = ""
+                    if pending_retries:
+                        try:
+                            # Parse the first line to get transient error type
+                            first_retry = json.loads(
+                                pending_retries[0].decode("utf-8").strip()
+                            )
+                            if "transient_error_type" in first_retry:
+                                error_info = (
+                                    " (%s)" % first_retry["transient_error_type"]
+                                )
+                        except (json.JSONDecodeError, IndexError, KeyError):
+                            pass
+
+                    print(
+                        "Transient S3 failure (attempt #%d) -- total success: %d, "
+                        "last attempt %d/%d -- remaining: %d%s"
+                        % (
+                            transient_retry_count,
+                            total_ok_count,
+                            last_ok_count,
+                            last_ok_count + last_retry_count,
+                            len(pending_retries),
+                            error_info,
+                        )
                     )
-                )
                 if inject_failures == 0:
                     # Don't sleep when we are "faking" the failures
                     self._jitter_sleep(transient_retry_count)
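
The retry path above ends in `self._jitter_sleep(transient_retry_count)`. Its implementation is not shown in this diff; the sketch below is a generic jittered exponential backoff of the kind such a helper typically implements, under that assumption:

import random
import time

def jitter_sleep(retry_count, base=2.0, cap=60.0):
    # Full jitter: sleep a random duration up to an exponentially growing bound
    bound = min(cap, base * (2 ** retry_count))
    time.sleep(random.uniform(0.0, bound))
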

metaflow/plugins/datatools/s3/s3op.py

@@ -50,6 +50,7 @@ import metaflow.tracing as tracing
 from metaflow.metaflow_config import (
     S3_WORKER_COUNT,
 )
+from metaflow.exception import MetaflowException
 
 DOWNLOAD_FILE_THRESHOLD = 2 * TransferConfig().multipart_threshold
 DOWNLOAD_MAX_CHUNK = 2 * 1024 * 1024 * 1024 - 1
@@ -131,7 +132,7 @@ def normalize_client_error(err):
     except ValueError:
         if error_code in ("AccessDenied", "AllAccessDisabled", "InvalidAccessKeyId"):
             return 403
-        if error_code == "NoSuchKey":
+        if error_code in ("NoSuchKey", "NoSuchBucket"):
             return 404
         if error_code == "InvalidRange":
             return 416
@@ -284,14 +285,23 @@ def worker(result_file_name, queue, mode, s3config):
                             "%d %d\n" % (idx, -ERROR_OUT_OF_DISK_SPACE)
                         )
                     else:
-                        result_file.write("%d %d\n" % (idx, -ERROR_TRANSIENT))
+                        result_file.write(
+                            "%d %d %s\n" % (idx, -ERROR_TRANSIENT, "OSError")
+                        )
                     result_file.flush()
                     continue
+                except MetaflowException:
+                    # Re-raise Metaflow exceptions (including TimeoutException)
+                    tmp.close()
+                    os.unlink(tmp.name)
+                    raise
                 except (SSLError, Exception) as e:
                     tmp.close()
                     os.unlink(tmp.name)
                     # assume anything else is transient
-                    result_file.write("%d %d\n" % (idx, -ERROR_TRANSIENT))
+                    result_file.write(
+                        "%d %d %s\n" % (idx, -ERROR_TRANSIENT, type(e).__name__)
+                    )
                     result_file.flush()
                     continue
                 # If we need the metadata, get it and write it out
@@ -357,9 +367,14 @@ def worker(result_file_name, queue, mode, s3config):
                     err = convert_to_client_error(e)
                     handle_client_error(err, idx, result_file)
                     continue
+                except MetaflowException:
+                    # Re-raise Metaflow exceptions (including TimeoutException)
+                    raise
                 except (SSLError, Exception) as e:
                     # assume anything else is transient
-                    result_file.write("%d %d\n" % (idx, -ERROR_TRANSIENT))
+                    result_file.write(
+                        "%d %d %s\n" % (idx, -ERROR_TRANSIENT, type(e).__name__)
+                    )
                     result_file.flush()
                     continue
                 except:
@@ -385,7 +400,13 @@ def convert_to_client_error(e):
 
 
 def handle_client_error(err, idx, result_file):
+    # Handle all MetaflowExceptions as fatal
+    if isinstance(err, MetaflowException):
+        raise err
+
     error_code = normalize_client_error(err)
+    original_error_code = err.response["Error"]["Code"]
+
     if error_code == 404:
         result_file.write("%d %d\n" % (idx, -ERROR_URL_NOT_FOUND))
         result_file.flush()
@@ -393,13 +414,12 @@ def handle_client_error(err, idx, result_file):
         result_file.write("%d %d\n" % (idx, -ERROR_URL_ACCESS_DENIED))
         result_file.flush()
     elif error_code == 503:
-        result_file.write("%d %d\n" % (idx, -ERROR_TRANSIENT))
+        result_file.write("%d %d %s\n" % (idx, -ERROR_TRANSIENT, original_error_code))
         result_file.flush()
     else:
         # optimistically assume it is a transient error
-        result_file.write("%d %d\n" % (idx, -ERROR_TRANSIENT))
+        result_file.write("%d %d %s\n" % (idx, -ERROR_TRANSIENT, original_error_code))
         result_file.flush()
-    # TODO specific error message for out of disk space
 
 
 def start_workers(mode, urls, num_workers, inject_failure, s3config):
@@ -411,6 +431,7 @@ def start_workers(mode, urls, num_workers, inject_failure, s3config):
     random.seed()
 
     sz_results = []
+    transient_error_type = None
     # 1. push sources and destinations to the queue
     # We only push if we don't inject a failure; otherwise, we already set the sz_results
     # appropriately with the result of the injected failure.
@@ -465,13 +486,19 @@ def start_workers(mode, urls, num_workers, inject_failure, s3config):
                 # Read the output file if all went well
                 with open(out_path, "r") as out_file:
                     for line in out_file:
-                        line_split = line.split(" ")
-                        sz_results[int(line_split[0])] = int(line_split[1])
+                        line_split = line.split(" ", 2)
+                        idx = int(line_split[0])
+                        size = int(line_split[1])
+                        sz_results[idx] = size
+
+                        # For transient errors, store the transient error type (should be the same for all)
+                        if size == -ERROR_TRANSIENT and len(line_split) > 2:
+                            transient_error_type = line_split[2].strip()
             else:
                 # Put this process back in the processes to check
                 new_procs[proc] = out_path
         procs = new_procs
-    return sz_results
+    return sz_results, transient_error_type
 
 
 def process_urls(mode, urls, verbose, inject_failure, num_workers, s3config):
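
Why `line.split(" ", 2)` above: worker result lines now carry an optional third field, "<idx> <size-or-negative-error> [<error type>]", and maxsplit keeps that field intact. A small sketch with made-up values (the real error sentinel is -ERROR_TRANSIENT):

line = "3 -42 SlowDown\n"
parts = line.split(" ", 2)
idx, size = int(parts[0]), int(parts[1])
error_type = parts[2].strip() if len(parts) > 2 else None
print(idx, size, error_type)  # -> 3 -42 SlowDown
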
@@ -480,7 +507,9 @@ def process_urls(mode, urls, verbose, inject_failure, num_workers, s3config):
         print("%sing %d files.." % (mode.capitalize(), len(urls)), file=sys.stderr)
 
     start = time.time()
-    sz_results = start_workers(mode, urls, num_workers, inject_failure, s3config)
+    sz_results, transient_error_type = start_workers(
+        mode, urls, num_workers, inject_failure, s3config
+    )
     end = time.time()
 
     if verbose:
@@ -497,7 +526,7 @@ def process_urls(mode, urls, verbose, inject_failure, num_workers, s3config):
             ),
             file=sys.stderr,
         )
-    return sz_results
+    return sz_results, transient_error_type
 
 
 # Utility functions
@@ -582,11 +611,12 @@ class S3Ops(object):
             # - the trailing slash is significant in S3
             if "Contents" in page:
                 for key in page.get("Contents", []):
-                    url = url_base + key["Key"]
+                    key_path = key["Key"].lstrip("/")
+                    url = url_base + key_path
                     urlobj = S3Url(
                         url=url,
                         bucket=prefix_url.bucket,
-                        path=key["Key"],
+                        path=key_path,
                         local=generate_local_path(url),
                         prefix=prefix_url.url,
                     )
@@ -694,9 +724,21 @@ def generate_local_path(url, range="whole", suffix=None):
     quoted = url_quote(url)
     fname = quoted.split(b"/")[-1].replace(b".", b"_").replace(b"-", b"_")
     sha = sha1(quoted).hexdigest()
+
+    # Truncate fname to ensure the final filename doesn't exceed filesystem limits.
+    # Most filesystems have a 255 character limit. The structure is:
+    # <40-char-sha>-<fname>-<range>[-<suffix>]
+    # We need to leave room for: sha (40) + hyphens (2-3) + range (~10) + suffix (~10)
+    # This leaves roughly 190 characters for fname. We use 150 to be safe.
+    fname_decoded = fname.decode("utf-8")
+    max_fname_len = 150
+    if len(fname_decoded) > max_fname_len:
+        # Truncate and add an ellipsis to indicate truncation
+        fname_decoded = fname_decoded[:max_fname_len] + "..."
+
     if suffix:
-        return "-".join((sha, fname.decode("utf-8"), range, suffix))
-    return "-".join((sha, fname.decode("utf-8"), range))
+        return "-".join((sha, fname_decoded, range, suffix))
+    return "-".join((sha, fname_decoded, range))
 
 
 def parallel_op(op, lst, num_workers):
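
The arithmetic behind the 150-character cap above, using the component sizes assumed in the diff's comment:

sha_len, hyphens, range_len, suffix_len = 40, 3, 10, 10
room_for_fname = 255 - (sha_len + hyphens + range_len + suffix_len)
print(room_for_fname)  # -> 192; capping at 150 leaves extra headroom
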
@@ -833,11 +875,16 @@ def lst(
     urllist = []
     to_iterate, _ = _populate_prefixes(prefixes, inputs)
     for _, prefix, url, _ in to_iterate:
-        src = urlparse(url)
+        src = urlparse(url, allow_fragments=False)
+        # We always consider the path being passed in to be a directory path so
+        # we add a trailing slash to the path if it doesn't already have one.
+        path_with_slash = src.path.lstrip("/")
+        if not path_with_slash.endswith("/"):
+            path_with_slash += "/"
         url = S3Url(
             url=url,
             bucket=src.netloc,
-            path=src.path.lstrip("/"),
+            path=path_with_slash,
             local=None,
             prefix=prefix,
         )
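
Why `allow_fragments=False` above: S3 keys may legitimately contain '#', which urlparse would otherwise treat as the start of a URL fragment and strip from `.path`. For example:

from urllib.parse import urlparse

print(urlparse("s3://bucket/data#v2.csv").path)                         # -> '/data'
print(urlparse("s3://bucket/data#v2.csv", allow_fragments=False).path)  # -> '/data#v2.csv'
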
@@ -939,7 +986,7 @@ def put(
         yield input_line_idx, local, url, content_type, metadata, encryption
 
     def _make_url(idx, local, user_url, content_type, metadata, encryption):
-        src = urlparse(user_url)
+        src = urlparse(user_url, allow_fragments=False)
         url = S3Url(
             url=user_url,
             bucket=src.netloc,
@@ -967,7 +1014,7 @@ def put(
     ul_op = "upload"
     if not overwrite:
         ul_op = "info_upload"
-    sz_results = process_urls(
+    sz_results, transient_error_type = process_urls(
         ul_op, urls, verbose, inject_failure, num_workers, s3config
     )
     retry_lines = []
@@ -985,19 +1032,17 @@ def put(
         elif listing and sz == 0:
             out_lines.append(format_result_line(url.idx, url.url) + "\n")
         elif sz == -ERROR_TRANSIENT:
-            retry_lines.append(
-                json.dumps(
-                    {
-                        "idx": url.idx,
-                        "url": url.url,
-                        "local": url.local,
-                        "content_type": url.content_type,
-                        "metadata": url.metadata,
-                        "encryption": url.encryption,
-                    }
-                )
-                + "\n"
-            )
+            retry_data = {
+                "idx": url.idx,
+                "url": url.url,
+                "local": url.local,
+                "content_type": url.content_type,
+                "metadata": url.metadata,
+                "encryption": url.encryption,
+            }
+            if transient_error_type:
+                retry_data["transient_error_type"] = transient_error_type
+            retry_lines.append(json.dumps(retry_data) + "\n")
             # Output something to get a total count the first time around
             if not is_transient_retry:
                 out_lines.append("%d %s\n" % (url.idx, TRANSIENT_RETRY_LINE_CONTENT))
@@ -1035,22 +1080,21 @@ def _populate_prefixes(prefixes, inputs):
             for idx, l in enumerate(f, start=len(prefixes)):
                 s = l.split(b" ")
                 if len(s) == 1:
+                    # User input format: <url>
                     url = url_unquote(s[0].strip())
                     prefixes.append((idx, url, url, None))
                 elif len(s) == 2:
+                    # User input format: <url> <range>
                     url = url_unquote(s[0].strip())
                     prefixes.append((idx, url, url, url_unquote(s[1].strip())))
-                elif len(s) == 4:
+                elif len(s) in (4, 5):
+                    # Retry format: <idx> <prefix> <url> <range> [<transient_error_type>]
+                    # The transient_error_type (5th field) is optional and only used for logging.
+                    # Lines with other field counts (e.g., 3) are silently ignored as invalid.
                     is_transient_retry = True
-                    prefix = url_unquote(s[1].strip())
-                    url = url_unquote(s[2].strip())
-                    range_info = url_unquote(s[3].strip())
-                else:
-                    # Special case when we have both prefix and URL -- this is
-                    # used in recursive gets for example
-                    prefix = url_unquote(s[1].strip())
-                    url = url_unquote(s[2].strip())
-                    range_info = url_unquote(s[3].strip())
+                    prefix = url_unquote(s[1].strip())
+                    url = url_unquote(s[2].strip())
+                    range_info = url_unquote(s[3].strip())
                     if range_info == "<norange>":
                         range_info = None
                     prefixes.append(
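
Illustrative input lines for `_populate_prefixes` (all values made up; real retry lines are url-quoted). The optional fifth field is the transient error type used only for logging:

line = b"7 s3://bucket s3://bucket/key <norange> SlowDown"
s = line.split(b" ")
assert len(s) in (4, 5)
error_type = s[4].decode() if len(s) == 5 else None
print(error_type)  # -> SlowDown
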
@@ -1114,7 +1158,7 @@ def get(
     urllist = []
     to_iterate, is_transient_retry = _populate_prefixes(prefixes, inputs)
     for idx, prefix, url, r in to_iterate:
-        src = urlparse(url)
+        src = urlparse(url, allow_fragments=False)
         url = S3Url(
             url=url,
             bucket=src.netloc,
@@ -1161,7 +1205,7 @@ def get(
 
     # exclude the non-existent files from loading
     to_load = [url for url, size in urls if size is not None]
-    sz_results = process_urls(
+    sz_results, transient_error_type = process_urls(
         dl_op, to_load, verbose, inject_failure, num_workers, s3config
     )
     # We check if there is any access denied
|
|
|
1197
1241
|
break
|
|
1198
1242
|
out_lines.append(format_result_line(url.idx, url.url) + "\n")
|
|
1199
1243
|
elif sz == -ERROR_TRANSIENT:
|
|
1200
|
-
|
|
1201
|
-
|
|
1202
|
-
|
|
1203
|
-
|
|
1204
|
-
|
|
1205
|
-
|
|
1206
|
-
|
|
1207
|
-
|
|
1208
|
-
|
|
1209
|
-
|
|
1210
|
-
|
|
1211
|
-
|
|
1212
|
-
|
|
1213
|
-
+ "\n"
|
|
1214
|
-
)
|
|
1244
|
+
retry_line_parts = [
|
|
1245
|
+
str(url.idx),
|
|
1246
|
+
url_quote(url.prefix).decode(encoding="utf-8"),
|
|
1247
|
+
url_quote(url.url).decode(encoding="utf-8"),
|
|
1248
|
+
(
|
|
1249
|
+
url_quote(url.range).decode(encoding="utf-8")
|
|
1250
|
+
if url.range
|
|
1251
|
+
else "<norange>"
|
|
1252
|
+
),
|
|
1253
|
+
]
|
|
1254
|
+
if transient_error_type:
|
|
1255
|
+
retry_line_parts.append(transient_error_type)
|
|
1256
|
+
retry_lines.append(" ".join(retry_line_parts) + "\n")
|
|
1215
1257
|
# First time around, we output something to indicate the total length
|
|
1216
1258
|
if not is_transient_retry:
|
|
1217
1259
|
out_lines.append("%d %s\n" % (url.idx, TRANSIENT_RETRY_LINE_CONTENT))
|
|
@@ -1263,7 +1305,7 @@ def info(
     urllist = []
     to_iterate, is_transient_retry = _populate_prefixes(prefixes, inputs)
    for idx, prefix, url, _ in to_iterate:
-        src = urlparse(url)
+        src = urlparse(url, allow_fragments=False)
         url = S3Url(
             url=url,
             bucket=src.netloc,
@@ -1277,7 +1319,7 @@ def info(
         exit(ERROR_INVALID_URL, url)
     urllist.append(url)
 
-    sz_results = process_urls(
+    sz_results, transient_error_type = process_urls(
         "info", urllist, verbose, inject_failure, num_workers, s3config
     )
 
@@ -1290,10 +1332,15 @@ def info(
                 format_result_line(url.idx, url.prefix, url.url, url.local) + "\n"
             )
         else:
-            retry_lines.append(
-                "%d %s %s <norange>\n"
-                % (url.idx, url_quote(url.prefix), url_quote(url.url))
-            )
+            retry_line_parts = [
+                str(url.idx),
+                url_quote(url.prefix).decode(encoding="utf-8"),
+                url_quote(url.url).decode(encoding="utf-8"),
+                "<norange>",
+            ]
+            if transient_error_type:
+                retry_line_parts.append(transient_error_type)
+            retry_lines.append(" ".join(retry_line_parts) + "\n")
         if not is_transient_retry:
             out_lines.append("%d %s\n" % (url.idx, TRANSIENT_RETRY_LINE_CONTENT))