ob-metaflow 2.11.13.1__py2.py3-none-any.whl → 2.19.7.1rc0__py2.py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- metaflow/R.py +10 -7
- metaflow/__init__.py +40 -25
- metaflow/_vendor/imghdr/__init__.py +186 -0
- metaflow/_vendor/importlib_metadata/__init__.py +1063 -0
- metaflow/_vendor/importlib_metadata/_adapters.py +68 -0
- metaflow/_vendor/importlib_metadata/_collections.py +30 -0
- metaflow/_vendor/importlib_metadata/_compat.py +71 -0
- metaflow/_vendor/importlib_metadata/_functools.py +104 -0
- metaflow/_vendor/importlib_metadata/_itertools.py +73 -0
- metaflow/_vendor/importlib_metadata/_meta.py +48 -0
- metaflow/_vendor/importlib_metadata/_text.py +99 -0
- metaflow/_vendor/importlib_metadata/py.typed +0 -0
- metaflow/_vendor/typeguard/__init__.py +48 -0
- metaflow/_vendor/typeguard/_checkers.py +1070 -0
- metaflow/_vendor/typeguard/_config.py +108 -0
- metaflow/_vendor/typeguard/_decorators.py +233 -0
- metaflow/_vendor/typeguard/_exceptions.py +42 -0
- metaflow/_vendor/typeguard/_functions.py +308 -0
- metaflow/_vendor/typeguard/_importhook.py +213 -0
- metaflow/_vendor/typeguard/_memo.py +48 -0
- metaflow/_vendor/typeguard/_pytest_plugin.py +127 -0
- metaflow/_vendor/typeguard/_suppression.py +86 -0
- metaflow/_vendor/typeguard/_transformer.py +1229 -0
- metaflow/_vendor/typeguard/_union_transformer.py +55 -0
- metaflow/_vendor/typeguard/_utils.py +173 -0
- metaflow/_vendor/typeguard/py.typed +0 -0
- metaflow/_vendor/typing_extensions.py +3641 -0
- metaflow/_vendor/v3_7/importlib_metadata/__init__.py +1063 -0
- metaflow/_vendor/v3_7/importlib_metadata/_adapters.py +68 -0
- metaflow/_vendor/v3_7/importlib_metadata/_collections.py +30 -0
- metaflow/_vendor/v3_7/importlib_metadata/_compat.py +71 -0
- metaflow/_vendor/v3_7/importlib_metadata/_functools.py +104 -0
- metaflow/_vendor/v3_7/importlib_metadata/_itertools.py +73 -0
- metaflow/_vendor/v3_7/importlib_metadata/_meta.py +48 -0
- metaflow/_vendor/v3_7/importlib_metadata/_text.py +99 -0
- metaflow/_vendor/v3_7/importlib_metadata/py.typed +0 -0
- metaflow/_vendor/v3_7/typeguard/__init__.py +48 -0
- metaflow/_vendor/v3_7/typeguard/_checkers.py +906 -0
- metaflow/_vendor/v3_7/typeguard/_config.py +108 -0
- metaflow/_vendor/v3_7/typeguard/_decorators.py +237 -0
- metaflow/_vendor/v3_7/typeguard/_exceptions.py +42 -0
- metaflow/_vendor/v3_7/typeguard/_functions.py +310 -0
- metaflow/_vendor/v3_7/typeguard/_importhook.py +213 -0
- metaflow/_vendor/v3_7/typeguard/_memo.py +48 -0
- metaflow/_vendor/v3_7/typeguard/_pytest_plugin.py +100 -0
- metaflow/_vendor/v3_7/typeguard/_suppression.py +88 -0
- metaflow/_vendor/v3_7/typeguard/_transformer.py +1207 -0
- metaflow/_vendor/v3_7/typeguard/_union_transformer.py +54 -0
- metaflow/_vendor/v3_7/typeguard/_utils.py +169 -0
- metaflow/_vendor/v3_7/typeguard/py.typed +0 -0
- metaflow/_vendor/v3_7/typing_extensions.py +3072 -0
- metaflow/_vendor/yaml/__init__.py +427 -0
- metaflow/_vendor/yaml/composer.py +139 -0
- metaflow/_vendor/yaml/constructor.py +748 -0
- metaflow/_vendor/yaml/cyaml.py +101 -0
- metaflow/_vendor/yaml/dumper.py +62 -0
- metaflow/_vendor/yaml/emitter.py +1137 -0
- metaflow/_vendor/yaml/error.py +75 -0
- metaflow/_vendor/yaml/events.py +86 -0
- metaflow/_vendor/yaml/loader.py +63 -0
- metaflow/_vendor/yaml/nodes.py +49 -0
- metaflow/_vendor/yaml/parser.py +589 -0
- metaflow/_vendor/yaml/reader.py +185 -0
- metaflow/_vendor/yaml/representer.py +389 -0
- metaflow/_vendor/yaml/resolver.py +227 -0
- metaflow/_vendor/yaml/scanner.py +1435 -0
- metaflow/_vendor/yaml/serializer.py +111 -0
- metaflow/_vendor/yaml/tokens.py +104 -0
- metaflow/cards.py +5 -0
- metaflow/cli.py +331 -785
- metaflow/cli_args.py +17 -0
- metaflow/cli_components/__init__.py +0 -0
- metaflow/cli_components/dump_cmd.py +96 -0
- metaflow/cli_components/init_cmd.py +52 -0
- metaflow/cli_components/run_cmds.py +546 -0
- metaflow/cli_components/step_cmd.py +334 -0
- metaflow/cli_components/utils.py +140 -0
- metaflow/client/__init__.py +1 -0
- metaflow/client/core.py +467 -73
- metaflow/client/filecache.py +75 -35
- metaflow/clone_util.py +7 -1
- metaflow/cmd/code/__init__.py +231 -0
- metaflow/cmd/develop/stub_generator.py +756 -288
- metaflow/cmd/develop/stubs.py +12 -28
- metaflow/cmd/main_cli.py +6 -4
- metaflow/cmd/make_wrapper.py +78 -0
- metaflow/datastore/__init__.py +1 -0
- metaflow/datastore/content_addressed_store.py +41 -10
- metaflow/datastore/datastore_set.py +11 -2
- metaflow/datastore/flow_datastore.py +156 -10
- metaflow/datastore/spin_datastore.py +91 -0
- metaflow/datastore/task_datastore.py +154 -39
- metaflow/debug.py +5 -0
- metaflow/decorators.py +404 -78
- metaflow/exception.py +8 -2
- metaflow/extension_support/__init__.py +527 -376
- metaflow/extension_support/_empty_file.py +2 -2
- metaflow/extension_support/plugins.py +49 -31
- metaflow/flowspec.py +482 -33
- metaflow/graph.py +210 -42
- metaflow/includefile.py +84 -40
- metaflow/lint.py +141 -22
- metaflow/meta_files.py +13 -0
- metaflow/{metadata → metadata_provider}/heartbeat.py +24 -8
- metaflow/{metadata → metadata_provider}/metadata.py +86 -1
- metaflow/metaflow_config.py +175 -28
- metaflow/metaflow_config_funcs.py +51 -3
- metaflow/metaflow_current.py +4 -10
- metaflow/metaflow_environment.py +139 -53
- metaflow/metaflow_git.py +115 -0
- metaflow/metaflow_profile.py +18 -0
- metaflow/metaflow_version.py +150 -66
- metaflow/mflog/__init__.py +4 -3
- metaflow/mflog/save_logs.py +2 -2
- metaflow/multicore_utils.py +31 -14
- metaflow/package/__init__.py +673 -0
- metaflow/packaging_sys/__init__.py +880 -0
- metaflow/packaging_sys/backend.py +128 -0
- metaflow/packaging_sys/distribution_support.py +153 -0
- metaflow/packaging_sys/tar_backend.py +99 -0
- metaflow/packaging_sys/utils.py +54 -0
- metaflow/packaging_sys/v1.py +527 -0
- metaflow/parameters.py +149 -28
- metaflow/plugins/__init__.py +74 -5
- metaflow/plugins/airflow/airflow.py +40 -25
- metaflow/plugins/airflow/airflow_cli.py +22 -5
- metaflow/plugins/airflow/airflow_decorator.py +1 -1
- metaflow/plugins/airflow/airflow_utils.py +5 -3
- metaflow/plugins/airflow/sensors/base_sensor.py +4 -4
- metaflow/plugins/airflow/sensors/external_task_sensor.py +2 -2
- metaflow/plugins/airflow/sensors/s3_sensor.py +2 -2
- metaflow/plugins/argo/argo_client.py +78 -33
- metaflow/plugins/argo/argo_events.py +6 -6
- metaflow/plugins/argo/argo_workflows.py +2410 -527
- metaflow/plugins/argo/argo_workflows_cli.py +571 -121
- metaflow/plugins/argo/argo_workflows_decorator.py +43 -12
- metaflow/plugins/argo/argo_workflows_deployer.py +106 -0
- metaflow/plugins/argo/argo_workflows_deployer_objects.py +453 -0
- metaflow/plugins/argo/capture_error.py +73 -0
- metaflow/plugins/argo/conditional_input_paths.py +35 -0
- metaflow/plugins/argo/exit_hooks.py +209 -0
- metaflow/plugins/argo/jobset_input_paths.py +15 -0
- metaflow/plugins/argo/param_val.py +19 -0
- metaflow/plugins/aws/aws_client.py +10 -3
- metaflow/plugins/aws/aws_utils.py +55 -2
- metaflow/plugins/aws/batch/batch.py +72 -5
- metaflow/plugins/aws/batch/batch_cli.py +33 -10
- metaflow/plugins/aws/batch/batch_client.py +4 -3
- metaflow/plugins/aws/batch/batch_decorator.py +102 -35
- metaflow/plugins/aws/secrets_manager/aws_secrets_manager_secrets_provider.py +13 -10
- metaflow/plugins/aws/step_functions/dynamo_db_client.py +0 -3
- metaflow/plugins/aws/step_functions/production_token.py +1 -1
- metaflow/plugins/aws/step_functions/step_functions.py +65 -8
- metaflow/plugins/aws/step_functions/step_functions_cli.py +101 -7
- metaflow/plugins/aws/step_functions/step_functions_decorator.py +1 -2
- metaflow/plugins/aws/step_functions/step_functions_deployer.py +97 -0
- metaflow/plugins/aws/step_functions/step_functions_deployer_objects.py +264 -0
- metaflow/plugins/azure/azure_exceptions.py +1 -1
- metaflow/plugins/azure/azure_secret_manager_secrets_provider.py +240 -0
- metaflow/plugins/azure/azure_tail.py +1 -1
- metaflow/plugins/azure/includefile_support.py +2 -0
- metaflow/plugins/cards/card_cli.py +66 -30
- metaflow/plugins/cards/card_creator.py +25 -1
- metaflow/plugins/cards/card_datastore.py +21 -49
- metaflow/plugins/cards/card_decorator.py +132 -8
- metaflow/plugins/cards/card_modules/basic.py +112 -17
- metaflow/plugins/cards/card_modules/bundle.css +1 -1
- metaflow/plugins/cards/card_modules/card.py +16 -1
- metaflow/plugins/cards/card_modules/chevron/renderer.py +1 -1
- metaflow/plugins/cards/card_modules/components.py +665 -28
- metaflow/plugins/cards/card_modules/convert_to_native_type.py +36 -7
- metaflow/plugins/cards/card_modules/json_viewer.py +232 -0
- metaflow/plugins/cards/card_modules/main.css +1 -0
- metaflow/plugins/cards/card_modules/main.js +68 -49
- metaflow/plugins/cards/card_modules/renderer_tools.py +1 -0
- metaflow/plugins/cards/card_modules/test_cards.py +26 -12
- metaflow/plugins/cards/card_server.py +39 -14
- metaflow/plugins/cards/component_serializer.py +2 -9
- metaflow/plugins/cards/metadata.py +22 -0
- metaflow/plugins/catch_decorator.py +9 -0
- metaflow/plugins/datastores/azure_storage.py +10 -1
- metaflow/plugins/datastores/gs_storage.py +6 -2
- metaflow/plugins/datastores/local_storage.py +12 -6
- metaflow/plugins/datastores/spin_storage.py +12 -0
- metaflow/plugins/datatools/local.py +2 -0
- metaflow/plugins/datatools/s3/s3.py +126 -75
- metaflow/plugins/datatools/s3/s3op.py +254 -121
- metaflow/plugins/env_escape/__init__.py +3 -3
- metaflow/plugins/env_escape/client_modules.py +102 -72
- metaflow/plugins/env_escape/server.py +7 -0
- metaflow/plugins/env_escape/stub.py +24 -5
- metaflow/plugins/events_decorator.py +343 -185
- metaflow/plugins/exit_hook/__init__.py +0 -0
- metaflow/plugins/exit_hook/exit_hook_decorator.py +46 -0
- metaflow/plugins/exit_hook/exit_hook_script.py +52 -0
- metaflow/plugins/gcp/__init__.py +1 -1
- metaflow/plugins/gcp/gcp_secret_manager_secrets_provider.py +11 -6
- metaflow/plugins/gcp/gs_tail.py +10 -6
- metaflow/plugins/gcp/includefile_support.py +3 -0
- metaflow/plugins/kubernetes/kube_utils.py +108 -0
- metaflow/plugins/kubernetes/kubernetes.py +411 -130
- metaflow/plugins/kubernetes/kubernetes_cli.py +168 -36
- metaflow/plugins/kubernetes/kubernetes_client.py +104 -2
- metaflow/plugins/kubernetes/kubernetes_decorator.py +246 -88
- metaflow/plugins/kubernetes/kubernetes_job.py +253 -581
- metaflow/plugins/kubernetes/kubernetes_jobsets.py +1071 -0
- metaflow/plugins/kubernetes/spot_metadata_cli.py +69 -0
- metaflow/plugins/kubernetes/spot_monitor_sidecar.py +109 -0
- metaflow/plugins/logs_cli.py +359 -0
- metaflow/plugins/{metadata → metadata_providers}/local.py +144 -84
- metaflow/plugins/{metadata → metadata_providers}/service.py +103 -26
- metaflow/plugins/metadata_providers/spin.py +16 -0
- metaflow/plugins/package_cli.py +36 -24
- metaflow/plugins/parallel_decorator.py +128 -11
- metaflow/plugins/parsers.py +16 -0
- metaflow/plugins/project_decorator.py +51 -5
- metaflow/plugins/pypi/bootstrap.py +357 -105
- metaflow/plugins/pypi/conda_decorator.py +82 -81
- metaflow/plugins/pypi/conda_environment.py +187 -52
- metaflow/plugins/pypi/micromamba.py +157 -47
- metaflow/plugins/pypi/parsers.py +268 -0
- metaflow/plugins/pypi/pip.py +88 -13
- metaflow/plugins/pypi/pypi_decorator.py +37 -1
- metaflow/plugins/pypi/utils.py +48 -2
- metaflow/plugins/resources_decorator.py +2 -2
- metaflow/plugins/secrets/__init__.py +3 -0
- metaflow/plugins/secrets/secrets_decorator.py +26 -181
- metaflow/plugins/secrets/secrets_func.py +49 -0
- metaflow/plugins/secrets/secrets_spec.py +101 -0
- metaflow/plugins/secrets/utils.py +74 -0
- metaflow/plugins/tag_cli.py +4 -7
- metaflow/plugins/test_unbounded_foreach_decorator.py +41 -6
- metaflow/plugins/timeout_decorator.py +3 -3
- metaflow/plugins/uv/__init__.py +0 -0
- metaflow/plugins/uv/bootstrap.py +128 -0
- metaflow/plugins/uv/uv_environment.py +72 -0
- metaflow/procpoll.py +1 -1
- metaflow/pylint_wrapper.py +5 -1
- metaflow/runner/__init__.py +0 -0
- metaflow/runner/click_api.py +717 -0
- metaflow/runner/deployer.py +470 -0
- metaflow/runner/deployer_impl.py +201 -0
- metaflow/runner/metaflow_runner.py +714 -0
- metaflow/runner/nbdeploy.py +132 -0
- metaflow/runner/nbrun.py +225 -0
- metaflow/runner/subprocess_manager.py +650 -0
- metaflow/runner/utils.py +335 -0
- metaflow/runtime.py +1078 -260
- metaflow/sidecar/sidecar_worker.py +1 -1
- metaflow/system/__init__.py +5 -0
- metaflow/system/system_logger.py +85 -0
- metaflow/system/system_monitor.py +108 -0
- metaflow/system/system_utils.py +19 -0
- metaflow/task.py +521 -225
- metaflow/tracing/__init__.py +7 -7
- metaflow/tracing/span_exporter.py +31 -38
- metaflow/tracing/tracing_modules.py +38 -43
- metaflow/tuple_util.py +27 -0
- metaflow/user_configs/__init__.py +0 -0
- metaflow/user_configs/config_options.py +563 -0
- metaflow/user_configs/config_parameters.py +598 -0
- metaflow/user_decorators/__init__.py +0 -0
- metaflow/user_decorators/common.py +144 -0
- metaflow/user_decorators/mutable_flow.py +512 -0
- metaflow/user_decorators/mutable_step.py +424 -0
- metaflow/user_decorators/user_flow_decorator.py +264 -0
- metaflow/user_decorators/user_step_decorator.py +749 -0
- metaflow/util.py +243 -27
- metaflow/vendor.py +23 -7
- metaflow/version.py +1 -1
- ob_metaflow-2.19.7.1rc0.data/data/share/metaflow/devtools/Makefile +355 -0
- ob_metaflow-2.19.7.1rc0.data/data/share/metaflow/devtools/Tiltfile +726 -0
- ob_metaflow-2.19.7.1rc0.data/data/share/metaflow/devtools/pick_services.sh +105 -0
- ob_metaflow-2.19.7.1rc0.dist-info/METADATA +87 -0
- ob_metaflow-2.19.7.1rc0.dist-info/RECORD +445 -0
- {ob_metaflow-2.11.13.1.dist-info → ob_metaflow-2.19.7.1rc0.dist-info}/WHEEL +1 -1
- {ob_metaflow-2.11.13.1.dist-info → ob_metaflow-2.19.7.1rc0.dist-info}/entry_points.txt +1 -0
- metaflow/_vendor/v3_5/__init__.py +0 -1
- metaflow/_vendor/v3_5/importlib_metadata/__init__.py +0 -644
- metaflow/_vendor/v3_5/importlib_metadata/_compat.py +0 -152
- metaflow/package.py +0 -188
- ob_metaflow-2.11.13.1.dist-info/METADATA +0 -85
- ob_metaflow-2.11.13.1.dist-info/RECORD +0 -308
- /metaflow/_vendor/{v3_5/zipp.py → zipp.py} +0 -0
- /metaflow/{metadata → metadata_provider}/__init__.py +0 -0
- /metaflow/{metadata → metadata_provider}/util.py +0 -0
- /metaflow/plugins/{metadata → metadata_providers}/__init__.py +0 -0
- {ob_metaflow-2.11.13.1.dist-info → ob_metaflow-2.19.7.1rc0.dist-info/licenses}/LICENSE +0 -0
- {ob_metaflow-2.11.13.1.dist-info → ob_metaflow-2.19.7.1rc0.dist-info}/top_level.txt +0 -0
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
from __future__ import print_function
|
|
2
2
|
|
|
3
|
+
import errno
|
|
3
4
|
import json
|
|
4
5
|
import time
|
|
5
6
|
import math
|
|
@@ -15,7 +16,9 @@ from tempfile import NamedTemporaryFile
|
|
|
15
16
|
from multiprocessing import Process, Queue
|
|
16
17
|
from itertools import starmap, chain, islice
|
|
17
18
|
|
|
19
|
+
from boto3.exceptions import RetriesExceededError, S3UploadFailedError
|
|
18
20
|
from boto3.s3.transfer import TransferConfig
|
|
21
|
+
from botocore.exceptions import ClientError, SSLError
|
|
19
22
|
|
|
20
23
|
try:
|
|
21
24
|
# python2
|
|
@@ -47,12 +50,19 @@ import metaflow.tracing as tracing
|
|
|
47
50
|
from metaflow.metaflow_config import (
|
|
48
51
|
S3_WORKER_COUNT,
|
|
49
52
|
)
|
|
53
|
+
from metaflow.exception import MetaflowException
|
|
50
54
|
|
|
51
55
|
DOWNLOAD_FILE_THRESHOLD = 2 * TransferConfig().multipart_threshold
|
|
52
56
|
DOWNLOAD_MAX_CHUNK = 2 * 1024 * 1024 * 1024 - 1
|
|
53
57
|
|
|
54
58
|
RANGE_MATCH = re.compile(r"bytes (?P<start>[0-9]+)-(?P<end>[0-9]+)/(?P<total>[0-9]+)")
|
|
55
59
|
|
|
60
|
+
# from botocore ClientError MSG_TEMPLATE:
|
|
61
|
+
# https://github.com/boto/botocore/blob/68ca78f3097906c9231840a49931ef4382c41eea/botocore/exceptions.py#L521
|
|
62
|
+
BOTOCORE_MSG_TEMPLATE_MATCH = re.compile(
|
|
63
|
+
r"An error occurred \((\w+)\) when calling the (\w+) operation.*: (.+)"
|
|
64
|
+
)
|
|
65
|
+
|
|
56
66
|
S3Config = namedtuple("S3Config", "role session_vars client_params")
|
|
57
67
|
|
|
58
68
|
|
|
@@ -97,6 +107,7 @@ ERROR_VERIFY_FAILED = 9
|
|
|
97
107
|
ERROR_LOCAL_FILE_NOT_FOUND = 10
|
|
98
108
|
ERROR_INVALID_RANGE = 11
|
|
99
109
|
ERROR_TRANSIENT = 12
|
|
110
|
+
ERROR_OUT_OF_DISK_SPACE = 13
|
|
100
111
|
|
|
101
112
|
|
|
102
113
|
def format_result_line(idx, prefix, url="", local=""):
|
|
@@ -119,9 +130,9 @@ def normalize_client_error(err):
|
|
|
119
130
|
try:
|
|
120
131
|
return int(error_code)
|
|
121
132
|
except ValueError:
|
|
122
|
-
if error_code in ("AccessDenied", "AllAccessDisabled"):
|
|
133
|
+
if error_code in ("AccessDenied", "AllAccessDisabled", "InvalidAccessKeyId"):
|
|
123
134
|
return 403
|
|
124
|
-
if error_code
|
|
135
|
+
if error_code in ("NoSuchKey", "NoSuchBucket"):
|
|
125
136
|
return 404
|
|
126
137
|
if error_code == "InvalidRange":
|
|
127
138
|
return 416
|
|
@@ -147,6 +158,7 @@ def normalize_client_error(err):
|
|
|
147
158
|
"LimitExceededException",
|
|
148
159
|
"RequestThrottled",
|
|
149
160
|
"EC2ThrottledException",
|
|
161
|
+
"InternalError",
|
|
150
162
|
):
|
|
151
163
|
return 503
|
|
152
164
|
return error_code
|
|
@@ -155,7 +167,7 @@ def normalize_client_error(err):
|
|
|
155
167
|
# S3 worker pool
|
|
156
168
|
|
|
157
169
|
|
|
158
|
-
@tracing.
|
|
170
|
+
@tracing.cli("s3op/worker")
|
|
159
171
|
def worker(result_file_name, queue, mode, s3config):
|
|
160
172
|
# Interpret mode, it can either be a single op or something like
|
|
161
173
|
# info_download or info_upload which implies:
|
|
@@ -221,54 +233,77 @@ def worker(result_file_name, queue, mode, s3config):
|
|
|
221
233
|
elif mode == "download":
|
|
222
234
|
tmp = NamedTemporaryFile(dir=".", mode="wb", delete=False)
|
|
223
235
|
try:
|
|
224
|
-
|
|
225
|
-
|
|
226
|
-
|
|
227
|
-
|
|
228
|
-
range_result = resp["ContentRange"]
|
|
229
|
-
range_result_match = RANGE_MATCH.match(range_result)
|
|
230
|
-
if range_result_match is None:
|
|
231
|
-
raise RuntimeError(
|
|
232
|
-
"Wrong format for ContentRange: %s"
|
|
233
|
-
% str(range_result)
|
|
236
|
+
try:
|
|
237
|
+
if url.range:
|
|
238
|
+
resp = s3.get_object(
|
|
239
|
+
Bucket=url.bucket, Key=url.path, Range=url.range
|
|
234
240
|
)
|
|
235
|
-
|
|
236
|
-
|
|
237
|
-
|
|
238
|
-
|
|
239
|
-
|
|
240
|
-
|
|
241
|
-
|
|
242
|
-
|
|
243
|
-
|
|
244
|
-
|
|
245
|
-
|
|
246
|
-
|
|
247
|
-
|
|
248
|
-
|
|
249
|
-
|
|
250
|
-
|
|
251
|
-
|
|
252
|
-
|
|
253
|
-
|
|
254
|
-
|
|
255
|
-
|
|
256
|
-
|
|
257
|
-
|
|
258
|
-
|
|
259
|
-
|
|
241
|
+
range_result = resp["ContentRange"]
|
|
242
|
+
range_result_match = RANGE_MATCH.match(range_result)
|
|
243
|
+
if range_result_match is None:
|
|
244
|
+
raise RuntimeError(
|
|
245
|
+
"Wrong format for ContentRange: %s"
|
|
246
|
+
% str(range_result)
|
|
247
|
+
)
|
|
248
|
+
range_result = {
|
|
249
|
+
x: int(range_result_match.group(x))
|
|
250
|
+
for x in ["total", "start", "end"]
|
|
251
|
+
}
|
|
252
|
+
else:
|
|
253
|
+
resp = s3.get_object(Bucket=url.bucket, Key=url.path)
|
|
254
|
+
range_result = None
|
|
255
|
+
sz = resp["ContentLength"]
|
|
256
|
+
if range_result is None:
|
|
257
|
+
range_result = {"total": sz, "start": 0, "end": sz - 1}
|
|
258
|
+
if not url.range and sz > DOWNLOAD_FILE_THRESHOLD:
|
|
259
|
+
# In this case, it is more efficient to use download_file as it
|
|
260
|
+
# will download multiple parts in parallel (it does it after
|
|
261
|
+
# multipart_threshold)
|
|
262
|
+
s3.download_file(url.bucket, url.path, tmp.name)
|
|
263
|
+
else:
|
|
264
|
+
read_in_chunks(
|
|
265
|
+
tmp, resp["Body"], sz, DOWNLOAD_MAX_CHUNK
|
|
266
|
+
)
|
|
267
|
+
tmp.close()
|
|
268
|
+
os.rename(tmp.name, url.local)
|
|
269
|
+
except client_error as err:
|
|
270
|
+
tmp.close()
|
|
271
|
+
os.unlink(tmp.name)
|
|
272
|
+
handle_client_error(err, idx, result_file)
|
|
260
273
|
continue
|
|
261
|
-
|
|
262
|
-
|
|
263
|
-
|
|
264
|
-
)
|
|
274
|
+
except RetriesExceededError as e:
|
|
275
|
+
tmp.close()
|
|
276
|
+
os.unlink(tmp.name)
|
|
277
|
+
err = convert_to_client_error(e)
|
|
278
|
+
handle_client_error(err, idx, result_file)
|
|
265
279
|
continue
|
|
266
|
-
|
|
267
|
-
|
|
280
|
+
except OSError as e:
|
|
281
|
+
tmp.close()
|
|
282
|
+
os.unlink(tmp.name)
|
|
283
|
+
if e.errno == errno.ENOSPC:
|
|
284
|
+
result_file.write(
|
|
285
|
+
"%d %d\n" % (idx, -ERROR_OUT_OF_DISK_SPACE)
|
|
286
|
+
)
|
|
287
|
+
else:
|
|
288
|
+
result_file.write(
|
|
289
|
+
"%d %d %s\n" % (idx, -ERROR_TRANSIENT, "OSError")
|
|
290
|
+
)
|
|
291
|
+
result_file.flush()
|
|
268
292
|
continue
|
|
269
|
-
|
|
270
|
-
|
|
271
|
-
|
|
293
|
+
except MetaflowException:
|
|
294
|
+
# Re-raise Metaflow exceptions (including TimeoutException)
|
|
295
|
+
tmp.close()
|
|
296
|
+
os.unlink(tmp.name)
|
|
297
|
+
raise
|
|
298
|
+
except (SSLError, Exception) as e:
|
|
299
|
+
tmp.close()
|
|
300
|
+
os.unlink(tmp.name)
|
|
301
|
+
# assume anything else is transient
|
|
302
|
+
result_file.write(
|
|
303
|
+
"%d %d %s\n" % (idx, -ERROR_TRANSIENT, type(e).__name__)
|
|
304
|
+
)
|
|
305
|
+
result_file.flush()
|
|
306
|
+
continue
|
|
272
307
|
# If we need the metadata, get it and write it out
|
|
273
308
|
if pre_op_info:
|
|
274
309
|
with open("%s_meta" % url.local, mode="w") as f:
|
|
@@ -316,28 +351,77 @@ def worker(result_file_name, queue, mode, s3config):
|
|
|
316
351
|
if url.encryption is not None:
|
|
317
352
|
extra["ServerSideEncryption"] = url.encryption
|
|
318
353
|
try:
|
|
319
|
-
|
|
320
|
-
|
|
321
|
-
|
|
322
|
-
# We indicate that the file was uploaded
|
|
323
|
-
result_file.write("%d %d\n" % (idx, 0))
|
|
324
|
-
except client_error as err:
|
|
325
|
-
error_code = normalize_client_error(err)
|
|
326
|
-
if error_code == 403:
|
|
327
|
-
result_file.write(
|
|
328
|
-
"%d %d\n" % (idx, -ERROR_URL_ACCESS_DENIED)
|
|
354
|
+
try:
|
|
355
|
+
s3.upload_file(
|
|
356
|
+
url.local, url.bucket, url.path, ExtraArgs=extra
|
|
329
357
|
)
|
|
358
|
+
# We indicate that the file was uploaded
|
|
359
|
+
result_file.write("%d %d\n" % (idx, 0))
|
|
360
|
+
except client_error as err:
|
|
361
|
+
# Shouldn't get here, but just in case.
|
|
362
|
+
# Internally, botocore catches ClientError and returns a S3UploadFailedError.
|
|
363
|
+
# See https://github.com/boto/boto3/blob/develop/boto3/s3/transfer.py#L377
|
|
364
|
+
handle_client_error(err, idx, result_file)
|
|
330
365
|
continue
|
|
331
|
-
|
|
332
|
-
|
|
366
|
+
except S3UploadFailedError as e:
|
|
367
|
+
err = convert_to_client_error(e)
|
|
368
|
+
handle_client_error(err, idx, result_file)
|
|
333
369
|
continue
|
|
334
|
-
|
|
335
|
-
|
|
370
|
+
except MetaflowException:
|
|
371
|
+
# Re-raise Metaflow exceptions (including TimeoutException)
|
|
372
|
+
raise
|
|
373
|
+
except (SSLError, Exception) as e:
|
|
374
|
+
# assume anything else is transient
|
|
375
|
+
result_file.write(
|
|
376
|
+
"%d %d %s\n" % (idx, -ERROR_TRANSIENT, type(e).__name__)
|
|
377
|
+
)
|
|
378
|
+
result_file.flush()
|
|
379
|
+
continue
|
|
336
380
|
except:
|
|
337
381
|
traceback.print_exc()
|
|
382
|
+
result_file.flush()
|
|
338
383
|
sys.exit(ERROR_WORKER_EXCEPTION)
|
|
339
384
|
|
|
340
385
|
|
|
386
|
+
def convert_to_client_error(e):
|
|
387
|
+
match = BOTOCORE_MSG_TEMPLATE_MATCH.search(str(e))
|
|
388
|
+
if not match:
|
|
389
|
+
raise e
|
|
390
|
+
error_code = match.group(1)
|
|
391
|
+
operation_name = match.group(2)
|
|
392
|
+
error_message = match.group(3)
|
|
393
|
+
response = {
|
|
394
|
+
"Error": {
|
|
395
|
+
"Code": error_code,
|
|
396
|
+
"Message": error_message,
|
|
397
|
+
}
|
|
398
|
+
}
|
|
399
|
+
return ClientError(response, operation_name)
|
|
400
|
+
|
|
401
|
+
|
|
402
|
+
def handle_client_error(err, idx, result_file):
|
|
403
|
+
# Handle all MetaflowExceptions as fatal
|
|
404
|
+
if isinstance(err, MetaflowException):
|
|
405
|
+
raise err
|
|
406
|
+
|
|
407
|
+
error_code = normalize_client_error(err)
|
|
408
|
+
original_error_code = err.response["Error"]["Code"]
|
|
409
|
+
|
|
410
|
+
if error_code == 404:
|
|
411
|
+
result_file.write("%d %d\n" % (idx, -ERROR_URL_NOT_FOUND))
|
|
412
|
+
result_file.flush()
|
|
413
|
+
elif error_code == 403:
|
|
414
|
+
result_file.write("%d %d\n" % (idx, -ERROR_URL_ACCESS_DENIED))
|
|
415
|
+
result_file.flush()
|
|
416
|
+
elif error_code == 503:
|
|
417
|
+
result_file.write("%d %d %s\n" % (idx, -ERROR_TRANSIENT, original_error_code))
|
|
418
|
+
result_file.flush()
|
|
419
|
+
else:
|
|
420
|
+
# optimistically assume it is a transient error
|
|
421
|
+
result_file.write("%d %d %s\n" % (idx, -ERROR_TRANSIENT, original_error_code))
|
|
422
|
+
result_file.flush()
|
|
423
|
+
|
|
424
|
+
|
|
341
425
|
def start_workers(mode, urls, num_workers, inject_failure, s3config):
|
|
342
426
|
# We start the minimum of len(urls) or num_workers to avoid starting
|
|
343
427
|
# workers that will definitely do nothing
|
|
@@ -347,6 +431,7 @@ def start_workers(mode, urls, num_workers, inject_failure, s3config):
|
|
|
347
431
|
random.seed()
|
|
348
432
|
|
|
349
433
|
sz_results = []
|
|
434
|
+
transient_error_type = None
|
|
350
435
|
# 1. push sources and destinations to the queue
|
|
351
436
|
# We only push if we don't inject a failure; otherwise, we already set the sz_results
|
|
352
437
|
# appropriately with the result of the injected failure.
|
|
@@ -381,17 +466,39 @@ def start_workers(mode, urls, num_workers, inject_failure, s3config):
|
|
|
381
466
|
if proc.exitcode is not None:
|
|
382
467
|
if proc.exitcode != 0:
|
|
383
468
|
msg = "Worker process failed (exit code %d)" % proc.exitcode
|
|
469
|
+
|
|
470
|
+
# IMPORTANT: if this process has put items on a queue, then it will not terminate
|
|
471
|
+
# until all buffered items have been flushed to the pipe, causing a deadlock.
|
|
472
|
+
# `cancel_join_thread()` allows it to exit without flushing the queue.
|
|
473
|
+
# Without this line, the parent process would hang indefinitely when a subprocess
|
|
474
|
+
# did not exit cleanly in the case of unhandled exceptions.
|
|
475
|
+
#
|
|
476
|
+
# The error situation is:
|
|
477
|
+
# 1. this process puts stuff in queue
|
|
478
|
+
# 2. subprocess dies so doesn't consume its end-of-queue marker (the None)
|
|
479
|
+
# 3. other subprocesses consume all useful bits AND their end-of-queue marker
|
|
480
|
+
# 4. one marker is left and not consumed
|
|
481
|
+
# 5. this process cannot shut down until the queue is empty.
|
|
482
|
+
# 6. it will never be empty because all subprocesses (workers) have died.
|
|
483
|
+
queue.cancel_join_thread()
|
|
484
|
+
|
|
384
485
|
exit(msg, proc.exitcode)
|
|
385
486
|
# Read the output file if all went well
|
|
386
487
|
with open(out_path, "r") as out_file:
|
|
387
488
|
for line in out_file:
|
|
388
|
-
line_split = line.split(" ")
|
|
389
|
-
|
|
489
|
+
line_split = line.split(" ", 2)
|
|
490
|
+
idx = int(line_split[0])
|
|
491
|
+
size = int(line_split[1])
|
|
492
|
+
sz_results[idx] = size
|
|
493
|
+
|
|
494
|
+
# For transient errors, store the transient error type (should be the same for all)
|
|
495
|
+
if size == -ERROR_TRANSIENT and len(line_split) > 2:
|
|
496
|
+
transient_error_type = line_split[2].strip()
|
|
390
497
|
else:
|
|
391
498
|
# Put this process back in the processes to check
|
|
392
499
|
new_procs[proc] = out_path
|
|
393
500
|
procs = new_procs
|
|
394
|
-
return sz_results
|
|
501
|
+
return sz_results, transient_error_type
|
|
395
502
|
|
|
396
503
|
|
|
397
504
|
def process_urls(mode, urls, verbose, inject_failure, num_workers, s3config):
|
|
@@ -400,7 +507,9 @@ def process_urls(mode, urls, verbose, inject_failure, num_workers, s3config):
|
|
|
400
507
|
print("%sing %d files.." % (mode.capitalize(), len(urls)), file=sys.stderr)
|
|
401
508
|
|
|
402
509
|
start = time.time()
|
|
403
|
-
sz_results = start_workers(
|
|
510
|
+
sz_results, transient_error_type = start_workers(
|
|
511
|
+
mode, urls, num_workers, inject_failure, s3config
|
|
512
|
+
)
|
|
404
513
|
end = time.time()
|
|
405
514
|
|
|
406
515
|
if verbose:
|
|
@@ -417,7 +526,7 @@ def process_urls(mode, urls, verbose, inject_failure, num_workers, s3config):
|
|
|
417
526
|
),
|
|
418
527
|
file=sys.stderr,
|
|
419
528
|
)
|
|
420
|
-
return sz_results
|
|
529
|
+
return sz_results, transient_error_type
|
|
421
530
|
|
|
422
531
|
|
|
423
532
|
# Utility functions
|
|
@@ -502,11 +611,12 @@ class S3Ops(object):
|
|
|
502
611
|
# - the trailing slash is significant in S3
|
|
503
612
|
if "Contents" in page:
|
|
504
613
|
for key in page.get("Contents", []):
|
|
505
|
-
|
|
614
|
+
key_path = key["Key"].lstrip("/")
|
|
615
|
+
url = url_base + key_path
|
|
506
616
|
urlobj = S3Url(
|
|
507
617
|
url=url,
|
|
508
618
|
bucket=prefix_url.bucket,
|
|
509
|
-
path=
|
|
619
|
+
path=key_path,
|
|
510
620
|
local=generate_local_path(url),
|
|
511
621
|
prefix=prefix_url.url,
|
|
512
622
|
)
|
|
@@ -573,6 +683,8 @@ def exit(exit_code, url):
|
|
|
573
683
|
msg = "Local file not found: %s" % url
|
|
574
684
|
elif exit_code == ERROR_TRANSIENT:
|
|
575
685
|
msg = "Transient error for url: %s" % url
|
|
686
|
+
elif exit_code == ERROR_OUT_OF_DISK_SPACE:
|
|
687
|
+
msg = "Out of disk space when downloading URL: %s" % url
|
|
576
688
|
else:
|
|
577
689
|
msg = "Unknown error"
|
|
578
690
|
print("s3op failed:\n%s" % msg, file=sys.stderr)
|
|
@@ -612,9 +724,21 @@ def generate_local_path(url, range="whole", suffix=None):
|
|
|
612
724
|
quoted = url_quote(url)
|
|
613
725
|
fname = quoted.split(b"/")[-1].replace(b".", b"_").replace(b"-", b"_")
|
|
614
726
|
sha = sha1(quoted).hexdigest()
|
|
727
|
+
|
|
728
|
+
# Truncate fname to ensure the final filename doesn't exceed filesystem limits.
|
|
729
|
+
# Most filesystems have a 255 character limit. The structure is:
|
|
730
|
+
# <40-char-sha>-<fname>-<range>[-<suffix>]
|
|
731
|
+
# We need to leave room for: sha (40) + hyphens (2-3) + range (~10) + suffix (~10)
|
|
732
|
+
# This leaves roughly 190 characters for fname. We use 150 to be safe.
|
|
733
|
+
fname_decoded = fname.decode("utf-8")
|
|
734
|
+
max_fname_len = 150
|
|
735
|
+
if len(fname_decoded) > max_fname_len:
|
|
736
|
+
# Truncate and add an ellipsis to indicate truncation
|
|
737
|
+
fname_decoded = fname_decoded[:max_fname_len] + "..."
|
|
738
|
+
|
|
615
739
|
if suffix:
|
|
616
|
-
return "-".join((sha,
|
|
617
|
-
return "-".join((sha,
|
|
740
|
+
return "-".join((sha, fname_decoded, range, suffix))
|
|
741
|
+
return "-".join((sha, fname_decoded, range))
|
|
618
742
|
|
|
619
743
|
|
|
620
744
|
def parallel_op(op, lst, num_workers):
|
|
@@ -722,8 +846,8 @@ def cli():
|
|
|
722
846
|
pass
|
|
723
847
|
|
|
724
848
|
|
|
725
|
-
@tracing.cli_entrypoint("s3op/list")
|
|
726
849
|
@cli.command("list", help="List S3 objects")
|
|
850
|
+
@tracing.cli("s3op/list")
|
|
727
851
|
@click.option(
|
|
728
852
|
"--recursive/--no-recursive",
|
|
729
853
|
default=False,
|
|
@@ -751,11 +875,16 @@ def lst(
|
|
|
751
875
|
urllist = []
|
|
752
876
|
to_iterate, _ = _populate_prefixes(prefixes, inputs)
|
|
753
877
|
for _, prefix, url, _ in to_iterate:
|
|
754
|
-
src = urlparse(url)
|
|
878
|
+
src = urlparse(url, allow_fragments=False)
|
|
879
|
+
# We always consider the path being passed in to be a directory path so
|
|
880
|
+
# we add a trailing slash to the path if it doesn't already have one.
|
|
881
|
+
path_with_slash = src.path.lstrip("/")
|
|
882
|
+
if not path_with_slash.endswith("/"):
|
|
883
|
+
path_with_slash += "/"
|
|
755
884
|
url = S3Url(
|
|
756
885
|
url=url,
|
|
757
886
|
bucket=src.netloc,
|
|
758
|
-
path=
|
|
887
|
+
path=path_with_slash,
|
|
759
888
|
local=None,
|
|
760
889
|
prefix=prefix,
|
|
761
890
|
)
|
|
@@ -782,8 +911,8 @@ def lst(
|
|
|
782
911
|
print(format_result_line(idx, url.prefix, url.url, str(size)))
|
|
783
912
|
|
|
784
913
|
|
|
785
|
-
@tracing.cli_entrypoint("s3op/put")
|
|
786
914
|
@cli.command(help="Upload files to S3")
|
|
915
|
+
@tracing.cli("s3op/put")
|
|
787
916
|
@click.option(
|
|
788
917
|
"--file",
|
|
789
918
|
"files",
|
|
@@ -857,7 +986,7 @@ def put(
|
|
|
857
986
|
yield input_line_idx, local, url, content_type, metadata, encryption
|
|
858
987
|
|
|
859
988
|
def _make_url(idx, local, user_url, content_type, metadata, encryption):
|
|
860
|
-
src = urlparse(user_url)
|
|
989
|
+
src = urlparse(user_url, allow_fragments=False)
|
|
861
990
|
url = S3Url(
|
|
862
991
|
url=user_url,
|
|
863
992
|
bucket=src.netloc,
|
|
@@ -885,7 +1014,7 @@ def put(
|
|
|
885
1014
|
ul_op = "upload"
|
|
886
1015
|
if not overwrite:
|
|
887
1016
|
ul_op = "info_upload"
|
|
888
|
-
sz_results = process_urls(
|
|
1017
|
+
sz_results, transient_error_type = process_urls(
|
|
889
1018
|
ul_op, urls, verbose, inject_failure, num_workers, s3config
|
|
890
1019
|
)
|
|
891
1020
|
retry_lines = []
|
|
@@ -903,19 +1032,17 @@ def put(
|
|
|
903
1032
|
elif listing and sz == 0:
|
|
904
1033
|
out_lines.append(format_result_line(url.idx, url.url) + "\n")
|
|
905
1034
|
elif sz == -ERROR_TRANSIENT:
|
|
906
|
-
|
|
907
|
-
|
|
908
|
-
|
|
909
|
-
|
|
910
|
-
|
|
911
|
-
|
|
912
|
-
|
|
913
|
-
|
|
914
|
-
|
|
915
|
-
|
|
916
|
-
|
|
917
|
-
+ "\n"
|
|
918
|
-
)
|
|
1035
|
+
retry_data = {
|
|
1036
|
+
"idx": url.idx,
|
|
1037
|
+
"url": url.url,
|
|
1038
|
+
"local": url.local,
|
|
1039
|
+
"content_type": url.content_type,
|
|
1040
|
+
"metadata": url.metadata,
|
|
1041
|
+
"encryption": url.encryption,
|
|
1042
|
+
}
|
|
1043
|
+
if transient_error_type:
|
|
1044
|
+
retry_data["transient_error_type"] = transient_error_type
|
|
1045
|
+
retry_lines.append(json.dumps(retry_data) + "\n")
|
|
919
1046
|
# Output something to get a total count the first time around
|
|
920
1047
|
if not is_transient_retry:
|
|
921
1048
|
out_lines.append("%d %s\n" % (url.idx, TRANSIENT_RETRY_LINE_CONTENT))
|
|
@@ -953,22 +1080,21 @@ def _populate_prefixes(prefixes, inputs):
|
|
|
953
1080
|
for idx, l in enumerate(f, start=len(prefixes)):
|
|
954
1081
|
s = l.split(b" ")
|
|
955
1082
|
if len(s) == 1:
|
|
1083
|
+
# User input format: <url>
|
|
956
1084
|
url = url_unquote(s[0].strip())
|
|
957
1085
|
prefixes.append((idx, url, url, None))
|
|
958
1086
|
elif len(s) == 2:
|
|
1087
|
+
# User input format: <url> <range>
|
|
959
1088
|
url = url_unquote(s[0].strip())
|
|
960
1089
|
prefixes.append((idx, url, url, url_unquote(s[1].strip())))
|
|
961
|
-
|
|
1090
|
+
elif len(s) in (4, 5):
|
|
1091
|
+
# Retry format: <idx> <prefix> <url> <range> [<transient_error_type>]
|
|
1092
|
+
# The transient_error_type (5th field) is optional and only used for logging.
|
|
1093
|
+
# Lines with other field counts (e.g., 3) are silently ignored as invalid.
|
|
962
1094
|
is_transient_retry = True
|
|
963
|
-
|
|
964
|
-
|
|
965
|
-
|
|
966
|
-
else:
|
|
967
|
-
# Special case when we have both prefix and URL -- this is
|
|
968
|
-
# used in recursive gets for example
|
|
969
|
-
prefix = url_unquote(s[1].strip())
|
|
970
|
-
url = url_unquote(s[2].strip())
|
|
971
|
-
range_info = url_unquote(s[3].strip())
|
|
1095
|
+
prefix = url_unquote(s[1].strip())
|
|
1096
|
+
url = url_unquote(s[2].strip())
|
|
1097
|
+
range_info = url_unquote(s[3].strip())
|
|
972
1098
|
if range_info == "<norange>":
|
|
973
1099
|
range_info = None
|
|
974
1100
|
prefixes.append(
|
|
@@ -977,8 +1103,8 @@ def _populate_prefixes(prefixes, inputs):
|
|
|
977
1103
|
return prefixes, is_transient_retry
|
|
978
1104
|
|
|
979
1105
|
|
|
980
|
-
@tracing.cli_entrypoint("s3op/get")
|
|
981
1106
|
@cli.command(help="Download files from S3")
|
|
1107
|
+
@tracing.cli("s3op/get")
|
|
982
1108
|
@click.option(
|
|
983
1109
|
"--recursive/--no-recursive",
|
|
984
1110
|
default=False,
|
|
@@ -1032,7 +1158,7 @@ def get(
|
|
|
1032
1158
|
urllist = []
|
|
1033
1159
|
to_iterate, is_transient_retry = _populate_prefixes(prefixes, inputs)
|
|
1034
1160
|
for idx, prefix, url, r in to_iterate:
|
|
1035
|
-
src = urlparse(url)
|
|
1161
|
+
src = urlparse(url, allow_fragments=False)
|
|
1036
1162
|
url = S3Url(
|
|
1037
1163
|
url=url,
|
|
1038
1164
|
bucket=src.netloc,
|
|
@@ -1079,7 +1205,7 @@ def get(
|
|
|
1079
1205
|
|
|
1080
1206
|
# exclude the non-existent files from loading
|
|
1081
1207
|
to_load = [url for url, size in urls if size is not None]
|
|
1082
|
-
sz_results = process_urls(
|
|
1208
|
+
sz_results, transient_error_type = process_urls(
|
|
1083
1209
|
dl_op, to_load, verbose, inject_failure, num_workers, s3config
|
|
1084
1210
|
)
|
|
1085
1211
|
# We check if there is any access denied
|
|
@@ -1103,6 +1229,8 @@ def get(
|
|
|
1103
1229
|
)
|
|
1104
1230
|
if verify:
|
|
1105
1231
|
verify_info.append((url, sz))
|
|
1232
|
+
elif sz == -ERROR_OUT_OF_DISK_SPACE:
|
|
1233
|
+
exit(ERROR_OUT_OF_DISK_SPACE, url)
|
|
1106
1234
|
elif sz == -ERROR_URL_ACCESS_DENIED:
|
|
1107
1235
|
denied_url = url
|
|
1108
1236
|
break
|
|
@@ -1113,19 +1241,19 @@ def get(
|
|
|
1113
1241
|
break
|
|
1114
1242
|
out_lines.append(format_result_line(url.idx, url.url) + "\n")
|
|
1115
1243
|
elif sz == -ERROR_TRANSIENT:
|
|
1116
|
-
|
|
1117
|
-
|
|
1118
|
-
|
|
1119
|
-
|
|
1120
|
-
|
|
1121
|
-
|
|
1122
|
-
|
|
1123
|
-
|
|
1124
|
-
|
|
1125
|
-
|
|
1126
|
-
|
|
1127
|
-
|
|
1128
|
-
)
|
|
1244
|
+
retry_line_parts = [
|
|
1245
|
+
str(url.idx),
|
|
1246
|
+
url_quote(url.prefix).decode(encoding="utf-8"),
|
|
1247
|
+
url_quote(url.url).decode(encoding="utf-8"),
|
|
1248
|
+
(
|
|
1249
|
+
url_quote(url.range).decode(encoding="utf-8")
|
|
1250
|
+
if url.range
|
|
1251
|
+
else "<norange>"
|
|
1252
|
+
),
|
|
1253
|
+
]
|
|
1254
|
+
if transient_error_type:
|
|
1255
|
+
retry_line_parts.append(transient_error_type)
|
|
1256
|
+
retry_lines.append(" ".join(retry_line_parts) + "\n")
|
|
1129
1257
|
# First time around, we output something to indicate the total length
|
|
1130
1258
|
if not is_transient_retry:
|
|
1131
1259
|
out_lines.append("%d %s\n" % (url.idx, TRANSIENT_RETRY_LINE_CONTENT))
|
|
@@ -1177,7 +1305,7 @@ def info(
|
|
|
1177
1305
|
urllist = []
|
|
1178
1306
|
to_iterate, is_transient_retry = _populate_prefixes(prefixes, inputs)
|
|
1179
1307
|
for idx, prefix, url, _ in to_iterate:
|
|
1180
|
-
src = urlparse(url)
|
|
1308
|
+
src = urlparse(url, allow_fragments=False)
|
|
1181
1309
|
url = S3Url(
|
|
1182
1310
|
url=url,
|
|
1183
1311
|
bucket=src.netloc,
|
|
@@ -1191,7 +1319,7 @@ def info(
|
|
|
1191
1319
|
exit(ERROR_INVALID_URL, url)
|
|
1192
1320
|
urllist.append(url)
|
|
1193
1321
|
|
|
1194
|
-
sz_results = process_urls(
|
|
1322
|
+
sz_results, transient_error_type = process_urls(
|
|
1195
1323
|
"info", urllist, verbose, inject_failure, num_workers, s3config
|
|
1196
1324
|
)
|
|
1197
1325
|
|
|
@@ -1204,10 +1332,15 @@ def info(
|
|
|
1204
1332
|
format_result_line(url.idx, url.prefix, url.url, url.local) + "\n"
|
|
1205
1333
|
)
|
|
1206
1334
|
else:
|
|
1207
|
-
|
|
1208
|
-
|
|
1209
|
-
|
|
1210
|
-
|
|
1335
|
+
retry_line_parts = [
|
|
1336
|
+
str(url.idx),
|
|
1337
|
+
url_quote(url.prefix).decode(encoding="utf-8"),
|
|
1338
|
+
url_quote(url.url).decode(encoding="utf-8"),
|
|
1339
|
+
"<norange>",
|
|
1340
|
+
]
|
|
1341
|
+
if transient_error_type:
|
|
1342
|
+
retry_line_parts.append(transient_error_type)
|
|
1343
|
+
retry_lines.append(" ".join(retry_line_parts) + "\n")
|
|
1211
1344
|
if not is_transient_retry:
|
|
1212
1345
|
out_lines.append("%d %s\n" % (url.idx, TRANSIENT_RETRY_LINE_CONTENT))
|
|
1213
1346
|
|
|
@@ -124,9 +124,9 @@ def load():
|
|
|
124
124
|
cur_path = os.path.dirname(__file__)
|
|
125
125
|
sys.path = [p for p in old_paths if p != cur_path]
|
|
126
126
|
# Handle special case where we launch a shell (including with a command)
|
|
127
|
-
# and we are in the CWD (searched if '' is
|
|
128
|
-
if cur_path == os.getcwd() and sys.path
|
|
129
|
-
sys.path
|
|
127
|
+
# and we are in the CWD (searched if '' is present in sys.path)
|
|
128
|
+
if cur_path == os.getcwd() and '' in sys.path:
|
|
129
|
+
sys.path.remove("")
|
|
130
130
|
|
|
131
131
|
# Remove the module (this file) to reload it properly. Do *NOT* update sys.modules but
|
|
132
132
|
# modify directly since it may be referenced elsewhere
|