ob-metaflow 2.12.33.1__py2.py3-none-any.whl → 2.12.35.1__py2.py3-none-any.whl
This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the two versions as they appear in their public registries.
Potentially problematic release.
- metaflow/metaflow_config.py +2 -0
- metaflow/multicore_utils.py +31 -14
- metaflow/parameters.py +8 -2
- metaflow/plugins/airflow/airflow.py +18 -17
- metaflow/plugins/argo/argo_workflows.py +30 -17
- metaflow/plugins/argo/argo_workflows_deployer_objects.py +4 -8
- metaflow/plugins/aws/step_functions/step_functions_deployer_objects.py +4 -8
- metaflow/plugins/datatools/s3/s3.py +12 -4
- metaflow/plugins/events_decorator.py +253 -72
- metaflow/plugins/kubernetes/kube_utils.py +29 -0
- metaflow/plugins/kubernetes/kubernetes.py +4 -0
- metaflow/plugins/kubernetes/kubernetes_cli.py +8 -0
- metaflow/plugins/kubernetes/kubernetes_decorator.py +17 -0
- metaflow/plugins/kubernetes/kubernetes_job.py +19 -13
- metaflow/plugins/kubernetes/kubernetes_jobsets.py +19 -15
- metaflow/runner/deployer_impl.py +4 -8
- metaflow/runner/metaflow_runner.py +33 -29
- metaflow/runner/subprocess_manager.py +58 -9
- metaflow/runner/utils.py +185 -43
- metaflow/util.py +5 -0
- metaflow/version.py +1 -1
- {ob_metaflow-2.12.33.1.dist-info → ob_metaflow-2.12.35.1.dist-info}/METADATA +2 -2
- {ob_metaflow-2.12.33.1.dist-info → ob_metaflow-2.12.35.1.dist-info}/RECORD +27 -27
- {ob_metaflow-2.12.33.1.dist-info → ob_metaflow-2.12.35.1.dist-info}/LICENSE +0 -0
- {ob_metaflow-2.12.33.1.dist-info → ob_metaflow-2.12.35.1.dist-info}/WHEEL +0 -0
- {ob_metaflow-2.12.33.1.dist-info → ob_metaflow-2.12.35.1.dist-info}/entry_points.txt +0 -0
- {ob_metaflow-2.12.33.1.dist-info → ob_metaflow-2.12.35.1.dist-info}/top_level.txt +0 -0
metaflow/metaflow_config.py
CHANGED
@@ -378,6 +378,8 @@ KUBERNETES_PORT = from_conf("KUBERNETES_PORT", None)
 KUBERNETES_CPU = from_conf("KUBERNETES_CPU", None)
 KUBERNETES_MEMORY = from_conf("KUBERNETES_MEMORY", None)
 KUBERNETES_DISK = from_conf("KUBERNETES_DISK", None)
+# Default kubernetes QoS class
+KUBERNETES_QOS = from_conf("KUBERNETES_QOS", "burstable")

 ARGO_WORKFLOWS_KUBERNETES_SECRETS = from_conf("ARGO_WORKFLOWS_KUBERNETES_SECRETS", "")
 ARGO_WORKFLOWS_ENV_VARS_TO_SKIP = from_conf("ARGO_WORKFLOWS_ENV_VARS_TO_SKIP", "")
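The new knob goes through from_conf like its neighbors, so, assuming Metaflow's standard configuration resolution, it can be overridden with a METAFLOW_-prefixed environment variable (or the Metaflow config file) before falling back to "burstable". A minimal sketch; note the override must be in place before the module is imported, since from_conf resolves at import time:

import os

# Assumption: from_conf-backed settings resolve METAFLOW_<NAME> environment
# variables when metaflow_config is imported.
os.environ["METAFLOW_KUBERNETES_QOS"] = "guaranteed"

from metaflow.metaflow_config import KUBERNETES_QOS

print(KUBERNETES_QOS)  # -> "guaranteed"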
metaflow/multicore_utils.py
CHANGED
@@ -6,7 +6,18 @@ from tempfile import NamedTemporaryFile
 import time
 import metaflow.tracing as tracing

-from typing import
+from typing import (
+    Any,
+    Callable,
+    Iterable,
+    Iterator,
+    List,
+    Optional,
+    NoReturn,
+    Tuple,
+    TypeVar,
+    Union,
+)

 try:
     # Python 2
@@ -30,7 +41,13 @@ class MulticoreException(Exception):
     pass


-def _spawn(func, arg, dir):
+_A = TypeVar("_A")
+_R = TypeVar("_R")
+
+
+def _spawn(
+    func: Callable[[_A], _R], arg: _A, dir: Optional[str]
+) -> Union[Tuple[int, str], NoReturn]:
     with NamedTemporaryFile(prefix="parallel_map_", dir=dir, delete=False) as tmpfile:
         output_file = tmpfile.name
@@ -63,11 +80,11 @@ def _spawn(func, arg, dir):


 def parallel_imap_unordered(
-    func: Callable[[
-    iterable: Iterable[
+    func: Callable[[_A], _R],
+    iterable: Iterable[_A],
     max_parallel: Optional[int] = None,
     dir: Optional[str] = None,
-) -> Iterator[
+) -> Iterator[_R]:
     """
     Parallelizes execution of a function using multiprocessing. The result
     order is not guaranteed.
@@ -79,9 +96,9 @@ def parallel_imap_unordered(
     iterable : Iterable[Any]
         Iterable over arguments to pass to fun
     max_parallel int, optional, default None
-        Maximum parallelism. If not specified, uses the number of CPUs
+        Maximum parallelism. If not specified, it uses the number of CPUs
     dir : str, optional, default None
-        If specified, directory where temporary files are created
+        If specified, it's the directory where temporary files are created

     Yields
     ------
@@ -121,14 +138,14 @@ def parallel_imap_unordered(


 def parallel_map(
-    func: Callable[[
-    iterable: Iterable[
+    func: Callable[[_A], _R],
+    iterable: Iterable[_A],
     max_parallel: Optional[int] = None,
     dir: Optional[str] = None,
-) -> List[
+) -> List[_R]:
     """
     Parallelizes execution of a function using multiprocessing. The result
-    order is that of the arguments in `iterable
+    order is that of the arguments in `iterable`.

     Parameters
     ----------
@@ -137,9 +154,9 @@ def parallel_map(
     iterable : Iterable[Any]
         Iterable over arguments to pass to fun
     max_parallel int, optional, default None
-        Maximum parallelism. If not specified, uses the number of CPUs
+        Maximum parallelism. If not specified, it uses the number of CPUs
     dir : str, optional, default None
-        If specified, directory where temporary files are created
+        If specified, it's the directory where temporary files are created

     Returns
     -------
@@ -155,4 +172,4 @@ def parallel_map(
     res = parallel_imap_unordered(
         wrapper, enumerate(iterable), max_parallel=max_parallel, dir=dir
     )
-    return [r for
+    return [r for _, r in sorted(res)]
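With the `_A`/`_R` TypeVars in place, `parallel_map` is now generic, so type checkers can track argument and result types through the pool. An illustrative use, assuming a Unix host (the helper forks worker processes, so plain lambdas work):

from metaflow.multicore_utils import parallel_map

# func is Callable[[int], str] and iterable is Iterable[int], so the
# inferred return type is List[str]; results come back in input order,
# per the fixed docstring.
squares = parallel_map(lambda x: str(x * x), range(8), max_parallel=4)
print(squares)  # ['0', '1', '4', '9', '16', '25', '36', '49']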
metaflow/parameters.py
CHANGED
@@ -151,6 +151,7 @@ class DeployTimeField(object):
         return self._check_type(val, deploy_time)

     def _check_type(self, val, deploy_time):
+
         # it is easy to introduce a deploy-time function that accidentally
         # returns a value whose type is not compatible with what is defined
         # in Parameter. Let's catch those mistakes early here, instead of
@@ -158,7 +159,7 @@ class DeployTimeField(object):

         # note: this doesn't work with long in Python2 or types defined as
         # click types, e.g. click.INT
-        TYPES = {bool: "bool", int: "int", float: "float", list: "list"}
+        TYPES = {bool: "bool", int: "int", float: "float", list: "list", dict: "dict"}

         msg = (
             "The value returned by the deploy-time function for "
@@ -166,7 +167,12 @@ class DeployTimeField(object):
             % (self.parameter_name, self.field)
         )

-        if self.parameter_type
+        if isinstance(self.parameter_type, list):
+            if not any(isinstance(val, x) for x in self.parameter_type):
+                msg += "Expected one of the following %s." % TYPES[self.parameter_type]
+                raise ParameterFieldTypeMismatch(msg)
+            return str(val) if self.return_str else val
+        elif self.parameter_type in TYPES:
             if type(val) != self.parameter_type:
                 msg += "Expected a %s." % TYPES[self.parameter_type]
                 raise ParameterFieldTypeMismatch(msg)
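The reworked `_check_type` now accepts two shapes of `parameter_type`: a single type (with `dict` newly recognized) or a list of acceptable types, where the deploy-time value only needs to match one entry. A standalone reduction of those semantics (illustrative, not the library's exact code path):

# Illustrative reduction of the new check in DeployTimeField._check_type.
def check(val, parameter_type):
    if isinstance(parameter_type, list):
        # list form: the value may match any one of the listed types
        return any(isinstance(val, t) for t in parameter_type)
    return type(val) == parameter_type

assert check({"a": 1}, dict)        # dict is now a recognized TYPES entry
assert check("x", [dict, str])      # list form: str matches
assert not check(3, [dict, str])    # int matches neither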
metaflow/plugins/airflow/airflow.py
CHANGED
@@ -46,6 +46,7 @@ from metaflow.parameters import (
 # TODO: Move chevron to _vendor
 from metaflow.plugins.cards.card_modules import chevron
 from metaflow.plugins.kubernetes.kubernetes import Kubernetes
+from metaflow.plugins.kubernetes.kube_utils import qos_requests_and_limits
 from metaflow.plugins.timeout_decorator import get_run_time_limit_for_task
 from metaflow.util import compress_list, dict_to_cli_options, get_username

@@ -428,25 +429,25 @@ class Airflow(object):
             if k8s_deco.attributes["namespace"] is not None
             else "default"
         )
-
+        qos_requests, qos_limits = qos_requests_and_limits(
+            k8s_deco.attributes["qos"],
+            k8s_deco.attributes["cpu"],
+            k8s_deco.attributes["memory"],
+            k8s_deco.attributes["disk"],
+        )
         resources = dict(
-            requests=
-
-
-
-
+            requests=qos_requests,
+            limits={
+                **qos_limits,
+                **{
+                    "%s.com/gpu".lower()
+                    % k8s_deco.attributes["gpu_vendor"]: str(k8s_deco.attributes["gpu"])
+                    for k in [0]
+                    # Don't set GPU limits if gpu isn't specified.
+                    if k8s_deco.attributes["gpu"] is not None
+                },
+            },
         )
-        if k8s_deco.attributes["gpu"] is not None:
-            resources.update(
-                dict(
-                    limits={
-                        "%s.com/gpu".lower()
-                        % k8s_deco.attributes["gpu_vendor"]: str(
-                            k8s_deco.attributes["gpu"]
-                        )
-                    }
-                )
-            )

         annotations = {
             "metaflow/production_token": self.production_token,
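Both schedulers now delegate the resource math to `qos_requests_and_limits` from the new kube_utils.py, whose 29 added lines are not shown in this diff; the `for k in [0]` comprehension at the call sites is just a trick to include the GPU key only when `gpu` is set. Judging by the call sites, and assuming standard Kubernetes QoS semantics (a guaranteed pod must have limits equal to requests; a burstable pod only needs requests), a plausible reconstruction of the helper looks like this:

def qos_requests_and_limits(qos, cpu, memory, disk):
    # Hypothetical sketch for illustration; the real helper lives in
    # metaflow/plugins/kubernetes/kube_utils.py.
    requests = {
        "cpu": str(cpu),
        "memory": "%sM" % memory,
        "ephemeral-storage": "%sM" % disk,
    }
    # "guaranteed" mirrors requests into limits; "burstable" leaves limits
    # empty so the pod may burst above its requests.
    limits = dict(requests) if qos.lower() == "guaranteed" else {}
    return requests, limits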
metaflow/plugins/argo/argo_workflows.py
CHANGED
@@ -54,6 +54,7 @@ from metaflow.metaflow_config import (
 from metaflow.metaflow_config_funcs import config_values, init_config
 from metaflow.mflog import BASH_SAVE_LOGS, bash_capture_logs, export_mflog_env_vars
 from metaflow.parameters import deploy_time_eval
+from metaflow.plugins.kubernetes.kube_utils import qos_requests_and_limits
 from metaflow.plugins.kubernetes.kubernetes import (
     parse_kube_keyvalue_list,
     validate_kube_labels,
@@ -523,7 +524,9 @@ class ArgoWorkflows(object):
             params = set(
                 [param.name.lower() for var, param in self.flow._get_parameters()]
             )
-
+            trigger_deco = self.flow._flow_decorators.get("trigger")[0]
+            trigger_deco.format_deploytime_value()
+            for event in trigger_deco.triggers:
                 parameters = {}
                 # TODO: Add a check to guard against names starting with numerals(?)
                 if not re.match(r"^[A-Za-z0-9_.-]+$", event["name"]):
@@ -563,9 +566,11 @@ class ArgoWorkflows(object):

         # @trigger_on_finish decorator
         if self.flow._flow_decorators.get("trigger_on_finish"):
-
-
-            ]
+            trigger_on_finish_deco = self.flow._flow_decorators.get(
+                "trigger_on_finish"
+            )[0]
+            trigger_on_finish_deco.format_deploytime_value()
+            for event in trigger_on_finish_deco.triggers:
                 # Actual filters are deduced here since we don't have access to
                 # the current object in the @trigger_on_finish decorator.
                 triggers.append(
@@ -1854,6 +1859,13 @@ class ArgoWorkflows(object):
         if tmpfs_enabled and tmpfs_tempdir:
             env["METAFLOW_TEMPDIR"] = tmpfs_path

+        qos_requests, qos_limits = qos_requests_and_limits(
+            resources["qos"],
+            resources["cpu"],
+            resources["memory"],
+            resources["disk"],
+        )
+
         # Create a ContainerTemplate for this node. Ideally, we would have
         # liked to inline this ContainerTemplate and avoid scanning the workflow
         # twice, but due to issues with variable substitution, we will have to
@@ -1917,6 +1929,7 @@ class ArgoWorkflows(object):
                 persistent_volume_claims=resources["persistent_volume_claims"],
                 shared_memory=shared_memory,
                 port=port,
+                qos=resources["qos"],
             )

             for k, v in env.items():
@@ -2109,17 +2122,17 @@ class ArgoWorkflows(object):
                     image=resources["image"],
                     image_pull_policy=resources["image_pull_policy"],
                     resources=kubernetes_sdk.V1ResourceRequirements(
-                        requests={
-                            "cpu": str(resources["cpu"]),
-                            "memory": "%sM" % str(resources["memory"]),
-                            "ephemeral-storage": "%sM"
-                            % str(resources["disk"]),
-                        },
+                        requests=qos_requests,
                         limits={
-
-
-
-
+                            **qos_limits,
+                            **{
+                                "%s.com/gpu".lower()
+                                % resources["gpu_vendor"]: str(
+                                    resources["gpu"]
+                                )
+                                for k in [0]
+                                if resources["gpu"] is not None
+                            },
                         },
                     ),
                     # Configure secrets
@@ -2356,7 +2369,7 @@ class ArgoWorkflows(object):
                             "memory": "500Mi",
                         },
                     ),
-                )
+                ).to_dict()
             )
         ),
         Template("capture-error-hook-fn-preflight").steps(
@@ -2715,7 +2728,7 @@ class ArgoWorkflows(object):
                 },
             ),
         )
-    )
+    ).to_dict()
 )
 )

@@ -2885,7 +2898,7 @@ class ArgoWorkflows(object):
                         "memory": "250Mi",
                     },
                 ),
-            )
+            ).to_dict()
         )
     )
     .service_account_name(ARGO_EVENTS_SERVICE_ACCOUNT)
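Two themes in this file: trigger values are now run through `format_deploytime_value()` before sensor parameters are built (suggesting `@trigger` attributes can be resolved at deploy time; the details live in the much-expanded events_decorator.py), and the kubernetes_sdk model objects embedded in sensor templates are converted with `.to_dict()` so the generated manifest stays JSON-serializable. A small standalone illustration of that conversion, assuming the official kubernetes Python client:

from kubernetes import client as kubernetes_sdk

# V1Container is an OpenAPI model object; .to_dict() turns it into plain
# primitives that json/yaml serializers (and template assembly) can embed.
container = kubernetes_sdk.V1Container(
    name="main",
    image="python:3.11",
    resources=kubernetes_sdk.V1ResourceRequirements(
        requests={"cpu": "200m", "memory": "500Mi"}
    ),
)
payload = container.to_dict()
print(payload["resources"]["requests"]["memory"])  # 500Mi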
metaflow/plugins/argo/argo_workflows_deployer_objects.py
CHANGED
@@ -10,7 +10,7 @@ from metaflow.metaflow_config import KUBERNETES_NAMESPACE
 from metaflow.plugins.argo.argo_workflows import ArgoWorkflows
 from metaflow.runner.deployer import Deployer, DeployedFlow, TriggeredRun

-from metaflow.runner.utils import get_lower_level_group, handle_timeout
+from metaflow.runner.utils import get_lower_level_group, handle_timeout, temporary_fifo


 def generate_fake_flow_file_contents(
@@ -341,18 +341,14 @@ class ArgoWorkflowsDeployedFlow(DeployedFlow):
         Exception
             If there is an error during the trigger process.
         """
-        with
-            tfp_runner_attribute = tempfile.NamedTemporaryFile(
-                dir=temp_dir, delete=False
-            )
-
+        with temporary_fifo() as (attribute_file_path, attribute_file_fd):
             # every subclass needs to have `self.deployer_kwargs`
             command = get_lower_level_group(
                 self.deployer.api,
                 self.deployer.top_level_kwargs,
                 self.deployer.TYPE,
                 self.deployer.deployer_kwargs,
-            ).trigger(deployer_attribute_file=
+            ).trigger(deployer_attribute_file=attribute_file_path, **kwargs)

             pid = self.deployer.spm.run_command(
                 [sys.executable, *command],
@@ -363,7 +359,7 @@ class ArgoWorkflowsDeployedFlow(DeployedFlow):

             command_obj = self.deployer.spm.get(pid)
             content = handle_timeout(
-                tfp_runner_attribute, command_obj, self.deployer.file_read_timeout
+                attribute_file_fd, command_obj, self.deployer.file_read_timeout
             )

             if command_obj.process.returncode == 0:
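`temporary_fifo` replaces the NamedTemporaryFile handshake: instead of polling a temp file for the deployer attributes, the triggering subprocess writes to a named pipe the parent already holds open. The helper itself lands in metaflow/runner/utils.py (part of its +185/-43, not shown here); a plausible reconstruction, assuming POSIX FIFO semantics:

import os
from contextlib import contextmanager
from tempfile import mkdtemp

@contextmanager
def temporary_fifo():
    # Hypothetical sketch of the new helper: yield (path, fd) for a named
    # pipe, opened non-blocking for reading so the parent never stalls
    # waiting for a writer to appear.
    tmp_dir = mkdtemp()
    path = os.path.join(tmp_dir, "fifo")
    os.mkfifo(path)
    fd = os.open(path, os.O_RDONLY | os.O_NONBLOCK)
    try:
        yield path, fd
    finally:
        os.close(fd)
        os.remove(path)
        os.rmdir(tmp_dir)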
metaflow/plugins/aws/step_functions/step_functions_deployer_objects.py
CHANGED
@@ -6,7 +6,7 @@ from typing import ClassVar, Optional, List
 from metaflow.plugins.aws.step_functions.step_functions import StepFunctions
 from metaflow.runner.deployer import DeployedFlow, TriggeredRun

-from metaflow.runner.utils import get_lower_level_group, handle_timeout
+from metaflow.runner.utils import get_lower_level_group, handle_timeout, temporary_fifo


 class StepFunctionsTriggeredRun(TriggeredRun):
@@ -196,18 +196,14 @@ class StepFunctionsDeployedFlow(DeployedFlow):
         Exception
             If there is an error during the trigger process.
         """
-        with
-            tfp_runner_attribute = tempfile.NamedTemporaryFile(
-                dir=temp_dir, delete=False
-            )
-
+        with temporary_fifo() as (attribute_file_path, attribute_file_fd):
             # every subclass needs to have `self.deployer_kwargs`
             command = get_lower_level_group(
                 self.deployer.api,
                 self.deployer.top_level_kwargs,
                 self.deployer.TYPE,
                 self.deployer.deployer_kwargs,
-            ).trigger(deployer_attribute_file=
+            ).trigger(deployer_attribute_file=attribute_file_path, **kwargs)

             pid = self.deployer.spm.run_command(
                 [sys.executable, *command],
@@ -218,7 +214,7 @@ class StepFunctionsDeployedFlow(DeployedFlow):

             command_obj = self.deployer.spm.get(pid)
             content = handle_timeout(
-                tfp_runner_attribute, command_obj, self.deployer.file_read_timeout
+                attribute_file_fd, command_obj, self.deployer.file_read_timeout
            )

             if command_obj.process.returncode == 0:
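The Step Functions deployer adopts the same FIFO handshake, with `handle_timeout` now taking the pipe's file descriptor rather than a temp-file object. Assuming it waits on the descriptor the way a select-based reader would (an inference; the real logic is in metaflow/runner/utils.py), the read side reduces to something like:

import os
import select

def read_attributes(fd, timeout_s):
    # Illustrative only: block until the triggering subprocess writes the
    # deployer attribute payload into the FIFO, or give up after timeout_s.
    ready, _, _ = select.select([fd], [], [], timeout_s)
    if not ready:
        raise TimeoutError("deployer attribute file was never written")
    return os.read(fd, 1 << 20).decode()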
|
@@ -600,7 +600,9 @@ class S3(object):
|
|
|
600
600
|
# returned are Unicode.
|
|
601
601
|
key = getattr(key_value, "key", key_value)
|
|
602
602
|
if self._s3root is None:
|
|
603
|
-
|
|
603
|
+
# NOTE: S3 allows fragments as part of object names, e.g. /dataset #1/data.txt
|
|
604
|
+
# Without allow_fragments=False the parsed.path for an object name with fragments is incomplete.
|
|
605
|
+
parsed = urlparse(to_unicode(key), allow_fragments=False)
|
|
604
606
|
if parsed.scheme == "s3" and parsed.path:
|
|
605
607
|
return key
|
|
606
608
|
else:
|
|
@@ -765,7 +767,9 @@ class S3(object):
|
|
|
765
767
|
"""
|
|
766
768
|
|
|
767
769
|
url = self._url(key)
|
|
768
|
-
|
|
770
|
+
# NOTE: S3 allows fragments as part of object names, e.g. /dataset #1/data.txt
|
|
771
|
+
# Without allow_fragments=False the parsed src.path for an object name with fragments is incomplete.
|
|
772
|
+
src = urlparse(url, allow_fragments=False)
|
|
769
773
|
|
|
770
774
|
def _info(s3, tmp):
|
|
771
775
|
resp = s3.head_object(Bucket=src.netloc, Key=src.path.lstrip('/"'))
|
|
@@ -891,7 +895,9 @@ class S3(object):
|
|
|
891
895
|
DOWNLOAD_MAX_CHUNK = 2 * 1024 * 1024 * 1024 - 1
|
|
892
896
|
|
|
893
897
|
url, r = self._url_and_range(key)
|
|
894
|
-
|
|
898
|
+
# NOTE: S3 allows fragments as part of object names, e.g. /dataset #1/data.txt
|
|
899
|
+
# Without allow_fragments=False the parsed src.path for an object name with fragments is incomplete.
|
|
900
|
+
src = urlparse(url, allow_fragments=False)
|
|
895
901
|
|
|
896
902
|
def _download(s3, tmp):
|
|
897
903
|
if r:
|
|
@@ -1173,7 +1179,9 @@ class S3(object):
|
|
|
1173
1179
|
blob.close = lambda: None
|
|
1174
1180
|
|
|
1175
1181
|
url = self._url(key)
|
|
1176
|
-
|
|
1182
|
+
# NOTE: S3 allows fragments as part of object names, e.g. /dataset #1/data.txt
|
|
1183
|
+
# Without allow_fragments=False the parsed src.path for an object name with fragments is incomplete.
|
|
1184
|
+
src = urlparse(url, allow_fragments=False)
|
|
1177
1185
|
extra_args = None
|
|
1178
1186
|
if content_type or metadata or self._encryption:
|
|
1179
1187
|
extra_args = {}
|