cloud-files 4.21.0__tar.gz → 4.22.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {cloud-files-4.21.0 → cloud-files-4.22.0}/.github/workflows/test-suite.yml +1 -1
- {cloud-files-4.21.0 → cloud-files-4.22.0}/AUTHORS +1 -0
- {cloud-files-4.21.0 → cloud-files-4.22.0}/ChangeLog +17 -0
- {cloud-files-4.21.0 → cloud-files-4.22.0}/PKG-INFO +28 -3
- {cloud-files-4.21.0 → cloud-files-4.22.0}/automated_test.py +12 -2
- {cloud-files-4.21.0 → cloud-files-4.22.0}/cloud_files.egg-info/PKG-INFO +28 -3
- cloud-files-4.22.0/cloud_files.egg-info/pbr.json +1 -0
- {cloud-files-4.21.0 → cloud-files-4.22.0}/cloud_files.egg-info/requires.txt +1 -1
- {cloud-files-4.21.0 → cloud-files-4.22.0}/cloudfiles/cloudfiles.py +6 -1
- {cloud-files-4.21.0 → cloud-files-4.22.0}/cloudfiles/connectionpools.py +14 -5
- {cloud-files-4.21.0 → cloud-files-4.22.0}/cloudfiles/interfaces.py +52 -14
- {cloud-files-4.21.0 → cloud-files-4.22.0}/cloudfiles/paths.py +2 -0
- {cloud-files-4.21.0 → cloud-files-4.22.0}/cloudfiles/secrets.py +9 -7
- {cloud-files-4.21.0 → cloud-files-4.22.0}/cloudfiles_cli/cloudfiles_cli.py +33 -17
- {cloud-files-4.21.0 → cloud-files-4.22.0}/setup.cfg +1 -1
- cloud-files-4.21.0/cloud_files.egg-info/pbr.json +0 -1
- {cloud-files-4.21.0 → cloud-files-4.22.0}/LICENSE +0 -0
- {cloud-files-4.21.0 → cloud-files-4.22.0}/MANIFEST.in +0 -0
- {cloud-files-4.21.0 → cloud-files-4.22.0}/README.md +0 -0
- {cloud-files-4.21.0 → cloud-files-4.22.0}/cloud_files.egg-info/SOURCES.txt +0 -0
- {cloud-files-4.21.0 → cloud-files-4.22.0}/cloud_files.egg-info/dependency_links.txt +0 -0
- {cloud-files-4.21.0 → cloud-files-4.22.0}/cloud_files.egg-info/entry_points.txt +0 -0
- {cloud-files-4.21.0 → cloud-files-4.22.0}/cloud_files.egg-info/not-zip-safe +0 -0
- {cloud-files-4.21.0 → cloud-files-4.22.0}/cloud_files.egg-info/top_level.txt +0 -0
- {cloud-files-4.21.0 → cloud-files-4.22.0}/cloudfiles/__init__.py +0 -0
- {cloud-files-4.21.0 → cloud-files-4.22.0}/cloudfiles/compression.py +0 -0
- {cloud-files-4.21.0 → cloud-files-4.22.0}/cloudfiles/exceptions.py +0 -0
- {cloud-files-4.21.0 → cloud-files-4.22.0}/cloudfiles/gcs.py +0 -0
- {cloud-files-4.21.0 → cloud-files-4.22.0}/cloudfiles/lib.py +0 -0
- {cloud-files-4.21.0 → cloud-files-4.22.0}/cloudfiles/resumable_tools.py +0 -0
- {cloud-files-4.21.0 → cloud-files-4.22.0}/cloudfiles/scheduler.py +0 -0
- {cloud-files-4.21.0 → cloud-files-4.22.0}/cloudfiles/threaded_queue.py +0 -0
- {cloud-files-4.21.0 → cloud-files-4.22.0}/cloudfiles/typing.py +0 -0
- {cloud-files-4.21.0 → cloud-files-4.22.0}/cloudfiles_cli/LICENSE +0 -0
- {cloud-files-4.21.0 → cloud-files-4.22.0}/cloudfiles_cli/__init__.py +0 -0
- {cloud-files-4.21.0 → cloud-files-4.22.0}/requirements.txt +0 -0
- {cloud-files-4.21.0 → cloud-files-4.22.0}/setup.py +0 -0
|
@@ -2,4 +2,5 @@ Manuel Castro <macastro@princeton.edu>
|
|
|
2
2
|
Nico Kemnitz <nkemnitz@princeton.edu>
|
|
3
3
|
V24 <55334829+umarfarouk98@users.noreply.github.com>
|
|
4
4
|
William Silversmith <william.silversmith@gmail.com>
|
|
5
|
+
madiganz <madiganz@users.noreply.github.com>
|
|
5
6
|
ranlu <ranlu@users.noreply.github.com>
|
|
@@ -1,6 +1,23 @@
|
|
|
1
1
|
CHANGES
|
|
2
2
|
=======
|
|
3
3
|
|
|
4
|
+
4.22.0
|
|
5
|
+
------
|
|
6
|
+
|
|
7
|
+
* feat: add no\_sign\_request for s3 services
|
|
8
|
+
* ci: remove py37 add py312
|
|
9
|
+
* fixtest: bump moto version and fix backwards incompatible calls
|
|
10
|
+
* fixtest: files present don't interfere with aws credentials
|
|
11
|
+
* Add support for AWS\_SESSION\_TOKEN (#102)
|
|
12
|
+
* perf: faster path extraction via caching computations
|
|
13
|
+
|
|
14
|
+
4.21.1
|
|
15
|
+
------
|
|
16
|
+
|
|
17
|
+
* chore: update ChangeLog
|
|
18
|
+
* fix: annotate other callsites with locks
|
|
19
|
+
* perf: fix potential for multi-thread collision when creating pools
|
|
20
|
+
|
|
4
21
|
4.21.0
|
|
5
22
|
------
|
|
6
23
|
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: cloud-files
|
|
3
|
-
Version: 4.21.0
|
|
3
|
+
Version: 4.22.0
|
|
4
4
|
Summary: Fast access to cloud storage and local FS.
|
|
5
5
|
Home-page: https://github.com/seung-lab/cloud-files/
|
|
6
6
|
Author: William Silversmith
|
|
@@ -18,10 +18,35 @@ Classifier: Programming Language :: Python :: 3.11
|
|
|
18
18
|
Classifier: Topic :: Software Development :: Libraries :: Python Modules
|
|
19
19
|
Requires-Python: >=3.7,<4.0
|
|
20
20
|
Description-Content-Type: text/markdown
|
|
21
|
-
Provides-Extra: test
|
|
22
|
-
Provides-Extra: numpy
|
|
23
21
|
License-File: LICENSE
|
|
24
22
|
License-File: AUTHORS
|
|
23
|
+
Requires-Dist: boto3>=1.4.7
|
|
24
|
+
Requires-Dist: brotli
|
|
25
|
+
Requires-Dist: crc32c
|
|
26
|
+
Requires-Dist: chardet>=3.0.4
|
|
27
|
+
Requires-Dist: click
|
|
28
|
+
Requires-Dist: deflate>=0.2.0
|
|
29
|
+
Requires-Dist: gevent
|
|
30
|
+
Requires-Dist: google-auth>=1.10.0
|
|
31
|
+
Requires-Dist: google-cloud-core>=1.1.0
|
|
32
|
+
Requires-Dist: google-cloud-storage>=1.31.1
|
|
33
|
+
Requires-Dist: google-crc32c>=1.0.0
|
|
34
|
+
Requires-Dist: orjson
|
|
35
|
+
Requires-Dist: pathos
|
|
36
|
+
Requires-Dist: protobuf>=3.3.0
|
|
37
|
+
Requires-Dist: requests>=2.22.0
|
|
38
|
+
Requires-Dist: six>=1.14.0
|
|
39
|
+
Requires-Dist: tenacity>=4.10.0
|
|
40
|
+
Requires-Dist: tqdm
|
|
41
|
+
Requires-Dist: urllib3>=1.26.3
|
|
42
|
+
Requires-Dist: zstandard
|
|
43
|
+
Requires-Dist: rsa>=4.7.2
|
|
44
|
+
Requires-Dist: fasteners
|
|
45
|
+
Provides-Extra: test
|
|
46
|
+
Requires-Dist: pytest; extra == "test"
|
|
47
|
+
Requires-Dist: moto>=5; extra == "test"
|
|
48
|
+
Provides-Extra: numpy
|
|
49
|
+
Requires-Dist: numpy; extra == "numpy"
|
|
25
50
|
|
|
26
51
|
[](https://badge.fury.io/py/cloud-files) [](https://github.com/seung-lab/cloud-files/actions?query=workflow%3A%22Test+Suite%22)
|
|
27
52
|
|
|
@@ -4,7 +4,7 @@ import re
|
|
|
4
4
|
import shutil
|
|
5
5
|
import time
|
|
6
6
|
|
|
7
|
-
from moto import
|
|
7
|
+
from moto import mock_aws
|
|
8
8
|
|
|
9
9
|
COMPRESSION_TYPES = [
|
|
10
10
|
None, False, True,
|
|
@@ -36,7 +36,7 @@ def aws_credentials():
|
|
|
36
36
|
|
|
37
37
|
@pytest.fixture(scope='function')
|
|
38
38
|
def s3(aws_credentials):
|
|
39
|
-
with
|
|
39
|
+
with mock_aws():
|
|
40
40
|
import boto3
|
|
41
41
|
conn = boto3.client('s3', region_name='us-east-1')
|
|
42
42
|
conn.create_bucket(Bucket="cloudfiles")
|
|
@@ -44,6 +44,16 @@ def s3(aws_credentials):
|
|
|
44
44
|
conn.create_bucket(Bucket="cloudfiles_dest")
|
|
45
45
|
yield conn
|
|
46
46
|
|
|
47
|
+
def test_aws_credentials(aws_credentials):
|
|
48
|
+
from cloudfiles import secrets
|
|
49
|
+
expected = {
|
|
50
|
+
'AWS_ACCESS_KEY_ID': 'testing',
|
|
51
|
+
'AWS_SECRET_ACCESS_KEY': 'testing',
|
|
52
|
+
'AWS_SESSION_TOKEN': 'testing',
|
|
53
|
+
'AWS_DEFAULT_REGION': 'us-east-1',
|
|
54
|
+
}
|
|
55
|
+
assert secrets.aws_credentials(skip_files=True) == expected
|
|
56
|
+
|
|
47
57
|
@pytest.mark.parametrize("green", (False, True))
|
|
48
58
|
@pytest.mark.parametrize("num_threads", (0, 5, 20))
|
|
49
59
|
@pytest.mark.parametrize("protocol", ('mem', 'file', 's3'))#'gs'))
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: cloud-files
|
|
3
|
-
Version: 4.21.0
|
|
3
|
+
Version: 4.22.0
|
|
4
4
|
Summary: Fast access to cloud storage and local FS.
|
|
5
5
|
Home-page: https://github.com/seung-lab/cloud-files/
|
|
6
6
|
Author: William Silversmith
|
|
@@ -18,10 +18,35 @@ Classifier: Programming Language :: Python :: 3.11
|
|
|
18
18
|
Classifier: Topic :: Software Development :: Libraries :: Python Modules
|
|
19
19
|
Requires-Python: >=3.7,<4.0
|
|
20
20
|
Description-Content-Type: text/markdown
|
|
21
|
-
Provides-Extra: test
|
|
22
|
-
Provides-Extra: numpy
|
|
23
21
|
License-File: LICENSE
|
|
24
22
|
License-File: AUTHORS
|
|
23
|
+
Requires-Dist: boto3>=1.4.7
|
|
24
|
+
Requires-Dist: brotli
|
|
25
|
+
Requires-Dist: crc32c
|
|
26
|
+
Requires-Dist: chardet>=3.0.4
|
|
27
|
+
Requires-Dist: click
|
|
28
|
+
Requires-Dist: deflate>=0.2.0
|
|
29
|
+
Requires-Dist: gevent
|
|
30
|
+
Requires-Dist: google-auth>=1.10.0
|
|
31
|
+
Requires-Dist: google-cloud-core>=1.1.0
|
|
32
|
+
Requires-Dist: google-cloud-storage>=1.31.1
|
|
33
|
+
Requires-Dist: google-crc32c>=1.0.0
|
|
34
|
+
Requires-Dist: orjson
|
|
35
|
+
Requires-Dist: pathos
|
|
36
|
+
Requires-Dist: protobuf>=3.3.0
|
|
37
|
+
Requires-Dist: requests>=2.22.0
|
|
38
|
+
Requires-Dist: six>=1.14.0
|
|
39
|
+
Requires-Dist: tenacity>=4.10.0
|
|
40
|
+
Requires-Dist: tqdm
|
|
41
|
+
Requires-Dist: urllib3>=1.26.3
|
|
42
|
+
Requires-Dist: zstandard
|
|
43
|
+
Requires-Dist: rsa>=4.7.2
|
|
44
|
+
Requires-Dist: fasteners
|
|
45
|
+
Provides-Extra: test
|
|
46
|
+
Requires-Dist: pytest; extra == "test"
|
|
47
|
+
Requires-Dist: moto>=5; extra == "test"
|
|
48
|
+
Provides-Extra: numpy
|
|
49
|
+
Requires-Dist: numpy; extra == "numpy"
|
|
25
50
|
|
|
26
51
|
[](https://badge.fury.io/py/cloud-files) [](https://github.com/seung-lab/cloud-files/actions?query=workflow%3A%22Test+Suite%22)
|
|
27
52
|
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"git_version": "bf417ed", "is_release": true}
|
|
@@ -251,7 +251,8 @@ class CloudFiles:
|
|
|
251
251
|
parallel:ParallelType = 1, request_payer:Optional[str] = None,
|
|
252
252
|
locking:Optional[bool] = None,
|
|
253
253
|
lock_dir:Optional[str] = None,
|
|
254
|
-
composite_upload_threshold:int = int(1e8)
|
|
254
|
+
composite_upload_threshold:int = int(1e8),
|
|
255
|
+
no_sign_request:bool = False,
|
|
255
256
|
):
|
|
256
257
|
if use_https:
|
|
257
258
|
cloudpath = paths.to_https_protocol(cloudpath)
|
|
@@ -265,6 +266,7 @@ class CloudFiles:
|
|
|
265
266
|
self.request_payer = request_payer
|
|
266
267
|
self.locking = locking
|
|
267
268
|
self.composite_upload_threshold = composite_upload_threshold
|
|
269
|
+
self.no_sign_request = bool(no_sign_request)
|
|
268
270
|
|
|
269
271
|
self._lock_dir = lock_dir
|
|
270
272
|
self._path = paths.extract(cloudpath)
|
|
@@ -315,6 +317,7 @@ class CloudFiles:
|
|
|
315
317
|
locking=self.locking,
|
|
316
318
|
lock_dir=self.lock_dir,
|
|
317
319
|
composite_upload_threshold=self.composite_upload_threshold,
|
|
320
|
+
no_sign_request=self.no_sign_request,
|
|
318
321
|
)
|
|
319
322
|
|
|
320
323
|
@property
|
|
@@ -456,6 +459,8 @@ class CloudFiles:
|
|
|
456
459
|
num_threads = self.num_threads
|
|
457
460
|
if self.protocol == "file":
|
|
458
461
|
num_threads = 1
|
|
462
|
+
elif self.protocol == "mem":
|
|
463
|
+
num_threads = 0
|
|
459
464
|
|
|
460
465
|
results = schedule_jobs(
|
|
461
466
|
fns=( partial(download, path) for path in paths ),
|
|
@@ -7,6 +7,9 @@ import time
|
|
|
7
7
|
from functools import partial
|
|
8
8
|
|
|
9
9
|
import boto3
|
|
10
|
+
from botocore import UNSIGNED
|
|
11
|
+
from botocore.config import Config
|
|
12
|
+
|
|
10
13
|
from google.cloud.storage import Client
|
|
11
14
|
from google.oauth2 import service_account
|
|
12
15
|
import tenacity
|
|
@@ -55,13 +58,13 @@ class ConnectionPool(object):
|
|
|
55
58
|
def _create_connection(self, secrets, endpoint):
|
|
56
59
|
raise NotImplementedError
|
|
57
60
|
|
|
58
|
-
def get_connection(self, secrets=None, endpoint=None):
|
|
61
|
+
def get_connection(self, secrets=None, endpoint=None, no_sign_request=False):
|
|
59
62
|
with self._lock:
|
|
60
63
|
try:
|
|
61
64
|
conn = self.pool.get(block=False)
|
|
62
65
|
self.pool.task_done()
|
|
63
66
|
except queue.Empty:
|
|
64
|
-
conn = self._create_connection(secrets, endpoint)
|
|
67
|
+
conn = self._create_connection(secrets, endpoint, no_sign_request)
|
|
65
68
|
finally:
|
|
66
69
|
self.outstanding += 1
|
|
67
70
|
|
|
@@ -103,7 +106,7 @@ class S3ConnectionPool(ConnectionPool):
|
|
|
103
106
|
super(S3ConnectionPool, self).__init__()
|
|
104
107
|
|
|
105
108
|
@retry
|
|
106
|
-
def _create_connection(self, secrets=None, endpoint=None):
|
|
109
|
+
def _create_connection(self, secrets=None, endpoint=None, no_sign_request=False):
|
|
107
110
|
if secrets is None:
|
|
108
111
|
secrets = self.credentials
|
|
109
112
|
if isinstance(secrets, str):
|
|
@@ -113,11 +116,17 @@ class S3ConnectionPool(ConnectionPool):
|
|
|
113
116
|
if endpoint is not None:
|
|
114
117
|
additional_args['endpoint_url'] = endpoint
|
|
115
118
|
|
|
119
|
+
config = None
|
|
120
|
+
if no_sign_request:
|
|
121
|
+
config = Config(signature_version=UNSIGNED)
|
|
122
|
+
|
|
116
123
|
return boto3.client(
|
|
117
124
|
's3',
|
|
118
125
|
aws_access_key_id=secrets.get('AWS_ACCESS_KEY_ID', None),
|
|
119
126
|
aws_secret_access_key=secrets.get('AWS_SECRET_ACCESS_KEY', None),
|
|
127
|
+
aws_session_token=secrets.get('AWS_SESSION_TOKEN', None),
|
|
120
128
|
region_name=secrets.get('AWS_DEFAULT_REGION', 'us-east-1'),
|
|
129
|
+
config=config,
|
|
121
130
|
**additional_args
|
|
122
131
|
)
|
|
123
132
|
|
|
@@ -135,7 +144,7 @@ class GCloudBucketPool(ConnectionPool):
|
|
|
135
144
|
super(GCloudBucketPool, self).__init__()
|
|
136
145
|
|
|
137
146
|
@retry
|
|
138
|
-
def _create_connection(self, secrets=None, endpoint=None):
|
|
147
|
+
def _create_connection(self, secrets=None, endpoint=None, no_sign_request=False):
|
|
139
148
|
if secrets is None:
|
|
140
149
|
secrets = self.credentials
|
|
141
150
|
|
|
@@ -162,7 +171,7 @@ class MemoryPool(ConnectionPool):
|
|
|
162
171
|
self.data = MEMORY_DATA
|
|
163
172
|
super(MemoryPool, self).__init__()
|
|
164
173
|
|
|
165
|
-
def _create_connection(self, secrets=None, endpoint=None):
|
|
174
|
+
def _create_connection(self, secrets=None, endpoint=None, no_sign_request=False):
|
|
166
175
|
if self.bucket not in self.data:
|
|
167
176
|
self.data[self.bucket] = {}
|
|
168
177
|
return self.data[self.bucket]
|
|
@@ -50,14 +50,27 @@ S3ConnectionPoolParams = namedtuple('S3ConnectionPoolParams', 'service bucket_na
|
|
|
50
50
|
GCloudBucketPoolParams = namedtuple('GCloudBucketPoolParams', 'bucket_name request_payer')
|
|
51
51
|
MemoryPoolParams = namedtuple('MemoryPoolParams', 'bucket_name')
|
|
52
52
|
|
|
53
|
+
GCS_BUCKET_POOL_LOCK = threading.Lock()
|
|
54
|
+
S3_BUCKET_POOL_LOCK = threading.Lock()
|
|
55
|
+
MEM_BUCKET_POOL_LOCK = threading.Lock()
|
|
56
|
+
|
|
53
57
|
def reset_connection_pools():
|
|
54
58
|
global S3_POOL
|
|
55
59
|
global GC_POOL
|
|
56
60
|
global MEM_POOL
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
+
global GCS_BUCKET_POOL_LOCK
|
|
62
|
+
global S3_BUCKET_POOL_LOCK
|
|
63
|
+
global MEM_BUCKET_POOL_LOCK
|
|
64
|
+
|
|
65
|
+
with S3_BUCKET_POOL_LOCK:
|
|
66
|
+
S3_POOL = keydefaultdict(lambda params: S3ConnectionPool(params.service, params.bucket_name))
|
|
67
|
+
|
|
68
|
+
with GCS_BUCKET_POOL_LOCK:
|
|
69
|
+
GC_POOL = keydefaultdict(lambda params: GCloudBucketPool(params.bucket_name, params.request_payer))
|
|
70
|
+
|
|
71
|
+
with MEM_BUCKET_POOL_LOCK:
|
|
72
|
+
MEM_POOL = keydefaultdict(lambda params: MemoryPool(params.bucket_name))
|
|
73
|
+
MEMORY_DATA.clear()
|
|
61
74
|
import gc
|
|
62
75
|
gc.collect()
|
|
63
76
|
|
|
@@ -340,14 +353,19 @@ class FileInterface(StorageInterface):
|
|
|
340
353
|
filenames = list(map(stripext, filenames))
|
|
341
354
|
filenames.sort()
|
|
342
355
|
return iter(filenames)
|
|
343
|
-
|
|
356
|
+
|
|
344
357
|
class MemoryInterface(StorageInterface):
|
|
345
358
|
def __init__(self, path, secrets=None, request_payer=None, **kwargs):
|
|
359
|
+
global MEM_BUCKET_POOL_LOCK
|
|
360
|
+
|
|
346
361
|
super(StorageInterface, self).__init__()
|
|
347
362
|
if request_payer is not None:
|
|
348
363
|
raise ValueError("Specifying a request payer for the MemoryInterface is not supported. request_payer must be None, got '{}'.", request_payer)
|
|
349
364
|
self._path = path
|
|
350
|
-
|
|
365
|
+
|
|
366
|
+
with MEM_BUCKET_POOL_LOCK:
|
|
367
|
+
pool = MEM_POOL[MemoryPoolParams(path.bucket)]
|
|
368
|
+
self._data = pool.get_connection(secrets, None)
|
|
351
369
|
|
|
352
370
|
def get_path_to_file(self, file_path):
|
|
353
371
|
return posixpath.join(
|
|
@@ -498,9 +516,13 @@ class GoogleCloudStorageInterface(StorageInterface):
|
|
|
498
516
|
def __init__(self, path, secrets=None, request_payer=None, **kwargs):
|
|
499
517
|
super(StorageInterface, self).__init__()
|
|
500
518
|
global GC_POOL
|
|
519
|
+
global GCS_BUCKET_POOL_LOCK
|
|
501
520
|
self._path = path
|
|
502
521
|
self._request_payer = request_payer
|
|
503
|
-
|
|
522
|
+
|
|
523
|
+
with GCS_BUCKET_POOL_LOCK:
|
|
524
|
+
pool = GC_POOL[GCloudBucketPoolParams(self._path.bucket, self._request_payer)]
|
|
525
|
+
self._bucket = pool.get_connection(secrets, None)
|
|
504
526
|
self._secrets = secrets
|
|
505
527
|
|
|
506
528
|
def get_path_to_file(self, file_path):
|
|
@@ -537,7 +559,9 @@ class GoogleCloudStorageInterface(StorageInterface):
|
|
|
537
559
|
def copy_file(self, src_path, dest_bucket, dest_key):
|
|
538
560
|
key = self.get_path_to_file(src_path)
|
|
539
561
|
source_blob = self._bucket.blob( key )
|
|
540
|
-
|
|
562
|
+
with GCS_BUCKET_POOL_LOCK:
|
|
563
|
+
pool = GC_POOL[GCloudBucketPoolParams(dest_bucket, self._request_payer)]
|
|
564
|
+
dest_bucket = pool.get_connection(self._secrets, None)
|
|
541
565
|
self._bucket.copy_blob(
|
|
542
566
|
source_blob, dest_bucket, dest_key
|
|
543
567
|
)
|
|
@@ -664,7 +688,9 @@ class GoogleCloudStorageInterface(StorageInterface):
|
|
|
664
688
|
|
|
665
689
|
def release_connection(self):
|
|
666
690
|
global GC_POOL
|
|
667
|
-
|
|
691
|
+
with GCS_BUCKET_POOL_LOCK:
|
|
692
|
+
pool = GC_POOL[GCloudBucketPoolParams(self._path.bucket, self._request_payer)]
|
|
693
|
+
pool.release_connection(self._bucket)
|
|
668
694
|
|
|
669
695
|
class HttpInterface(StorageInterface):
|
|
670
696
|
def __init__(self, path, secrets=None, request_payer=None, **kwargs):
|
|
@@ -780,7 +806,13 @@ class S3Interface(StorageInterface):
|
|
|
780
806
|
# https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/s3.html#S3.Bucket.delete_objects
|
|
781
807
|
# claims batch size limit is 1000
|
|
782
808
|
delete_batch_size = 1000
|
|
783
|
-
def __init__(
|
|
809
|
+
def __init__(
|
|
810
|
+
self, path, secrets=None,
|
|
811
|
+
request_payer=None,
|
|
812
|
+
composite_upload_threshold=int(1e8),
|
|
813
|
+
no_sign_request=False,
|
|
814
|
+
**kwargs
|
|
815
|
+
):
|
|
784
816
|
super(StorageInterface, self).__init__()
|
|
785
817
|
global S3_POOL
|
|
786
818
|
|
|
@@ -797,12 +829,16 @@ class S3Interface(StorageInterface):
|
|
|
797
829
|
self._conn = self._get_bucket(path.bucket)
|
|
798
830
|
|
|
799
831
|
self.composite_upload_threshold = composite_upload_threshold
|
|
832
|
+
self.no_sign_request = no_sign_request
|
|
800
833
|
|
|
801
834
|
def _get_bucket(self, bucket_name):
|
|
835
|
+
global S3_BUCKET_POOL_LOCK
|
|
802
836
|
service = self._path.alias or 's3'
|
|
803
|
-
|
|
804
|
-
|
|
805
|
-
|
|
837
|
+
|
|
838
|
+
with S3_BUCKET_POOL_LOCK:
|
|
839
|
+
pool = S3_POOL[S3ConnectionPoolParams(service, bucket_name, self._request_payer)]
|
|
840
|
+
|
|
841
|
+
return pool.get_connection(self._secrets, self._path.host)
|
|
806
842
|
|
|
807
843
|
def get_path_to_file(self, file_path):
|
|
808
844
|
return posixpath.join(self._path.path, file_path)
|
|
@@ -1074,4 +1110,6 @@ class S3Interface(StorageInterface):
|
|
|
1074
1110
|
def release_connection(self):
|
|
1075
1111
|
global S3_POOL
|
|
1076
1112
|
service = self._path.alias or 's3'
|
|
1077
|
-
|
|
1113
|
+
with S3_BUCKET_POOL_LOCK:
|
|
1114
|
+
pool = S3_POOL[S3ConnectionPoolParams(service, self._path.bucket, self._request_payer)]
|
|
1115
|
+
pool.release_connection(self._conn)
|
|
@@ -1,3 +1,4 @@
|
|
|
1
|
+
from functools import lru_cache
|
|
1
2
|
from collections import namedtuple
|
|
2
3
|
import orjson
|
|
3
4
|
import os.path
|
|
@@ -295,6 +296,7 @@ def extract_format_protocol(cloudpath:str) -> tuple:
|
|
|
295
296
|
|
|
296
297
|
return (fmt, proto, endpoint, cloudpath, alias)
|
|
297
298
|
|
|
299
|
+
@lru_cache(maxsize=10, typed=False)
|
|
298
300
|
def extract(cloudpath:str, windows=None) -> ExtractedPath:
|
|
299
301
|
"""
|
|
300
302
|
Given a valid cloudpath of the form
|
|
@@ -96,7 +96,7 @@ def google_credentials(bucket = ''):
|
|
|
96
96
|
|
|
97
97
|
AWS_CREDENTIALS_CACHE:CredentialCacheType = defaultdict(dict)
|
|
98
98
|
aws_credentials_path = secretpath('aws-secret.json')
|
|
99
|
-
def aws_credentials(bucket = '', service = 'aws'):
|
|
99
|
+
def aws_credentials(bucket = '', service = 'aws', skip_files=False):
|
|
100
100
|
global AWS_CREDENTIALS_CACHE
|
|
101
101
|
|
|
102
102
|
if service == 's3':
|
|
@@ -115,12 +115,13 @@ def aws_credentials(bucket = '', service = 'aws'):
|
|
|
115
115
|
paths = [ secretpath('{}-{}-secret.json'.format(bucket, service)) ] + paths
|
|
116
116
|
|
|
117
117
|
aws_credentials = {}
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
118
|
+
if not skip_files:
|
|
119
|
+
aws_credentials_path = secretpath(default_file_path)
|
|
120
|
+
for aws_credentials_path in paths:
|
|
121
|
+
if os.path.exists(aws_credentials_path):
|
|
122
|
+
with open(aws_credentials_path, 'r') as f:
|
|
123
|
+
aws_credentials = json.loads(f.read())
|
|
124
|
+
break
|
|
124
125
|
|
|
125
126
|
if not aws_credentials:
|
|
126
127
|
# did not find any secret json file, will try to find it in environment variables
|
|
@@ -128,6 +129,7 @@ def aws_credentials(bucket = '', service = 'aws'):
|
|
|
128
129
|
aws_credentials = {
|
|
129
130
|
'AWS_ACCESS_KEY_ID': os.environ['AWS_ACCESS_KEY_ID'],
|
|
130
131
|
'AWS_SECRET_ACCESS_KEY': os.environ['AWS_SECRET_ACCESS_KEY'],
|
|
132
|
+
'AWS_SESSION_TOKEN': os.environ['AWS_SESSION_TOKEN'],
|
|
131
133
|
}
|
|
132
134
|
if 'AWS_DEFAULT_REGION' in os.environ:
|
|
133
135
|
aws_credentials['AWS_DEFAULT_REGION'] = os.environ['AWS_DEFAULT_REGION']
|
|
@@ -172,11 +172,12 @@ def get_mfp(path, recursive):
|
|
|
172
172
|
@click.option('--progress', is_flag=True, default=False, help="Show transfer progress.", show_default=True)
|
|
173
173
|
@click.option('-b', '--block-size', default=128, help="Number of files to download at a time.", show_default=True)
|
|
174
174
|
@click.option('--part-bytes', default=int(1e8), help="Composite upload threshold in bytes. Splits a file into pieces for some cloud services like gs and s3.", show_default=True)
|
|
175
|
+
@click.option('--no-sign-request', is_flag=True, default=False, help="Use s3 in anonymous mode (don't sign requests) for the source.", show_default=True)
|
|
175
176
|
@click.pass_context
|
|
176
177
|
def cp(
|
|
177
178
|
ctx, source, destination,
|
|
178
179
|
recursive, compression, progress,
|
|
179
|
-
block_size, part_bytes
|
|
180
|
+
block_size, part_bytes, no_sign_request,
|
|
180
181
|
):
|
|
181
182
|
"""
|
|
182
183
|
Copy one or more files from a source to destination.
|
|
@@ -194,9 +195,17 @@ def cp(
|
|
|
194
195
|
return
|
|
195
196
|
|
|
196
197
|
for src in source:
|
|
197
|
-
_cp_single(
|
|
198
|
-
|
|
199
|
-
|
|
198
|
+
_cp_single(
|
|
199
|
+
ctx, src, destination, recursive,
|
|
200
|
+
compression, progress, block_size,
|
|
201
|
+
part_bytes, no_sign_request
|
|
202
|
+
)
|
|
203
|
+
|
|
204
|
+
def _cp_single(
|
|
205
|
+
ctx, source, destination, recursive,
|
|
206
|
+
compression, progress, block_size,
|
|
207
|
+
part_bytes, no_sign_request
|
|
208
|
+
):
|
|
200
209
|
use_stdin = (source == '-')
|
|
201
210
|
use_stdout = (destination == '-')
|
|
202
211
|
|
|
@@ -243,7 +252,9 @@ def _cp_single(ctx, source, destination, recursive, compression, progress, block
|
|
|
243
252
|
xferpaths = [ x.replace(prefix, "") for x in xferpaths ]
|
|
244
253
|
srcpath = cloudpathjoin(srcpath, prefix)
|
|
245
254
|
elif many:
|
|
246
|
-
xferpaths = CloudFiles(
|
|
255
|
+
xferpaths = CloudFiles(
|
|
256
|
+
srcpath, no_sign_request=no_sign_request
|
|
257
|
+
).list(prefix=prefix, flat=flat)
|
|
247
258
|
|
|
248
259
|
destpath = ndest
|
|
249
260
|
if isinstance(xferpaths, str):
|
|
@@ -260,7 +271,7 @@ def _cp_single(ctx, source, destination, recursive, compression, progress, block
|
|
|
260
271
|
|
|
261
272
|
if not isinstance(xferpaths, str):
|
|
262
273
|
if parallel == 1:
|
|
263
|
-
_cp(srcpath, destpath, compression, progress, block_size, part_bytes, xferpaths)
|
|
274
|
+
_cp(srcpath, destpath, compression, progress, block_size, part_bytes, no_sign_request, xferpaths)
|
|
264
275
|
return
|
|
265
276
|
|
|
266
277
|
total = None
|
|
@@ -270,16 +281,16 @@ def _cp_single(ctx, source, destination, recursive, compression, progress, block
|
|
|
270
281
|
pass
|
|
271
282
|
|
|
272
283
|
if use_stdout:
|
|
273
|
-
fn = partial(_cp_stdout, srcpath)
|
|
284
|
+
fn = partial(_cp_stdout, no_sign_request, srcpath)
|
|
274
285
|
else:
|
|
275
|
-
fn = partial(_cp, srcpath, destpath, compression, False, block_size, part_bytes)
|
|
286
|
+
fn = partial(_cp, srcpath, destpath, compression, False, block_size, part_bytes, no_sign_request)
|
|
276
287
|
|
|
277
288
|
with tqdm(desc="Transferring", total=total, disable=(not progress)) as pbar:
|
|
278
289
|
with pathos.pools.ProcessPool(parallel) as executor:
|
|
279
290
|
for _ in executor.imap(fn, sip(xferpaths, block_size)):
|
|
280
291
|
pbar.update(block_size)
|
|
281
292
|
else:
|
|
282
|
-
cfsrc = CloudFiles(srcpath, progress=progress)
|
|
293
|
+
cfsrc = CloudFiles(srcpath, progress=progress, no_sign_request=no_sign_request)
|
|
283
294
|
if not cfsrc.exists(xferpaths):
|
|
284
295
|
print(f"cloudfiles: source path not found: {cfsrc.abspath(xferpaths).replace('file://','')}")
|
|
285
296
|
return
|
|
@@ -288,7 +299,11 @@ def _cp_single(ctx, source, destination, recursive, compression, progress, block
|
|
|
288
299
|
_cp_stdout(srcpath, xferpaths)
|
|
289
300
|
return
|
|
290
301
|
|
|
291
|
-
cfdest = CloudFiles(
|
|
302
|
+
cfdest = CloudFiles(
|
|
303
|
+
destpath,
|
|
304
|
+
progress=progress,
|
|
305
|
+
composite_upload_threshold=part_bytes,
|
|
306
|
+
)
|
|
292
307
|
|
|
293
308
|
if isdestdir:
|
|
294
309
|
new_path = os.path.basename(nsrc)
|
|
@@ -300,17 +315,17 @@ def _cp_single(ctx, source, destination, recursive, compression, progress, block
|
|
|
300
315
|
"dest_path": new_path,
|
|
301
316
|
}], reencode=compression)
|
|
302
317
|
|
|
303
|
-
def _cp(src, dst, compression, progress, block_size, part_bytes, paths):
|
|
304
|
-
cfsrc = CloudFiles(src, progress=progress, composite_upload_threshold=part_bytes)
|
|
318
|
+
def _cp(src, dst, compression, progress, block_size, part_bytes, no_sign_request, paths):
|
|
319
|
+
cfsrc = CloudFiles(src, progress=progress, composite_upload_threshold=part_bytes, no_sign_request=no_sign_request)
|
|
305
320
|
cfdest = CloudFiles(dst, progress=progress, composite_upload_threshold=part_bytes)
|
|
306
321
|
cfsrc.transfer_to(
|
|
307
322
|
cfdest, paths=paths,
|
|
308
323
|
reencode=compression, block_size=block_size
|
|
309
324
|
)
|
|
310
325
|
|
|
311
|
-
def _cp_stdout(src, paths):
|
|
326
|
+
def _cp_stdout(src, no_sign_request, paths):
|
|
312
327
|
paths = toiter(paths)
|
|
313
|
-
cf = CloudFiles(src, progress=False)
|
|
328
|
+
cf = CloudFiles(src, progress=False, no_sign_request=no_sign_request)
|
|
314
329
|
for res in cf.get(paths):
|
|
315
330
|
content = res["content"].decode("utf8")
|
|
316
331
|
sys.stdout.write(content)
|
|
@@ -372,7 +387,8 @@ def xferexecute(db, progress, lease_msec, block_size):
|
|
|
372
387
|
@main.command()
|
|
373
388
|
@click.argument("sources", nargs=-1)
|
|
374
389
|
@click.option('-r', '--range', 'byte_range', default=None, help='Retrieve start-end bytes.')
|
|
375
|
-
|
|
390
|
+
@click.option('--no-sign-request', is_flag=True, default=False, help="Use s3 in anonymous mode (don't sign requests) for the source.", show_default=True)
|
|
391
|
+
def cat(sources, byte_range, no_sign_request):
|
|
376
392
|
"""Concatenate the contents of each input file and write to stdout."""
|
|
377
393
|
if '-' in sources and len(sources) == 1:
|
|
378
394
|
sources = sys.stdin.readlines()
|
|
@@ -386,7 +402,7 @@ def cat(sources, byte_range):
|
|
|
386
402
|
byte_range[0] = int(byte_range[0] or 0)
|
|
387
403
|
byte_range[1] = int(byte_range[1]) if byte_range[1] not in ("", None) else None
|
|
388
404
|
src = normalize_path(sources[0])
|
|
389
|
-
cf = CloudFiles(os.path.dirname(src))
|
|
405
|
+
cf = CloudFiles(os.path.dirname(src), no_sign_request=no_sign_request)
|
|
390
406
|
download = cf[os.path.basename(src), byte_range[0]:byte_range[1]]
|
|
391
407
|
if download is None:
|
|
392
408
|
print(f'cloudfiles: {src} does not exist')
|
|
@@ -397,7 +413,7 @@ def cat(sources, byte_range):
|
|
|
397
413
|
for srcs in sip(sources, 10):
|
|
398
414
|
srcs = [ normalize_path(src) for src in srcs ]
|
|
399
415
|
order = { src: i for i, src in enumerate(srcs) }
|
|
400
|
-
files = cloudfiles.dl(srcs)
|
|
416
|
+
files = cloudfiles.dl(srcs, no_sign_request=no_sign_request)
|
|
401
417
|
output = [ None for _ in range(len(srcs)) ]
|
|
402
418
|
for res in files:
|
|
403
419
|
if res["content"] is None:
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
{"git_version": "bee46e6", "is_release": true}
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|