cloud-files 4.27.0__py3-none-any.whl → 6.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {cloud_files-4.27.0.dist-info → cloud_files-6.0.0.dist-info}/AUTHORS +1 -0
- {cloud_files-4.27.0.dist-info → cloud_files-6.0.0.dist-info}/METADATA +101 -21
- cloud_files-6.0.0.dist-info/RECORD +27 -0
- {cloud_files-4.27.0.dist-info → cloud_files-6.0.0.dist-info}/WHEEL +1 -1
- cloud_files-6.0.0.dist-info/pbr.json +1 -0
- cloudfiles/cloudfiles.py +548 -78
- cloudfiles/compression.py +8 -3
- cloudfiles/exceptions.py +4 -0
- cloudfiles/gcs.py +7 -3
- cloudfiles/interfaces.py +462 -69
- cloudfiles/lib.py +12 -2
- cloudfiles/monitoring.py +724 -0
- cloudfiles/paths.py +61 -5
- cloudfiles/resumable_tools.py +50 -15
- cloudfiles/scheduler.py +6 -1
- cloudfiles/secrets.py +16 -12
- cloudfiles/test.py +28 -0
- cloudfiles_cli/cloudfiles_cli.py +349 -41
- cloud_files-4.27.0.dist-info/RECORD +0 -26
- cloud_files-4.27.0.dist-info/pbr.json +0 -1
- cloudfiles/buckets.py +0 -10
- {cloud_files-4.27.0.dist-info → cloud_files-6.0.0.dist-info}/LICENSE +0 -0
- {cloud_files-4.27.0.dist-info → cloud_files-6.0.0.dist-info}/entry_points.txt +0 -0
- {cloud_files-4.27.0.dist-info → cloud_files-6.0.0.dist-info}/top_level.txt +0 -0
cloudfiles/paths.py
CHANGED
@@ -1,5 +1,5 @@
 from functools import lru_cache
-from collections import namedtuple
+from collections import namedtuple, defaultdict
 import orjson
 import os.path
 import posixpath
@@ -8,17 +8,21 @@ import sys
 import urllib.parse
 
 from typing import Tuple, Optional
+from .typing import GetPathType
 
 from .exceptions import UnsupportedProtocolError
-from .lib import yellow, toabs, jsonify, mkdir
+from .lib import yellow, toabs, jsonify, mkdir, toiter
 from .secrets import CLOUD_FILES_DIR
 
 ExtractedPath = namedtuple('ExtractedPath',
   ('format', 'protocol', 'bucket', 'path', 'host', 'alias')
 )
 
+PRECOMPUTED_SUFFIX = '|neuroglancer-precomputed:'
+
 ALIAS_FILE = os.path.join(CLOUD_FILES_DIR, "aliases.json")
 OFFICIAL_ALIASES = {
+  "nokura": "s3://https://c10s.pni.princeton.edu/",
   "matrix": "s3://https://s3-hpcrc.rc.princeton.edu/",
   "tigerdata": "s3://https://td.princeton.edu/",
 }
@@ -37,6 +41,10 @@ ALLOWED_FORMATS = [
   'render', 'vtk', 'nifti', 'dvid',
 ]
 
+CLOUD_PROTOCOLS = [
+  "gs", "s3"
+] + list(OFFICIAL_ALIASES.keys())
+
 def update_aliases_from_file():
   global ALIASES_FROM_FILE
   global ALIAS_FILE
@@ -55,11 +63,14 @@ def update_aliases_from_file():
 
 def cloudpath_error(cloudpath):
   return yellow(f"""
-Cloud Path must conform to
+Cloud Path must conform to one of:
+(a) [FORMAT://]PROTOCOL://PATH
+(b) PROTOCOL://PATH|FORMAT:
 Examples:
   precomputed://gs://test_bucket/em
   gs://test_bucket/em
   graphene://https://example.com/image/em
+  gs://text_bucket/em/|zarr2:
 
 Supported Formats: None (precomputed), {", ".join(ALLOWED_FORMATS)}
 Supported Protocols: {", ".join(ALLOWED_PROTOCOLS)}
@@ -84,6 +95,9 @@ def mkregexp():
 CLOUDPATH_REGEXP = re.compile(mkregexp())
 BUCKET_RE = re.compile(r'^(/?[~\d\w_\.\-]+(?::\d+)?)(?:/|$)') # posix /what/a/great/path
 
+# |neuroglancer-precomputed: or |zarr2: suffixes etc
+TAIL_FORMAT_REGEXP = re.compile('\\|(?P<fmt>[\\w\\d-]+):$')
+
 def add_alias(alias:str, host:str):
   global ALIASES
   global ALLOWED_PROTOCOLS
@@ -158,7 +172,15 @@ for alias, host in OFFICIAL_ALIASES.items():
 
 ## Other Path Library Functions
 
+@lru_cache(maxsize=10, typed=False)
 def normalize(path):
+
+  path = path.removesuffix(PRECOMPUTED_SUFFIX)
+  m = re.search(TAIL_FORMAT_REGEXP, path)
+  if m is not None:
+    path = re.sub(TAIL_FORMAT_REGEXP, "", path)
+    path = f"{m.group('fmt')}://{path}"
+
   fmt, proto, endpoint, cloudpath, alias = extract_format_protocol(
     path, allow_defaults=False
   )
@@ -284,6 +306,13 @@ def pop_protocol(cloudpath):
 def extract_format_protocol(cloudpath:str, allow_defaults=True) -> tuple:
   error = UnsupportedProtocolError(cloudpath_error(cloudpath))
 
+  cloudpath = cloudpath.removesuffix(PRECOMPUTED_SUFFIX)
+
+  m = re.search(TAIL_FORMAT_REGEXP, cloudpath)
+  if m is not None:
+    cloudpath = re.sub(TAIL_FORMAT_REGEXP, "", cloudpath)
+    cloudpath = f"{m.group('fmt')}://{cloudpath}"
+
   alias, cloudpath = resolve_alias(cloudpath)
 
   m = re.match(CLOUDPATH_REGEXP, cloudpath)
@@ -350,7 +379,7 @@ def extract(cloudpath:str, windows=None) -> ExtractedPath:
     cloudpath = toabs(cloudpath)
 
   bucket = None
-  if protocol in
+  if protocol in CLOUD_PROTOCOLS + ['mem']:
     match = re.match(BUCKET_RE, cloudpath)
     if not match:
       raise error
@@ -374,7 +403,7 @@ def extract(cloudpath:str, windows=None) -> ExtractedPath:
 
 def to_https_protocol(cloudpath):
   if isinstance(cloudpath, ExtractedPath):
-    if cloudpath.protocol in
+    if cloudpath.protocol in CLOUD_PROTOCOLS:
       return extract(to_https_protocol(ascloudpath(cloudpath)))
     return cloudpath
 
@@ -390,3 +419,30 @@ def to_https_protocol(cloudpath):
     cloudpath = cloudpath.replace(f"{alias}://", host, 1)
 
   return cloudpath.replace("s3://", "", 1)
+
+def find_common_buckets(cloudpaths:GetPathType):
+  cloudpaths, is_multiple = toiter(cloudpaths, is_iter=True)
+  clustered = defaultdict(list)
+
+  for path in cloudpaths:
+    pth = path
+    byte_range = None
+    if isinstance(path, dict):
+      pth = path["path"]
+      byte_range = path["byte_range"]
+
+    epath = extract(pth)
+    if epath.protocol == "file":
+      path = os.sep.join(asfilepath(epath).split(os.sep)[2:])
+      bucketpath = "file://" + os.sep.join(asfilepath(epath).split(os.sep)[:2])
+    else:
+      path = epath.path
+      bucketpath = asbucketpath(epath)
+
+    clustered[bucketpath].append({
+      "path": path,
+      "start": (byte_range[0] if byte_range else None), # type: ignore
+      "end": (byte_range[1] if byte_range else None), # type: ignore
+    })
+
+  return clustered
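The notable additions here are the `|FORMAT:` tail syntax (PRECOMPUTED_SUFFIX and TAIL_FORMAT_REGEXP, applied in both normalize() and extract_format_protocol()), the shared CLOUD_PROTOCOLS list used by extract() and to_https_protocol(), and find_common_buckets(), which clusters requests per bucket. Below is a minimal sketch of the tail rewrite, reusing the constants from the diff; the helper name rewrite_tail_format is ours for illustration, and str.removesuffix requires Python 3.9+.

  import re

  PRECOMPUTED_SUFFIX = '|neuroglancer-precomputed:'
  TAIL_FORMAT_REGEXP = re.compile(r'\|(?P<fmt>[\w\d-]+):$')

  def rewrite_tail_format(path: str) -> str:
    # precomputed is the default format, so its suffix is simply dropped
    path = path.removesuffix(PRECOMPUTED_SUFFIX)
    m = TAIL_FORMAT_REGEXP.search(path)
    if m is not None:
      # strip "|zarr2:" from the tail and re-attach it as "zarr2://" in front
      path = TAIL_FORMAT_REGEXP.sub("", path)
      path = f"{m.group('fmt')}://{path}"
    return path

  assert rewrite_tail_format("gs://bucket/em/|zarr2:") == "zarr2://gs://bucket/em/"
  assert rewrite_tail_format("gs://bucket/em|neuroglancer-precomputed:") == "gs://bucket/em"

The suffix form mirrors how Neuroglancer spells data sources, so this change appears aimed at letting such strings parse directly; internally they are normalized to the existing FORMAT://PROTOCOL://PATH prefix form before extraction.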
cloudfiles/resumable_tools.py
CHANGED
@@ -39,6 +39,9 @@ class ResumableFileSet:
     self.conn = sqlite3.connect(db_path)
     self.lease_msec = int(lease_msec)
 
+    self._total = 0
+    self._total_dirty = True
+
   def __del__(self):
     self.conn.close()
 
@@ -46,6 +49,7 @@ class ResumableFileSet:
     cur = self.conn.cursor()
     cur.execute("""DROP TABLE IF EXISTS filelist""")
     cur.execute("""DROP TABLE IF EXISTS xfermeta""")
+    cur.execute("""DROP TABLE IF EXISTS stats""")
     cur.close()
 
   def create(self, src, dest, reencode=None):
@@ -53,6 +57,7 @@ class ResumableFileSet:
 
     cur.execute("""DROP TABLE IF EXISTS filelist""")
     cur.execute("""DROP TABLE IF EXISTS xfermeta""")
+    cur.execute("""DROP TABLE IF EXISTS stats""")
 
     cur.execute(f"""
       CREATE TABLE xfermeta (
@@ -78,6 +83,18 @@ class ResumableFileSet:
     """)
     cur.execute("CREATE INDEX resumableidxfin ON filelist(finished,lease)")
     cur.execute("CREATE INDEX resumableidxfile ON filelist(filename)")
+
+    cur.execute(f"""
+      CREATE TABLE stats (
+        id {INTEGER} PRIMARY KEY {AUTOINC},
+        key TEXT NOT NULL,
+        value {INTEGER}
+      )
+    """)
+    cur.execute(
+      "INSERT INTO stats(id, key, value) VALUES (?,?,?)",
+      [1, 'finished', 0]
+    )
     cur.close()
 
   def insert(self, fname_iter):
@@ -91,7 +108,9 @@ class ResumableFileSet:
       cur.execute(f"INSERT INTO filelist(filename,finished,lease) VALUES {bindlist}", filenames)
       cur.execute("commit")
 
-    cur.close()
+    cur.close()
+
+    self._total_dirty = True
 
   def metadata(self):
     cur = self.conn.cursor()
@@ -111,6 +130,7 @@ class ResumableFileSet:
     for filenames in sip(fname_iter, SQLITE_MAX_PARAMS):
       bindlist = ",".join([f"{BIND}"] * len(filenames))
       cur.execute(f"UPDATE filelist SET finished = 1 WHERE filename in ({bindlist})", filenames)
+      cur.execute(f"UPDATE stats SET value = value + {len(filenames)} WHERE id = 1")
       cur.execute("commit")
     cur.close()
 
@@ -120,7 +140,7 @@ class ResumableFileSet:
     N = 0
 
     while True:
-      ts = now_msec()
+      ts = now_msec()
       cur.execute(f"""SELECT filename FROM filelist WHERE finished = 0 AND lease <= {ts} LIMIT {int(block_size)}""")
       rows = cur.fetchmany(block_size)
       N += len(rows)
@@ -140,31 +160,46 @@ class ResumableFileSet:
 
     cur.close()
 
-  def
+  def _scalar_query(self, sql:str) -> int:
     cur = self.conn.cursor()
-    cur.execute(
+    cur.execute(sql)
     res = cur.fetchone()
     cur.close()
     return int(res[0])
 
+  def total(self):
+    """Returns the total number of tasks (both processed and unprocessed)."""
+    if not self._total_dirty:
+      return self._total
+
+    self._total = self._scalar_query(f"SELECT max(id) FROM filelist")
+    self._total_dirty = False
+    return self._total
+
+  def finished(self):
+    return self._scalar_query(f"SELECT value FROM stats WHERE id = 1")
+
   def remaining(self):
-
-
-
-
-    return
+    return self.total() - self.finished()
+
+  def num_leased(self):
+    ts = int(now_msec())
+    return self._scalar_query(
+      f"SELECT count(filename) FROM filelist WHERE finished = 0 AND lease > {ts}"
+    )
 
   def available(self):
-
-
-
-
-    cur.close()
-    return int(res[0])
+    ts = int(now_msec())
+    return self._scalar_query(
+      f"SELECT count(filename) FROM filelist WHERE finished = 0 AND lease <= {ts}"
+    )
 
   def release(self):
+    cur = self.conn.cursor()
     cur.execute(f"UPDATE filelist SET lease = 0")
     cur.execute("commit")
+    cur.close()
+
 
   def __len__(self):
     return self.remaining()
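Progress accounting in ResumableFileSet is reworked around a one-row stats table: finish() bumps a counter in the same transaction as its bulk UPDATE, total() caches max(id) until insert() marks it dirty, and remaining() becomes total() - finished() rather than a per-call scan. A self-contained sketch of that counter pattern follows; table and column names mirror the diff, while the in-memory database and sample filenames are illustrative.

  import sqlite3

  conn = sqlite3.connect(":memory:")
  cur = conn.cursor()
  cur.execute("""CREATE TABLE filelist (
    id INTEGER PRIMARY KEY AUTOINCREMENT,
    filename TEXT, finished INTEGER, lease INTEGER
  )""")
  cur.execute("CREATE TABLE stats (id INTEGER PRIMARY KEY, key TEXT NOT NULL, value INTEGER)")
  cur.execute("INSERT INTO stats(id, key, value) VALUES (1, 'finished', 0)")
  cur.executemany(
    "INSERT INTO filelist(filename, finished, lease) VALUES (?, 0, 0)",
    [ (f"file_{i}.dat",) for i in range(100) ],
  )

  # mark a batch finished and bump the counter in the same transaction
  done = [ f"file_{i}.dat" for i in range(40) ]
  binds = ",".join("?" * len(done))
  cur.execute(f"UPDATE filelist SET finished = 1 WHERE filename IN ({binds})", done)
  cur.execute("UPDATE stats SET value = value + ? WHERE id = 1", (len(done),))
  conn.commit()

  total = cur.execute("SELECT max(id) FROM filelist").fetchone()[0]
  finished = cur.execute("SELECT value FROM stats WHERE id = 1").fetchone()[0]
  print(total, finished, total - finished)  # 100 40 60

Because the counter and the UPDATE commit together, finished() stays consistent with the filelist table while avoiding a COUNT(*) over every row.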
cloudfiles/scheduler.py
CHANGED
@@ -137,7 +137,12 @@ def schedule_jobs(
     or (hasattr(fns, "__len__") and len(fns) <= 1)
   ):
     return schedule_single_threaded_jobs(fns, progress, total, count_return)
-
+
+  if isinstance(total, int):
+    concurrency = min(concurrency, max(total, 1))
+  elif hasattr(fns, "__len__"):
+    concurrency = min(concurrency, max(len(fns), 1))
+
   if green == True or (green is None and gevent.monkey.saved):
     return schedule_green_jobs(fns, concurrency, progress, total, count_return)
 
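schedule_jobs() now clamps concurrency to the amount of work, so a handful of tasks no longer spawns a full-size worker pool. The same logic in isolation (clamp_concurrency is a hypothetical name; in the diff it runs inline):

  def clamp_concurrency(concurrency, fns, total=None):
    # prefer the explicit total; fns may be a generator with no __len__
    if isinstance(total, int):
      return min(concurrency, max(total, 1))
    if hasattr(fns, "__len__"):
      return min(concurrency, max(len(fns), 1))
    return concurrency  # unknown length: keep the requested value

  assert clamp_concurrency(40, [print] * 2) == 2
  assert clamp_concurrency(40, iter([]), total=0) == 1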
cloudfiles/secrets.py
CHANGED
@@ -137,23 +137,27 @@ def aws_credentials(bucket = '', service = 'aws', skip_files=False):
     AWS_CREDENTIALS_CACHE[service][bucket] = aws_credentials
   return aws_credentials
 
-CAVE_CREDENTIALS =
-def cave_credentials():
+CAVE_CREDENTIALS:CredentialCacheType = {}
+def cave_credentials(server = ''):
   global CAVE_CREDENTIALS
-  default_file_path = 'cave-secret.json'
-  path = secretpath(default_file_path)
 
-
-
+  paths = [
+    secretpath('cave-secret.json')
+  ]
 
-  if
-
-
-
-  CAVE_CREDENTIALS
+  if server:
+    paths = [ secretpath(f'{server}-cave-secret.json') ] + paths
+
+  if server in CAVE_CREDENTIALS:
+    return CAVE_CREDENTIALS.get(server, None)
 
-
+  for path in paths:
+    if os.path.exists(path):
+      with open(path, 'rt') as f:
+        CAVE_CREDENTIALS[server] = json.loads(f.read())
+      break
 
+  return CAVE_CREDENTIALS.get(server, None)
 
 HTTP_CREDENTIALS = None
 def http_credentials():
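cave_credentials() gains a server parameter: a server-specific {server}-cave-secret.json is searched before the generic cave-secret.json, and the parsed result is cached per server. A simplified sketch of that lookup order under stated assumptions — the fixed SECRETS_DIR here stands in for cloudfiles' own secretpath() resolver, which searches several locations:

  import json
  import os

  SECRETS_DIR = os.path.expanduser("~/.cloudfiles/secrets")  # assumed location
  CAVE_CREDENTIALS = {}

  def cave_credentials(server=""):
    if server in CAVE_CREDENTIALS:
      return CAVE_CREDENTIALS.get(server)

    names = [ "cave-secret.json" ]
    if server:
      # the server-specific secret shadows the generic one
      names.insert(0, f"{server}-cave-secret.json")

    for name in names:
      path = os.path.join(SECRETS_DIR, name)
      if os.path.exists(path):
        with open(path, "rt") as f:
          CAVE_CREDENTIALS[server] = json.load(f)
        break

    return CAVE_CREDENTIALS.get(server)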
cloudfiles/test.py
ADDED
@@ -0,0 +1,28 @@
+import igneous.task_creation as tc
+import os
+from taskqueue import totask
+import igneous.tasks
+
+
+def process_task(msg):
+  task = totask(msg)
+  task.execute()
+  return None
+
+def submit_tasks():
+  paths = [
+    "gs://ng_scratch_ranl_7/make_cv_happy/seg/20250403024338",]
+  all_tasks = []
+  for img_path in paths:
+    mip = 0
+    num_mips = 2
+    tasks = tc.create_downsampling_tasks(img_path,
+                fill_missing=False,
+                delete_black_uploads=True,
+                mip=mip, num_mips=num_mips)
+    all_tasks += list(tasks)
+  return all_tasks
+
+if __name__ == "__main__":
+  tasks = submit_tasks()
+  process_task(tasks[0])