python-misc-utils 0.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- py_misc_utils/__init__.py +0 -0
- py_misc_utils/abs_timeout.py +12 -0
- py_misc_utils/alog.py +311 -0
- py_misc_utils/app_main.py +179 -0
- py_misc_utils/archive_streamer.py +112 -0
- py_misc_utils/assert_checks.py +118 -0
- py_misc_utils/ast_utils.py +121 -0
- py_misc_utils/async_manager.py +189 -0
- py_misc_utils/break_control.py +63 -0
- py_misc_utils/buffered_iterator.py +35 -0
- py_misc_utils/cached_file.py +507 -0
- py_misc_utils/call_limiter.py +26 -0
- py_misc_utils/call_result_selector.py +13 -0
- py_misc_utils/cleanups.py +85 -0
- py_misc_utils/cmd.py +97 -0
- py_misc_utils/compression.py +116 -0
- py_misc_utils/cond_waiter.py +13 -0
- py_misc_utils/context_base.py +18 -0
- py_misc_utils/context_managers.py +67 -0
- py_misc_utils/core_utils.py +577 -0
- py_misc_utils/daemon_process.py +252 -0
- py_misc_utils/data_cache.py +46 -0
- py_misc_utils/date_utils.py +90 -0
- py_misc_utils/debug.py +24 -0
- py_misc_utils/dyn_modules.py +50 -0
- py_misc_utils/dynamod.py +103 -0
- py_misc_utils/env_config.py +35 -0
- py_misc_utils/executor.py +239 -0
- py_misc_utils/file_overwrite.py +29 -0
- py_misc_utils/fin_wrap.py +77 -0
- py_misc_utils/fp_utils.py +47 -0
- py_misc_utils/fs/__init__.py +0 -0
- py_misc_utils/fs/file_fs.py +127 -0
- py_misc_utils/fs/ftp_fs.py +242 -0
- py_misc_utils/fs/gcs_fs.py +196 -0
- py_misc_utils/fs/http_fs.py +241 -0
- py_misc_utils/fs/s3_fs.py +417 -0
- py_misc_utils/fs_base.py +133 -0
- py_misc_utils/fs_utils.py +207 -0
- py_misc_utils/gcs_fs.py +169 -0
- py_misc_utils/gen_indices.py +54 -0
- py_misc_utils/gfs.py +371 -0
- py_misc_utils/git_repo.py +77 -0
- py_misc_utils/global_namespace.py +110 -0
- py_misc_utils/http_async_fetcher.py +139 -0
- py_misc_utils/http_server.py +196 -0
- py_misc_utils/http_utils.py +143 -0
- py_misc_utils/img_utils.py +20 -0
- py_misc_utils/infix_op.py +20 -0
- py_misc_utils/inspect_utils.py +205 -0
- py_misc_utils/iostream.py +21 -0
- py_misc_utils/iter_file.py +117 -0
- py_misc_utils/key_wrap.py +46 -0
- py_misc_utils/lazy_import.py +25 -0
- py_misc_utils/lockfile.py +164 -0
- py_misc_utils/mem_size.py +64 -0
- py_misc_utils/mirror_from.py +72 -0
- py_misc_utils/mmap.py +16 -0
- py_misc_utils/module_utils.py +196 -0
- py_misc_utils/moving_average.py +19 -0
- py_misc_utils/msgpack_streamer.py +26 -0
- py_misc_utils/multi_wait.py +24 -0
- py_misc_utils/multiprocessing.py +102 -0
- py_misc_utils/named_array.py +224 -0
- py_misc_utils/no_break.py +46 -0
- py_misc_utils/no_except.py +32 -0
- py_misc_utils/np_ml_framework.py +184 -0
- py_misc_utils/np_utils.py +346 -0
- py_misc_utils/ntuple_utils.py +38 -0
- py_misc_utils/num_utils.py +54 -0
- py_misc_utils/obj.py +73 -0
- py_misc_utils/object_cache.py +100 -0
- py_misc_utils/object_tracker.py +88 -0
- py_misc_utils/ordered_set.py +71 -0
- py_misc_utils/osfd.py +27 -0
- py_misc_utils/packet.py +22 -0
- py_misc_utils/parquet_streamer.py +69 -0
- py_misc_utils/pd_utils.py +254 -0
- py_misc_utils/periodic_task.py +61 -0
- py_misc_utils/pickle_wrap.py +121 -0
- py_misc_utils/pipeline.py +98 -0
- py_misc_utils/remap_pickle.py +50 -0
- py_misc_utils/resource_manager.py +155 -0
- py_misc_utils/rnd_utils.py +56 -0
- py_misc_utils/run_once.py +19 -0
- py_misc_utils/scheduler.py +135 -0
- py_misc_utils/select_params.py +300 -0
- py_misc_utils/signal.py +141 -0
- py_misc_utils/skl_utils.py +270 -0
- py_misc_utils/split.py +147 -0
- py_misc_utils/state.py +53 -0
- py_misc_utils/std_module.py +56 -0
- py_misc_utils/stream_dataframe.py +176 -0
- py_misc_utils/streamed_file.py +144 -0
- py_misc_utils/tempdir.py +79 -0
- py_misc_utils/template_replace.py +51 -0
- py_misc_utils/tensor_stream.py +269 -0
- py_misc_utils/thread_context.py +33 -0
- py_misc_utils/throttle.py +30 -0
- py_misc_utils/time_trigger.py +18 -0
- py_misc_utils/timegen.py +11 -0
- py_misc_utils/traceback.py +49 -0
- py_misc_utils/tracking_executor.py +91 -0
- py_misc_utils/transform_array.py +42 -0
- py_misc_utils/uncompress.py +35 -0
- py_misc_utils/url_fetcher.py +157 -0
- py_misc_utils/utils.py +538 -0
- py_misc_utils/varint.py +50 -0
- py_misc_utils/virt_array.py +52 -0
- py_misc_utils/weak_call.py +33 -0
- py_misc_utils/work_results.py +100 -0
- py_misc_utils/writeback_file.py +43 -0
- python_misc_utils-0.2.dist-info/METADATA +36 -0
- python_misc_utils-0.2.dist-info/RECORD +117 -0
- python_misc_utils-0.2.dist-info/WHEEL +5 -0
- python_misc_utils-0.2.dist-info/licenses/LICENSE +13 -0
- python_misc_utils-0.2.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,417 @@
|
|
|
1
|
+
import collections
|
|
2
|
+
import functools
|
|
3
|
+
import io
|
|
4
|
+
import os
|
|
5
|
+
import stat as st
|
|
6
|
+
import tempfile
|
|
7
|
+
import urllib.parse as uparse
|
|
8
|
+
|
|
9
|
+
import boto3
|
|
10
|
+
|
|
11
|
+
from .. import alog as alog
|
|
12
|
+
from .. import assert_checks as tas
|
|
13
|
+
from .. import cached_file as chf
|
|
14
|
+
from .. import fs_base as fsb
|
|
15
|
+
from .. import fs_utils as fsu
|
|
16
|
+
from .. import iter_file as itf
|
|
17
|
+
from .. import object_cache as objc
|
|
18
|
+
from .. import osfd as osfd
|
|
19
|
+
from .. import utils as ut
|
|
20
|
+
from .. import writeback_file as wbf
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
# https://boto3.amazonaws.com/v1/documentation/api/1.35.9/reference/services/s3.html
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
# Credential triple handed to the boto3 session/client factories below. The
# session token is optional and defaults to None.
_Credentials = collections.namedtuple('Credentials',
                                      'access_key, secret_key, session_token',
                                      defaults=(None,))
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
def _get_credentials(user=None):
  """Resolve the AWS credentials to use for `user`.

  When `user` is falsy the credentials are read from the AWS_ACCESS_KEY_ID,
  AWS_SECRET_ACCESS_KEY and AWS_SESSION_TOKEN environment variables (unset
  variables produce None fields).

  Otherwise they are read from the "users" -> `user` entry of the ".aws.conf"
  file located in the folder returned by fsu.home(). A missing file, a missing
  "users" section or a missing user entry is reported through alog.xraise()
  with RuntimeError.
  """
  if not user:
    return _Credentials(
      access_key=os.getenv('AWS_ACCESS_KEY_ID'),
      secret_key=os.getenv('AWS_SECRET_ACCESS_KEY'),
      session_token=os.getenv('AWS_SESSION_TOKEN'),
    )

  cfg_path = os.path.join(fsu.home(), '.aws.conf')
  if not os.path.exists(cfg_path):
    alog.xraise(RuntimeError,
                f'No configuration file "{cfg_path}" found to lookup credentials '
                f'for user "{user}"')

  cfg = ut.load_config(cfg_path)

  users_cfg = cfg.get('users')
  if users_cfg is None:
    alog.xraise(RuntimeError, f'Missing "users" entry in configuration file "{cfg_path}"')

  user_cfg = users_cfg.get(user)
  if user_cfg is None:
    alog.xraise(RuntimeError, f'Missing "{user}" entry in configuration file "{cfg_path}"')

  # Keys absent from the user entry simply become None fields.
  fields = {name: user_cfg.get(name)
            for name in ('access_key', 'secret_key', 'session_token')}

  return _Credentials(**fields)
|
|
59
|
+
|
|
60
|
+
|
|
61
|
+
def _create_session(creds):
  """Create a boto3 Session authenticated with the given credentials tuple."""
  session_args = dict(
    aws_access_key_id=creds.access_key,
    aws_secret_access_key=creds.secret_key,
    aws_session_token=creds.session_token,
  )

  return boto3.session.Session(**session_args)
|
|
67
|
+
|
|
68
|
+
|
|
69
|
+
def _create_client(creds):
  """Create a boto3 S3 client authenticated with the given credentials tuple."""
  client_args = dict(
    aws_access_key_id=creds.access_key,
    aws_secret_access_key=creds.secret_key,
    aws_session_token=creds.session_token,
  )

  return boto3.client('s3', **client_args)
|
|
76
|
+
|
|
77
|
+
|
|
78
|
+
def _make_dentry(resp, path, base_path=None):
  """Convert an S3 response (or listing item) mapping into a DirEntry.

  Args:
    resp: Mapping holding the object attributes. The size is taken from the
      first of 'ContentLength', 'ObjectSize', 'Size' which is not None; 'ETag'
      and 'LastModified' are used when present.
    path: Object key the attributes refer to.
    base_path: When given (and different from `path`), the entry is reported
      relative to this folder: keys nested more than one level below it are
      collapsed into a directory entry for their first path component.

  Returns:
    The DirEntry, or None when `path` does not live under `base_path`.
  """
  # Surrounding quotes are stripped from the ETag; an empty ETag maps to None.
  etag = resp.get('ETag', '').strip('"\'') or None

  size = None
  for size_key in ('ContentLength', 'ObjectSize', 'Size'):
    size = resp.get(size_key)
    if size is not None:
      break

  modified = resp.get('LastModified')
  mtime = modified.timestamp() if modified is not None else None

  mode = st.S_IFREG
  if base_path is None or base_path == path:
    name = os.path.basename(path)
  else:
    if base_path and not base_path.endswith('/'):
      base_path += '/'
    if not path.startswith(base_path):
      return None

    name = path[len(base_path):]
    slash_pos = name.find('/')
    if slash_pos > 0:
      # The key lives in a sub-folder: report the sub-folder itself.
      name = name[: slash_pos]
      path = base_path + name
      size, etag, mode = 0, None, st.S_IFDIR

  return fsb.DirEntry(name=name,
                      path=path,
                      etag=etag,
                      st_mode=mode,
                      st_size=size,
                      st_ctime=mtime,
                      st_mtime=mtime)
|
|
116
|
+
|
|
117
|
+
|
|
118
|
+
def _read_object(client, bucket, path, rdrange=None):
  """Fetch an object (or a byte range of it) from S3.

  Args:
    client: S3 client exposing get_object().
    bucket: Bucket name.
    path: Object key.
    rdrange: Optional (start, end) pair with `end` exclusive. It is converted
      into the inclusive HTTP form "bytes=start-(end - 1)".

  Returns:
    A (stream, dentry) tuple, where `stream` is the 'Body' of the response and
    `dentry` is built from the remaining response fields.

  A response without 'Body' is reported through alog.xraise() with
  RuntimeError.
  """
  request = dict(Bucket=bucket, Key=path)
  if rdrange is not None:
    # The Range argument must be omitted (not None) for whole-object reads,
    # since botocore validates it as a string.
    request['Range'] = f'bytes={rdrange[0]}-{rdrange[1] - 1}'

  response = client.get_object(**request)
  stream = response.pop('Body', None)
  if stream is None:
    alog.xraise(RuntimeError, f'Error reading {bucket}:{path} object: {response}')

  return stream, _make_dentry(response, path)
|
|
131
|
+
|
|
132
|
+
|
|
133
|
+
def _stat_object(client, bucket, path):
  """Return the DirEntry of a single object, using get_object_attributes().

  Only the 'ObjectSize' and 'ETag' attributes are requested.
  """
  wanted = ['ObjectSize', 'ETag']
  response = client.get_object_attributes(Bucket=bucket,
                                          Key=path,
                                          ObjectAttributes=wanted)

  return _make_dentry(response, path)
|
|
141
|
+
|
|
142
|
+
|
|
143
|
+
def _norm_path(path):
  """Normalize a folder path so it can be used as a listing prefix.

  Falsy paths are returned untouched, '/' becomes the empty prefix, and any
  other path gets a trailing '/' when it does not already have one.
  """
  if not path:
    return path
  if path == '/':
    return ''

  return path if path.endswith('/') else path + '/'
|
|
151
|
+
|
|
152
|
+
|
|
153
|
+
def _list_objects(client, bucket, path, flat=True):
  """Generate the DirEntry of every object whose key starts with `path`.

  All the pages returned by list_objects_v2() are walked, following the
  continuation token while the response is flagged as truncated.

  Args:
    client: S3 client exposing list_objects_v2().
    bucket: Bucket name.
    path: Key prefix to list.
    flat: When True every matching key is reported as is. When False the
      entries are made relative to `path` (see _make_dentry()) and the ones
      which do not belong to it are dropped.
  """
  base_path = None if flat else path
  paging = dict()
  more = True
  while more:
    page = client.list_objects_v2(Bucket=bucket, Prefix=path, **paging)

    for obj in page.get('Contents', ()):
      dentry = _make_dentry(obj, obj['Key'], base_path=base_path)
      if dentry is not None:
        yield dentry

    more = bool(page.get('IsTruncated', False))
    if more:
      paging['ContinuationToken'] = page['NextContinuationToken']
|
|
170
|
+
|
|
171
|
+
|
|
172
|
+
def _list(client, bucket, path):
  """Generate the entries directly contained in the `path` folder.

  Keys nested below the same sub-folder collapse into a single directory
  entry, whose creation time is the minimum and whose modification time is
  the maximum among the collapsed ones. Entries are generated sorted by
  (st_mode, name).
  """
  by_name = dict()
  for entry in _list_objects(client, bucket, path, flat=False):
    known = by_name.get(entry.name)
    if known is not None:
      entry = entry._replace(st_ctime=min(entry.st_ctime, known.st_ctime),
                             st_mtime=max(entry.st_mtime, known.st_mtime))

    by_name[entry.name] = entry

  for name in sorted(by_name, key=lambda n: (by_name[n].st_mode, n)):
    yield by_name[name]
|
|
185
|
+
|
|
186
|
+
|
|
187
|
+
def _stat(client, bucket, path):
  """Return the DirEntry describing `path`, or None if nothing matches it.

  A key matching `path` exactly is reported with its own entry. When `path`
  is instead the prefix of other keys, a directory entry is synthesized, with
  the minimum creation time and the maximum modification time of its children.

  Args:
    client: S3 client used for the listing.
    bucket: Bucket name.
    path: Object key, or folder prefix.
  """
  dentries = tuple(_list(client, bucket, path))
  if not dentries:
    return None

  # A lone entry describes `path` itself only if it carries the very same key.
  # Otherwise it is the only child of the `path` folder, and returning it
  # would report the child in place of the folder being queried.
  if len(dentries) == 1 and dentries[0].path == path:
    return dentries[0]

  ctime = min(dentry.st_ctime for dentry in dentries)
  mtime = max(dentry.st_mtime for dentry in dentries)

  bpath = path[: -1] if path.endswith('/') else path
  name = os.path.basename(bpath)

  return fsb.DirEntry(name=name,
                      path=bpath,
                      st_mode=st.S_IFDIR,
                      st_size=0,
                      st_ctime=ctime,
                      st_mtime=mtime)
|
|
206
|
+
|
|
207
|
+
|
|
208
|
+
def _rmtree(client, bucket, path, ignore_errors=None):
  """Delete every object stored below the `path` folder.

  Deletion failures are logged at debug level, and re-raised unless
  `ignore_errors` is set (None and False both mean "do not ignore").
  """
  prefix = _norm_path(path)

  # Take a full snapshot of the listing first, to prevent possible errors
  # deriving from deleting while listing.
  dentries = tuple(_list_objects(client, bucket, prefix))

  propagate = ignore_errors in (None, False)
  for dentry in dentries:
    try:
      client.delete_object(Bucket=bucket, Key=dentry.path)
    except Exception as ex:
      alog.debug(f'Failed to remove the {bucket}:{dentry.path} object: {ex}')
      if propagate:
        raise
|
|
219
|
+
|
|
220
|
+
|
|
221
|
+
def _write_object(client, bucket, path, body):
  """Upload `body` as the content of the `path` object.

  Plain iterators (iterators lacking a seek() method) are wrapped into an
  itf.IterFile before being handed to put_object(); anything else is passed
  through untouched.
  """
  plain_iterator = (isinstance(body, collections.abc.Iterator) and
                    not hasattr(body, 'seek'))
  payload = itf.IterFile(body) if plain_iterator else body

  client.put_object(Bucket=bucket, Key=path, Body=payload)
|
|
230
|
+
|
|
231
|
+
|
|
232
|
+
class CacheHandler(objc.Handler):
  """Object cache handler creating S3 clients on demand.

  The constructor arguments are stored, and forwarded to _create_client()
  when create() is called.
  """

  def __init__(self, *args, **kwargs):
    super().__init__()
    self._args, self._kwargs = args, kwargs

  def create(self):
    """Create a new S3 client from the stored arguments."""
    client_args, client_kwargs = self._args, self._kwargs

    return _create_client(*client_args, **client_kwargs)
|
|
241
|
+
|
|
242
|
+
|
|
243
|
+
class S3Reader:
  """Reader object feeding the file cache with the content of an S3 object."""

  def __init__(self, client, bucket, path, sres):
    # `sres` is the stat result (DirEntry) of the object being read.
    self._client, self._bucket = client, bucket
    self._path, self._sres = path, sres

  @classmethod
  def tag(cls, sres):
    """Return the cache tag of an object: its ETag when available, otherwise
    one made out of its size and modification time."""
    if sres.etag:
      return sres.etag

    return chf.make_tag(size=sres.st_size, mtime=sres.st_mtime)

  def support_blocks(self):
    """Tell that partial (ranged) reads are supported."""
    return True

  def read_block(self, bpath, offset, size):
    """Download a block of the object into the `bpath` local file.

    When `offset` is chf.CachedBlockFile.WHOLE_OFFSET the whole object is
    fetched, otherwise the [offset, offset + size) range clipped to the object
    size. Returns the size of the written file.
    """
    if offset == chf.CachedBlockFile.WHOLE_OFFSET:
      rdrange = None
    else:
      size = min(size, self._sres.st_size - offset)
      rdrange = (offset, offset + size)

    stream, _ = _read_object(self._client, self._bucket, self._path,
                             rdrange=rdrange)

    open_flags = os.O_CREAT | os.O_TRUNC | os.O_WRONLY
    with osfd.OsFd(bpath, open_flags, mode=0o440) as wfd:
      for data in stream.iter_chunks():
        os.write(wfd, data)

    return os.path.getsize(bpath)
|
|
276
|
+
|
|
277
|
+
|
|
278
|
+
class S3Fs(fsb.FsBase):
  """Filesystem implementation for "s3://[user@]bucket/key" URLs.

  The bucket is the URL hostname and the object key is the URL path, with its
  leading slashes removed. The optional URL user name selects the credentials
  to use (see _get_credentials()).

  S3 has no real folders, so mkdir(), makedirs() and rmdir() are no-ops, and
  link() and symlink() are implemented as copies.
  """

  ID = 's3'
  IDS = (ID,)

  def __init__(self, cache_iface=None, **kwargs):
    super().__init__(cache_iface=cache_iface, **kwargs)

  def _get_client(self, purl):
    """Return the S3 client for the user of the parsed URL, from the object cache."""
    creds = _get_credentials(user=purl.username)
    handler = CacheHandler(creds)
    name = ('S3FS', purl.username or '$local')

    return objc.cache().get(name, handler)

  def _parse_url(self, url):
    """Parse `url` and return the (client, parsed_url) pair to operate on it.

    The path of the returned parsed URL has its leading slashes stripped, so
    that it can be directly used as object key.
    """
    purl = uparse.urlparse(url)
    purl = purl._replace(path=purl.path.lstrip('/'))
    client = self._get_client(purl)

    return client, purl

  def _make_reader(self, client, purl):
    """Create the (reader, meta) pair required by the cache interface."""
    sres = _stat_object(client, purl.hostname, purl.path)

    tag = S3Reader.tag(sres)
    meta = chf.Meta(size=sres.st_size, mtime=sres.st_mtime, tag=tag)
    reader = S3Reader(client, purl.hostname, purl.path, sres)

    return reader, meta

  def _copy(self, src_url, dest_url):
    """Copy an object within the same bucket (cross-bucket copies are rejected)."""
    src_client, src_purl = self._parse_url(src_url)
    dest_client, dest_purl = self._parse_url(dest_url)

    tas.check_eq(src_purl.hostname, dest_purl.hostname,
                 msg=f'Source and destination URL must be on the same bucket: '
                 f'{src_url} vs. {dest_url}')

    src_client.copy_object(
      Bucket=src_purl.hostname,
      CopySource=dict(Bucket=src_purl.hostname, Key=src_purl.path),
      Key=dest_purl.path,
    )

  def remove(self, url):
    """Delete the object at `url`."""
    client, purl = self._parse_url(url)
    client.delete_object(Bucket=purl.hostname, Key=purl.path)

  def rename(self, src_url, dest_url):
    """Rename an object, as a copy followed by the removal of the source."""
    self._copy(src_url, dest_url)

    src_client, src_purl = self._parse_url(src_url)
    src_client.delete_object(Bucket=src_purl.hostname, Key=src_purl.path)

  def mkdir(self, url, mode=None):
    # Folders do not exist in S3, they are implied by the object keys.
    pass

  def makedirs(self, url, mode=None, exist_ok=None):
    # Folders do not exist in S3, they are implied by the object keys.
    pass

  def rmdir(self, url):
    # Folders do not exist in S3, they are implied by the object keys.
    pass

  def rmtree(self, url, ignore_errors=None):
    """Delete every object below the `url` folder."""
    client, purl = self._parse_url(url)

    _rmtree(client, purl.hostname, purl.path, ignore_errors=ignore_errors)

  def stat(self, url):
    """Return the DirEntry of `url`.

    Raises:
      FileNotFoundError: If no object matches `url`, either exactly or as a
        folder prefix.
    """
    client, purl = self._parse_url(url)

    dentry = _stat(client, purl.hostname, purl.path)
    # The lookup result itself must be checked, otherwise missing objects
    # would be reported as a None entry and exists() could never be False.
    if dentry is None:
      raise FileNotFoundError(f'Not found: {purl.hostname}:{purl.path}')

    return dentry

  def list(self, url):
    """Generate the entries contained in the `url` folder."""
    client, purl = self._parse_url(url)

    return _list(client, purl.hostname, purl.path)

  def open(self, url, mode, **kwargs):
    """Open `url` and return a file object.

    Read-only modes are served through the cache interface (which receives
    `kwargs`). Any other mode operates on a local temporary file, uploaded
    back by a WritebackFile; when the mode does not truncate and the object
    exists, the temporary file is first filled with the current content.
    """
    client, purl = self._parse_url(url)

    if self.read_mode(mode):
      reader, meta = self._make_reader(client, purl)
      cfile = self._cache_iface.open(url, meta, reader, **kwargs)

      return io.TextIOWrapper(cfile) if self.text_mode(mode) else cfile
    else:
      writeback_fn = functools.partial(self._upload_file, url)
      # Existence is a filesystem level query, the S3 client has no exists().
      if not self.truncate_mode(mode) and self.exists(url):
        url_file = self._download_file(url)
        self.seek_stream(mode, url_file)
      else:
        url_file = tempfile.TemporaryFile()

      wbfile = wbf.WritebackFile(url_file, writeback_fn)

      return io.TextIOWrapper(wbfile) if self.text_mode(mode) else wbfile

  def _upload_file(self, url, stream):
    """Upload the whole content of `stream` (rewound to its start) to `url`."""
    stream.seek(0)
    self.put_file(url, stream)

  def _download_file(self, url):
    """Download `url` into a temporary file and return the open file object.

    The temporary file is closed when the download fails, so that it does not
    leak. The caller is responsible for positioning the returned file.
    """
    ftmp = tempfile.TemporaryFile()
    try:
      for data in self.get_file(url):
        ftmp.write(data)
    except BaseException:
      ftmp.close()
      raise

    return ftmp

  def put_file(self, url, stream):
    """Store the content of `stream` (a file object, or an iterator) at `url`."""
    client, purl = self._parse_url(url)

    _write_object(client, purl.hostname, purl.path, stream)

  def get_file(self, url):
    """Generate the content of the object at `url`, in chunks."""
    client, purl = self._parse_url(url)

    stream, _ = _read_object(client, purl.hostname, purl.path)
    for data in stream.iter_chunks():
      yield data

  def as_local(self, url, **kwargs):
    """Return what the cache interface's as_local() yields for `url`."""
    client, purl = self._parse_url(url)
    reader, meta = self._make_reader(client, purl)

    return self._cache_iface.as_local(url, meta, reader, **kwargs)

  def link(self, src_url, dest_url):
    # No links in S3, a copy is the closest equivalent.
    self._copy(src_url, dest_url)

  def symlink(self, src_url, dest_url):
    self.link(src_url, dest_url)
|
|
414
|
+
|
|
415
|
+
|
|
416
|
+
# Filesystem classes provided by this module.
# NOTE(review): presumably collected by the package's filesystem registry;
# confirm against the code loading the "fs" modules.
FILE_SYSTEMS = (S3Fs,)
|
|
417
|
+
|
py_misc_utils/fs_base.py
ADDED
|
@@ -0,0 +1,133 @@
|
|
|
1
|
+
import abc
|
|
2
|
+
import collections
|
|
3
|
+
import os
|
|
4
|
+
import stat as st
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
# Stat-like description of a filesystem entry. The trailing `path` and `etag`
# fields are optional and default to None.
DirEntry = collections.namedtuple('DirEntry',
                                  'name, st_mode, st_size, st_ctime, st_mtime, path, etag',
                                  defaults=(None, None))
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
class FsBase(abc.ABC):
  """Abstract interface of the URL based filesystems.

  Concrete filesystems implement the abstract methods. The helpers defined
  here (open mode parsing, exists(), isdir(), isfile(), copyfile(), replace())
  are built on top of them.
  """

  def __init__(self, cache_iface=None, **kwargs):
    super().__init__()
    self._cache_iface = cache_iface
    self._kwargs = kwargs

  @classmethod
  def read_mode(cls, mode):
    """Tell whether `mode` opens for reading only ('r' without '+')."""
    return 'r' in mode and '+' not in mode

  @classmethod
  def write_mode(cls, mode):
    """Tell whether `mode` allows writing (contains any of 'w', 'a', '+')."""
    return any(c in mode for c in 'wa+')

  @classmethod
  def truncate_mode(cls, mode):
    """Tell whether `mode` discards the existing content ('w' without '+')."""
    return 'w' in mode and '+' not in mode

  @classmethod
  def append_mode(cls, mode):
    """Tell whether `mode` is an append mode."""
    return 'a' in mode

  @classmethod
  def text_mode(cls, mode):
    """Tell whether `mode` is a text mode (no 'b' flag)."""
    return 'b' not in mode

  @classmethod
  def seek_stream(cls, mode, stream):
    """Position `stream` at its end for append modes, at its start otherwise."""
    # This is a classmethod, the mode helpers must be reached through `cls`.
    stream.seek(0, os.SEEK_END if cls.append_mode(mode) else os.SEEK_SET)

  def norm_url(self, url):
    """Return the normalized form of `url` (unchanged by default)."""
    return url

  def exists(self, url):
    """Tell whether `url` exists, which is, whether stat() succeeds on it."""
    try:
      self.stat(url)

      return True
    except Exception:
      # Any stat() failure means "not there", but interpreter level events
      # (KeyboardInterrupt, SystemExit) must not be swallowed.
      return False

  def isdir(self, url):
    """Tell whether `url` exists and is a directory."""
    try:
      de = self.stat(url)

      return st.S_ISDIR(de.st_mode)
    except Exception:
      return False

  def isfile(self, url):
    """Tell whether `url` exists and is a regular file."""
    try:
      de = self.stat(url)

      return st.S_ISREG(de.st_mode)
    except Exception:
      return False

  def copyfile(self, url, dest_fs, dest_url):
    """Copy `url` to `dest_url` of the `dest_fs` filesystem, streaming the data."""
    dest_fs.put_file(dest_url, self.get_file(url))

  @abc.abstractmethod
  def stat(self, url):
    ...

  @abc.abstractmethod
  def open(self, url, mode, **kwargs):
    ...

  @abc.abstractmethod
  def remove(self, url):
    ...

  @abc.abstractmethod
  def rename(self, src_url, dest_url):
    ...

  def replace(self, src_url, dest_url):
    """Same as rename() by default."""
    self.rename(src_url, dest_url)

  @abc.abstractmethod
  def mkdir(self, url, mode=None):
    ...

  @abc.abstractmethod
  def makedirs(self, url, mode=None, exist_ok=None):
    ...

  @abc.abstractmethod
  def rmdir(self, url):
    ...

  @abc.abstractmethod
  def rmtree(self, url, ignore_errors=None):
    ...

  @abc.abstractmethod
  def list(self, url):
    ...

  @abc.abstractmethod
  def put_file(self, url, data_gen):
    ...

  @abc.abstractmethod
  def get_file(self, url):
    ...

  @abc.abstractmethod
  def as_local(self, url, **kwargs):
    ...

  @abc.abstractmethod
  def link(self, src_url, dest_url):
    ...

  @abc.abstractmethod
  def symlink(self, src_url, dest_url):
    ...
|
|
133
|
+
|