fsspec 2023.9.2__py3-none-any.whl → 2023.12.0__py3-none-any.whl
This diff shows the changes between two publicly released versions of the package, as published to the registry. It is provided for informational purposes only.
- fsspec/__init__.py +6 -1
- fsspec/_version.py +4 -4
- fsspec/archive.py +1 -1
- fsspec/asyn.py +35 -45
- fsspec/caching.py +161 -90
- fsspec/compression.py +2 -4
- fsspec/core.py +19 -6
- fsspec/fuse.py +2 -2
- fsspec/generic.py +5 -1
- fsspec/gui.py +4 -4
- fsspec/implementations/cached.py +105 -25
- fsspec/implementations/data.py +48 -0
- fsspec/implementations/ftp.py +6 -6
- fsspec/implementations/git.py +3 -3
- fsspec/implementations/github.py +3 -7
- fsspec/implementations/http.py +34 -47
- fsspec/implementations/jupyter.py +5 -5
- fsspec/implementations/libarchive.py +1 -2
- fsspec/implementations/local.py +8 -4
- fsspec/implementations/memory.py +1 -1
- fsspec/implementations/reference.py +67 -25
- fsspec/implementations/sftp.py +11 -11
- fsspec/implementations/smb.py +4 -5
- fsspec/implementations/webhdfs.py +28 -8
- fsspec/implementations/zip.py +2 -2
- fsspec/mapping.py +2 -2
- fsspec/registry.py +8 -6
- fsspec/spec.py +41 -55
- fsspec/tests/abstract/common.py +5 -5
- fsspec/transaction.py +8 -4
- fsspec/utils.py +204 -37
- {fsspec-2023.9.2.dist-info → fsspec-2023.12.0.dist-info}/METADATA +7 -6
- fsspec-2023.12.0.dist-info/RECORD +54 -0
- {fsspec-2023.9.2.dist-info → fsspec-2023.12.0.dist-info}/WHEEL +1 -1
- fsspec/implementations/http_sync.py +0 -882
- fsspec-2023.9.2.dist-info/RECORD +0 -54
- {fsspec-2023.9.2.dist-info → fsspec-2023.12.0.dist-info}/LICENSE +0 -0
- {fsspec-2023.9.2.dist-info → fsspec-2023.12.0.dist-info}/top_level.txt +0 -0
fsspec/gui.py
CHANGED
@@ -70,7 +70,7 @@ class SigSlot:
         same name.
         """
         if name not in self.signals:
-            raise ValueError("Attempt to assign an undeclared signal:
+            raise ValueError(f"Attempt to assign an undeclared signal: {name}")
         self._sigs[name] = {
             "widget": widget,
             "callbacks": [],
@@ -141,7 +141,7 @@ class SigSlot:

         Calling of callbacks will halt whenever one returns False.
         """
-        logger.log(self._sigs[sig]["log"], "{}: {}"
+        logger.log(self._sigs[sig]["log"], f"{sig}: {value}")
         for callback in self._sigs[sig]["callbacks"]:
             if isinstance(callback, str):
                 self._emit(callback)
@@ -242,7 +242,7 @@ class FileSelector(SigSlot):
         else:
             self.init_protocol, url = "file", os.getcwd()
         self.init_url = url
-        self.init_kwargs = kwargs or "{}"
+        self.init_kwargs = (kwargs if isinstance(kwargs, str) else str(kwargs)) or "{}"
         self.filters = filters
         self.ignore = [re.compile(i) for i in ignore or []]
         self._fs = None
@@ -319,7 +319,7 @@ class FileSelector(SigSlot):
     def urlpath(self):
         """URL of currently selected item"""
         return (
-            (self.protocol.value
+            (f"{self.protocol.value}://{self.main.value[0]}")
            if self.main.value
            else None
        )
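Note on the logging change above: the new SigSlot code builds its message eagerly with an f-string, while the cached.py and http.py hunks below keep logging lazy by passing the value as a separate argument. A minimal sketch of the two styles, using only the standard library (names are illustrative):

    import logging

    logger = logging.getLogger("fsspec.gui")
    sig, value = "protocol", "file"  # illustrative values

    # Eager: the f-string is rendered even if DEBUG logging is disabled
    logger.log(logging.DEBUG, f"{sig}: {value}")

    # Lazy: interpolation happens only if a handler actually emits the record
    logger.debug("Opening local copy of %s", "/tmp/afile")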
fsspec/implementations/cached.py
CHANGED
@@ -17,6 +17,7 @@ from fsspec.exceptions import BlocksizeMismatchError
 from fsspec.implementations.cache_mapper import create_cache_mapper
 from fsspec.implementations.cache_metadata import CacheMetadata
 from fsspec.spec import AbstractBufferedFile
+from fsspec.transaction import Transaction
 from fsspec.utils import infer_compression

 if TYPE_CHECKING:
@@ -25,6 +26,16 @@ if TYPE_CHECKING:
 logger = logging.getLogger("fsspec.cached")


+class WriteCachedTransaction(Transaction):
+    def complete(self, commit=True):
+        rpaths = [f.path for f in self.files]
+        lpaths = [f.fn for f in self.files]
+        if commit:
+            self.fs.put(lpaths, rpaths)
+        # else remove?
+        self.fs._intrans = False
+
+
 class CachingFileSystem(AbstractFileSystem):
     """Locally caching filesystem, layer over any other FS

@@ -128,6 +139,11 @@ class CachingFileSystem(AbstractFileSystem):
         self.expiry = expiry_time
         self.compression = compression

+        # Size of cache in bytes. If None then the size is unknown and will be
+        # recalculated the next time cache_size() is called. On writes to the
+        # cache this is reset to None.
+        self._cache_size = None
+
         if same_names is not None and cache_mapper is not None:
             raise ValueError(
                 "Cannot specify both same_names and cache_mapper in "
@@ -165,6 +181,17 @@ class CachingFileSystem(AbstractFileSystem):
     def _mkcache(self):
         os.makedirs(self.storage[-1], exist_ok=True)

+    def cache_size(self):
+        """Return size of cache in bytes.
+
+        If more than one cache directory is in use, only the size of the last
+        one (the writable cache directory) is returned.
+        """
+        if self._cache_size is None:
+            cache_dir = self.storage[-1]
+            self._cache_size = filesystem("file").du(cache_dir, withdirs=True)
+        return self._cache_size
+
     def load_cache(self):
         """Read set of stored blocks from file"""
         self._metadata.load()
@@ -176,6 +203,7 @@ class CachingFileSystem(AbstractFileSystem):
         self._mkcache()
         self._metadata.save()
         self.last_cache = time.time()
+        self._cache_size = None

     def _check_cache(self):
         """Reload caches if time elapsed or any disappeared"""
@@ -202,6 +230,7 @@ class CachingFileSystem(AbstractFileSystem):
         """
         rmtree(self.storage[-1])
         self.load_cache()
+        self._cache_size = None

     def clear_expired_cache(self, expiry_time=None):
         """Remove all expired files and metadata from the cache
@@ -231,6 +260,8 @@ class CachingFileSystem(AbstractFileSystem):
             rmtree(self.storage[-1])
             self.load_cache()

+        self._cache_size = None
+
     def pop_from_cache(self, path):
         """Remove cached version of given file

@@ -242,6 +273,7 @@ class CachingFileSystem(AbstractFileSystem):
         fn = self._metadata.pop_file(path)
         if fn is not None:
             os.remove(fn)
+        self._cache_size = None

     def _open(
         self,
@@ -283,10 +315,10 @@ class CachingFileSystem(AbstractFileSystem):
             hash, blocks = detail["fn"], detail["blocks"]
             if blocks is True:
                 # stored file is complete
-                logger.debug("Opening local copy of %s
+                logger.debug("Opening local copy of %s", path)
                 return open(fn, mode)
             # TODO: action where partial file exists in read-only cache
-            logger.debug("Opening partially cached copy of %s
+            logger.debug("Opening partially cached copy of %s", path)
         else:
             hash = self._mapper(path)
             fn = os.path.join(self.storage[-1], hash)
@@ -299,7 +331,7 @@ class CachingFileSystem(AbstractFileSystem):
                 "uid": self.fs.ukey(path),
             }
             self._metadata.update_file(path, detail)
-            logger.debug("Creating local sparse file for %s
+            logger.debug("Creating local sparse file for %s", path)

         # call target filesystems open
         self._mkcache()
@@ -322,9 +354,9 @@ class CachingFileSystem(AbstractFileSystem):
         if "blocksize" in detail:
             if detail["blocksize"] != f.blocksize:
                 raise BlocksizeMismatchError(
-                    "Cached file must be reopened with same block"
-                    "size as original (old:
-                    "
+                    f"Cached file must be reopened with same block"
+                    f" size as original (old: {detail['blocksize']},"
+                    f" new {f.blocksize})"
                 )
         else:
             detail["blocksize"] = f.blocksize
@@ -334,6 +366,9 @@ class CachingFileSystem(AbstractFileSystem):
         self.save_cache()
         return f

+    def _parent(self, path):
+        return self.fs._parent(path)
+
     def hash_name(self, path: str, *args: Any) -> str:
         # Kept for backward compatibility with downstream libraries.
         # Ignores extra arguments, previously same_name boolean.
@@ -369,6 +404,7 @@ class CachingFileSystem(AbstractFileSystem):
             "open",
             "cat",
             "cat_file",
+            "cat_ranges",
             "get",
             "read_block",
             "tail",
@@ -389,6 +425,11 @@ class CachingFileSystem(AbstractFileSystem):
             "__hash__",
             "__eq__",
             "to_json",
+            "cache_size",
+            "pipe_file",
+            "pipe",
+            "start_transaction",
+            "end_transaction",
         ]:
             # all the methods defined in this class. Note `open` here, since
             # it calls `_open`, but is actually in superclass
@@ -397,7 +438,10 @@ class CachingFileSystem(AbstractFileSystem):
         )
         if item in ["__reduce_ex__"]:
             raise AttributeError
-        if item in ["
+        if item in ["transaction"]:
+            # property
+            return type(self).transaction.__get__(self)
+        if item in ["_cache", "transaction_type"]:
             # class attributes
             return getattr(type(self), item)
         if item == "__class__":
@@ -486,7 +530,13 @@ class WholeFileCacheFileSystem(CachingFileSystem):
             self._mkcache()
         else:
             return [
-                LocalTempFile(
+                LocalTempFile(
+                    self.fs,
+                    path,
+                    mode=open_files.mode,
+                    fn=os.path.join(self.storage[-1], self._mapper(path)),
+                )
+                for path in paths
             ]

         if self.compression:
@@ -535,6 +585,7 @@ class WholeFileCacheFileSystem(CachingFileSystem):
                 os.remove(f.name)
             except FileNotFoundError:
                 pass
+        self._cache_size = None

     def _make_local_details(self, path):
         hash = self._mapper(path)
@@ -547,7 +598,7 @@ class WholeFileCacheFileSystem(CachingFileSystem):
             "uid": self.fs.ukey(path),
         }
         self._metadata.update_file(path, detail)
-        logger.debug("Copying %s to local cache
+        logger.debug("Copying %s to local cache", path)
         return fn

     def cat(
@@ -598,13 +649,14 @@ class WholeFileCacheFileSystem(CachingFileSystem):
     def _open(self, path, mode="rb", **kwargs):
         path = self._strip_protocol(path)
         if "r" not in mode:
-
+            fn = self._make_local_details(path)
+            return LocalTempFile(self, path, mode=mode, fn=fn)
         detail = self._check_file(path)
         if detail:
             detail, fn = detail
             _, blocks = detail["fn"], detail["blocks"]
             if blocks is True:
-                logger.debug("Opening local copy of %s
+                logger.debug("Opening local copy of %s", path)

                 # In order to support downstream filesystems to be able to
                 # infer the compression from the original filename, like
@@ -616,8 +668,8 @@ class WholeFileCacheFileSystem(CachingFileSystem):
                 return f
             else:
                 raise ValueError(
-                    "Attempt to open partially cached file
-                    "as a wholly cached file"
+                    f"Attempt to open partially cached file {path}"
+                    f" as a wholly cached file"
                 )
         else:
             fn = self._make_local_details(path)
@@ -665,6 +717,7 @@ class SimpleCacheFileSystem(WholeFileCacheFileSystem):

     protocol = "simplecache"
     local_file = True
+    transaction_type = WriteCachedTransaction

     def __init__(self, **kwargs):
         kw = kwargs.copy()
@@ -689,21 +742,52 @@ class SimpleCacheFileSystem(WholeFileCacheFileSystem):
     def load_cache(self):
         pass

+    def pipe_file(self, path, value=None, **kwargs):
+        if self._intrans:
+            with self.open(path, "wb") as f:
+                f.write(value)
+        else:
+            super().pipe_file(path, value)
+
+    def pipe(self, path, value=None, **kwargs):
+        if isinstance(path, str):
+            self.pipe_file(self._strip_protocol(path), value, **kwargs)
+        elif isinstance(path, dict):
+            for k, v in path.items():
+                self.pipe_file(self._strip_protocol(k), v, **kwargs)
+        else:
+            raise ValueError("path must be str or dict")
+
+    def cat_ranges(
+        self, paths, starts, ends, max_gap=None, on_error="return", **kwargs
+    ):
+        lpaths = [self._check_file(p) for p in paths]
+        rpaths = [p for l, p in zip(lpaths, paths) if l is False]
+        lpaths = [l for l, p in zip(lpaths, paths) if l is False]
+        self.fs.get(rpaths, lpaths)
+        return super().cat_ranges(
+            paths, starts, ends, max_gap=max_gap, on_error=on_error, **kwargs
+        )
+
     def _open(self, path, mode="rb", **kwargs):
         path = self._strip_protocol(path)
+        sha = self._mapper(path)

         if "r" not in mode:
-
+            fn = os.path.join(self.storage[-1], sha)
+            return LocalTempFile(
+                self, path, mode=mode, autocommit=not self._intrans, fn=fn
+            )
         fn = self._check_file(path)
         if fn:
             return open(fn, mode)

-        sha = self._mapper(path)
         fn = os.path.join(self.storage[-1], sha)
-        logger.debug("Copying %s to local cache
+        logger.debug("Copying %s to local cache", path)
         kwargs["mode"] = mode

         self._mkcache()
+        self._cache_size = None
         if self.compression:
             with self.fs._open(path, **kwargs) as f, open(fn, "wb") as f2:
                 if isinstance(f, AbstractBufferedFile):
@@ -728,13 +812,9 @@ class SimpleCacheFileSystem(WholeFileCacheFileSystem):
 class LocalTempFile:
     """A temporary local file, which will be uploaded on commit"""

-    def __init__(self, fs, path, fn
-
-
-            self.fh = open(fn, mode)
-        else:
-            fd, self.fn = tempfile.mkstemp()
-            self.fh = open(fd, mode)
+    def __init__(self, fs, path, fn, mode="wb", autocommit=True, seek=0):
+        self.fn = fn
+        self.fh = open(fn, mode)
         self.mode = mode
         if seek:
             self.fh.seek(seek)
@@ -744,10 +824,10 @@ class LocalTempFile:
         self.autocommit = autocommit

     def __reduce__(self):
-        # always open in
+        # always open in r+b to allow continuing writing at a location
         return (
             LocalTempFile,
-            (self.fs, self.path, self.fn, "
+            (self.fs, self.path, self.fn, "r+b", self.autocommit, self.tell()),
         )

     def __enter__(self):
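The transaction plumbing above is the substantive change in cached.py: SimpleCacheFileSystem now declares transaction_type = WriteCachedTransaction, so files written inside a transaction stay in the local cache (LocalTempFile with autocommit=False) and are uploaded in a single put() when the transaction commits. A minimal sketch of the intended usage, assuming a memory-backed target purely for illustration:

    import fsspec

    # simplecache can layer over any target; "memory" is just for the demo
    fs = fsspec.filesystem("simplecache", target_protocol="memory")

    with fs.transaction:
        # the write lands in the writable cache directory only
        with fs.open("/staged/part-0.bin", "wb") as f:
            f.write(b"payload")
    # exiting the block commits: WriteCachedTransaction.complete()
    # pushes the cached copies to the target in one fs.put()

    print(fs.cache_size())  # new in this release: bytes used by the cache dir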
fsspec/implementations/data.py
ADDED
@@ -0,0 +1,48 @@
+import base64
+import io
+from urllib.parse import unquote
+
+from fsspec import AbstractFileSystem
+
+
+class DataFileSystem(AbstractFileSystem):
+    """A handy decoder for data-URLs
+
+    Example
+    -------
+    >>> with fsspec.open("data:,Hello%2C%20World%21") as f:
+    ...     print(f.read())
+    b"Hello, World!"
+
+    """
+
+    protocol = "data"
+
+    def __init__(self, **kwargs):
+        """No parameters for this filesystem"""
+        super().__init__(**kwargs)
+
+    def cat_file(self, path, start=None, end=None, **kwargs):
+        pref, data = path.split(",", 1)
+        if pref.endswith("base64"):
+            return base64.b64decode(data)[start:end]
+        return unquote(data).encode()[start:end]
+
+    def info(self, path, **kwargs):
+        pref, name = path.split(",", 1)
+        data = self.cat_file(path)
+        mime = pref.split(":", 1)[1].split(";", 1)[0]
+        return {"name": name, "size": len(data), "type": "file", "mimetype": mime}
+
+    def _open(
+        self,
+        path,
+        mode="rb",
+        block_size=None,
+        autocommit=True,
+        cache_options=None,
+        **kwargs,
+    ):
+        if "r" not in mode:
+            raise ValueError("Read only filesystem")
+        return io.BytesIO(self.cat_file(path))
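The new DataFileSystem makes RFC 2397 data: URLs addressable through the normal fsspec machinery (registry.py is also touched in this diff to register the protocol). A short sketch of both payload forms handled by cat_file:

    import base64
    import fsspec

    # percent-encoded payload, as in the class docstring
    with fsspec.open("data:,Hello%2C%20World%21") as f:
        print(f.read())  # b'Hello, World!'

    # base64 payload: the prefix before the comma ends with "base64"
    payload = base64.b64encode(b"hi there").decode()
    with fsspec.open(f"data:text/plain;base64,{payload}") as f:
        print(f.read())  # b'hi there'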
fsspec/implementations/ftp.py
CHANGED
@@ -57,7 +57,7 @@ class FTPFileSystem(AbstractFileSystem):
         encoding: str
             Encoding to use for directories and filenames in FTP connection
         """
-        super(
+        super().__init__(**kwargs)
         self.host = host
         self.port = port
         self.tempdir = tempdir or "/tmp"
@@ -156,7 +156,7 @@ class FTPFileSystem(AbstractFileSystem):
             outfile.write(x)

         self.ftp.retrbinary(
-            "RETR
+            f"RETR {rpath}",
             blocksize=self.blocksize,
             callback=cb,
         )
@@ -172,7 +172,7 @@ class FTPFileSystem(AbstractFileSystem):
             out.append(x)

         self.ftp.retrbinary(
-            "RETR
+            f"RETR {path}",
             blocksize=self.blocksize,
             rest=start,
             callback=cb,
@@ -252,7 +252,7 @@ class FTPFileSystem(AbstractFileSystem):
             self.dircache.clear()
         else:
             self.dircache.pop(path, None)
-        super(
+        super().invalidate_cache(path)


 class TransferDone(Exception):
@@ -321,7 +321,7 @@ class FTPFile(AbstractBufferedFile):

         try:
             self.fs.ftp.retrbinary(
-                "RETR
+                f"RETR {self.path}",
                 blocksize=self.blocksize,
                 rest=start,
                 callback=callback,
@@ -339,7 +339,7 @@ class FTPFile(AbstractBufferedFile):

     def _upload_chunk(self, final=False):
         self.buffer.seek(0)
         self.fs.ftp.storbinary(
-            "STOR
+            f"STOR {self.path}", self.buffer, blocksize=self.blocksize, rest=self.offset
         )
         return True

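Aside from the f-string cleanup, these hunks show the shape of ranged FTP reads: retrbinary is issued with rest=start to begin mid-file, and the TransferDone exception visible in the context lines is raised from the read callback to cut the transfer short. A simplified standalone sketch of that pattern with plain ftplib (error recovery after the early abort is reduced to a bare minimum here, and the server must honour REST):

    from ftplib import FTP

    class TransferDone(Exception):
        """Signal that enough bytes have arrived."""

    def read_range(ftp: FTP, path: str, start: int, end: int) -> bytes:
        out = bytearray()

        def cb(chunk: bytes):
            out.extend(chunk)
            if len(out) >= end - start:
                raise TransferDone  # stop once the range is covered

        try:
            ftp.retrbinary(f"RETR {path}", callback=cb, blocksize=2**16, rest=start)
        except TransferDone:
            ftp.abort()          # tell the server to stop sending
            ftp.getmultiline()   # drain the interrupted-transfer reply
        return bytes(out[: end - start])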
fsspec/implementations/git.py
CHANGED
@@ -81,7 +81,7 @@ class GitFileSystem(AbstractFileSystem):
                         "type": "directory",
                         "name": "/".join([path, obj.name]).lstrip("/"),
                         "hex": obj.hex,
-                        "mode": "
+                        "mode": f"{obj.filemode:o}",
                         "size": 0,
                     }
                 )
@@ -91,7 +91,7 @@ class GitFileSystem(AbstractFileSystem):
                         "type": "file",
                         "name": "/".join([path, obj.name]).lstrip("/"),
                         "hex": obj.hex,
-                        "mode": "
+                        "mode": f"{obj.filemode:o}",
                         "size": obj.size,
                     }
                 )
@@ -102,7 +102,7 @@ class GitFileSystem(AbstractFileSystem):
                     "type": "file",
                     "name": obj.name,
                     "hex": obj.hex,
-                    "mode": "
+                    "mode": f"{obj.filemode:o}",
                     "size": obj.size,
                 }
             ]
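The only substantive edit here is how the pygit2 filemode is rendered: the old formatting (truncated in this view) becomes the f-string ":o" format spec. For reference, in plain Python:

    mode = 0o100644          # tree-entry mode of a regular file in git
    print(f"{mode:o}")       # "100644": the :o spec formats the int as octal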
fsspec/implementations/github.py
CHANGED
@@ -79,9 +79,7 @@ class GithubFileSystem(AbstractFileSystem):
         List of string
         """
         r = requests.get(
-            "https://api.github.com/{
-                part=["users", "orgs"][is_org], org=org_or_user
-            )
+            f"https://api.github.com/{['users', 'orgs'][is_org]}/{org_or_user}/repos"
         )
         r.raise_for_status()
         return [repo["name"] for repo in r.json()]
@@ -90,8 +88,7 @@ class GithubFileSystem(AbstractFileSystem):
     def tags(self):
         """Names of tags in the repo"""
         r = requests.get(
-            "https://api.github.com/repos/{org}/{repo}/tags"
-            "".format(org=self.org, repo=self.repo),
+            f"https://api.github.com/repos/{self.org}/{self.repo}/tags",
             **self.kw,
         )
         r.raise_for_status()
@@ -101,8 +98,7 @@ class GithubFileSystem(AbstractFileSystem):
     def branches(self):
         """Names of branches in the repo"""
         r = requests.get(
-            "https://api.github.com/repos/{org}/{repo}/branches"
-            "".format(org=self.org, repo=self.repo),
+            f"https://api.github.com/repos/{self.org}/{self.repo}/branches",
             **self.kw,
         )
         r.raise_for_status()
fsspec/implementations/http.py
CHANGED
@@ -14,7 +14,13 @@ from fsspec.asyn import AbstractAsyncStreamedFile, AsyncFileSystem, sync, sync_w
 from fsspec.callbacks import _DEFAULT_CALLBACK
 from fsspec.exceptions import FSTimeoutError
 from fsspec.spec import AbstractBufferedFile
-from fsspec.utils import
+from fsspec.utils import (
+    DEFAULT_BLOCK_SIZE,
+    glob_translate,
+    isfilelike,
+    nullcontext,
+    tokenize,
+)

 from ..caching import AllBytes

@@ -165,7 +171,7 @@ class HTTPFileSystem(AsyncFileSystem):
                 l = l[1]
             if l.startswith("/") and len(l) > 1:
                 # absolute URL on this server
-                l = parts.scheme
+                l = f"{parts.scheme}://{parts.netloc}{l}"
             if l.startswith("http"):
                 if self.same_schema and l.startswith(url.rstrip("/") + "/"):
                     out.add(l)
@@ -441,8 +447,9 @@ class HTTPFileSystem(AsyncFileSystem):
             raise ValueError("maxdepth must be at least 1")
         import re

-
+        ends_with_slash = path.endswith("/")  # _strip_protocol strips trailing slash
         path = self._strip_protocol(path)
+        append_slash_to_dirname = ends_with_slash or path.endswith("/**")
         idx_star = path.find("*") if path.find("*") >= 0 else len(path)
         idx_brace = path.find("[") if path.find("[") >= 0 else len(path)

@@ -451,11 +458,11 @@ class HTTPFileSystem(AsyncFileSystem):
         detail = kwargs.pop("detail", False)

         if not has_magic(path):
-            if await self._exists(path):
+            if await self._exists(path, **kwargs):
                 if not detail:
                     return [path]
                 else:
-                    return {path: await self._info(path)}
+                    return {path: await self._info(path, **kwargs)}
             else:
                 if not detail:
                     return []  # glob of non-existent returns empty
@@ -480,45 +487,22 @@ class HTTPFileSystem(AsyncFileSystem):
         allpaths = await self._find(
             root, maxdepth=depth, withdirs=True, detail=True, **kwargs
         )
-
-
-        # See https://www.gnu.org/software/bash/manual/html_node/Pattern-Matching.html
-        # for shell globbing details.
-        pattern = (
-            "^"
-            + (
-                path.replace("\\", r"\\")
-                .replace(".", r"\.")
-                .replace("+", r"\+")
-                .replace("//", "/")
-                .replace("(", r"\(")
-                .replace(")", r"\)")
-                .replace("|", r"\|")
-                .replace("^", r"\^")
-                .replace("$", r"\$")
-                .replace("{", r"\{")
-                .replace("}", r"\}")
-                .rstrip("/")
-            )
-            + "$"
-        )
-        pattern = re.sub("/[*]{2}", "=SLASH_DOUBLE_STARS=", pattern)
-        pattern = re.sub("[*]{2}/?", "=DOUBLE_STARS=", pattern)
-        pattern = re.sub("[*]", "[^/]*", pattern)
-        pattern = re.sub("=SLASH_DOUBLE_STARS=", "(|/.*)", pattern)
-        pattern = re.sub("=DOUBLE_STARS=", ".*", pattern)
+
+        pattern = glob_translate(path + ("/" if ends_with_slash else ""))
         pattern = re.compile(pattern)
+
         out = {
-            p:
-            for p in sorted(allpaths)
-            if pattern.match(
+            p: info
+            for p, info in sorted(allpaths.items())
+            if pattern.match(
+                (
+                    p + "/"
+                    if append_slash_to_dirname and info["type"] == "directory"
+                    else p
+                )
+            )
         }

-        # Return directories only when the glob end by a slash
-        # This is needed for posix glob compliance
-        if ends:
-            out = {k: v for k, v in out.items() if v["type"] == "directory"}
-
         if detail:
             return out
         else:
@@ -655,8 +639,8 @@ class HTTPFile(AbstractBufferedFile):
         logger.debug(f"Fetch range for {self}: {start}-{end}")
         kwargs = self.kwargs.copy()
         headers = kwargs.pop("headers", {}).copy()
-        headers["Range"] = "bytes
-        logger.debug(
+        headers["Range"] = f"bytes={start}-{end - 1}"
+        logger.debug(f"{self.url} : {headers['Range']}")
         r = await self.session.get(
             self.fs.encode_url(self.url), headers=headers, **kwargs
         )
@@ -812,13 +796,13 @@ async def get_range(session, url, start, end, file=None, **kwargs):
     # explicit get a range when we know it must be safe
     kwargs = kwargs.copy()
     headers = kwargs.pop("headers", {}).copy()
-    headers["Range"] = "bytes
+    headers["Range"] = f"bytes={start}-{end - 1}"
     r = await session.get(url, headers=headers, **kwargs)
     r.raise_for_status()
     async with r:
         out = await r.read()
     if file:
-        with open(file, "
+        with open(file, "r+b") as f:
             f.seek(start)
             f.write(out)
     else:
@@ -831,7 +815,7 @@ async def _file_info(url, session, size_policy="head", **kwargs):
     Default operation is to explicitly allow redirects and use encoding
     'identity' (no compression) to get the true size of the target.
     """
-    logger.debug("Retrieve file size for %s
+    logger.debug("Retrieve file size for %s", url)
    kwargs = kwargs.copy()
    ar = kwargs.pop("allow_redirects", True)
    head = kwargs.get("headers", {}).copy()
@@ -844,7 +828,7 @@ async def _file_info(url, session, size_policy="head", **kwargs):
     elif size_policy == "get":
         r = await session.get(url, allow_redirects=ar, **kwargs)
     else:
-        raise TypeError('size_policy must be "head" or "get", got
+        raise TypeError(f'size_policy must be "head" or "get", got {size_policy}')
     async with r:
         r.raise_for_status()

@@ -855,7 +839,10 @@ async def _file_info(url, session, size_policy="head", **kwargs):
     if "Content-Length" in r.headers:
         # Some servers may choose to ignore Accept-Encoding and return
         # compressed content, in which case the returned size is unreliable.
-        if r.headers.
+        if "Content-Encoding" not in r.headers or r.headers["Content-Encoding"] in [
+            "identity",
+            "",
+        ]:
             info["size"] = int(r.headers["Content-Length"])
     elif "Content-Range" in r.headers:
         info["size"] = int(r.headers["Content-Range"].split("/")[1])