fsspec 2023.9.2__py3-none-any.whl → 2023.12.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- fsspec/__init__.py +6 -1
- fsspec/_version.py +4 -4
- fsspec/archive.py +1 -1
- fsspec/asyn.py +35 -45
- fsspec/caching.py +161 -90
- fsspec/compression.py +2 -4
- fsspec/core.py +19 -6
- fsspec/fuse.py +2 -2
- fsspec/generic.py +5 -1
- fsspec/gui.py +4 -4
- fsspec/implementations/cached.py +105 -25
- fsspec/implementations/data.py +48 -0
- fsspec/implementations/ftp.py +6 -6
- fsspec/implementations/git.py +3 -3
- fsspec/implementations/github.py +3 -7
- fsspec/implementations/http.py +34 -47
- fsspec/implementations/jupyter.py +5 -5
- fsspec/implementations/libarchive.py +1 -2
- fsspec/implementations/local.py +8 -4
- fsspec/implementations/memory.py +1 -1
- fsspec/implementations/reference.py +67 -25
- fsspec/implementations/sftp.py +11 -11
- fsspec/implementations/smb.py +4 -5
- fsspec/implementations/webhdfs.py +28 -8
- fsspec/implementations/zip.py +2 -2
- fsspec/mapping.py +2 -2
- fsspec/registry.py +8 -6
- fsspec/spec.py +41 -55
- fsspec/tests/abstract/common.py +5 -5
- fsspec/transaction.py +8 -4
- fsspec/utils.py +204 -37
- {fsspec-2023.9.2.dist-info → fsspec-2023.12.0.dist-info}/METADATA +7 -6
- fsspec-2023.12.0.dist-info/RECORD +54 -0
- {fsspec-2023.9.2.dist-info → fsspec-2023.12.0.dist-info}/WHEEL +1 -1
- fsspec/implementations/http_sync.py +0 -882
- fsspec-2023.9.2.dist-info/RECORD +0 -54
- {fsspec-2023.9.2.dist-info → fsspec-2023.12.0.dist-info}/LICENSE +0 -0
- {fsspec-2023.9.2.dist-info → fsspec-2023.12.0.dist-info}/top_level.txt +0 -0
fsspec/spec.py
CHANGED
|
@@ -17,6 +17,7 @@ from .dircache import DirCache
|
|
|
17
17
|
from .transaction import Transaction
|
|
18
18
|
from .utils import (
|
|
19
19
|
_unstrip_protocol,
|
|
20
|
+
glob_translate,
|
|
20
21
|
isfilelike,
|
|
21
22
|
other_paths,
|
|
22
23
|
read_block,
|
|
@@ -109,6 +110,7 @@ class AbstractFileSystem(metaclass=_Cached):
|
|
|
109
110
|
async_impl = False
|
|
110
111
|
mirror_sync_methods = False
|
|
111
112
|
root_marker = "" # For some FSs, may require leading '/' or other character
|
|
113
|
+
transaction_type = Transaction
|
|
112
114
|
|
|
113
115
|
#: Extra *class attributes* that should be considered when hashing.
|
|
114
116
|
_extra_tokenize_attributes = ()
|
|
@@ -196,7 +198,7 @@ class AbstractFileSystem(metaclass=_Cached):
|
|
|
196
198
|
# use of root_marker to make minimum required path, e.g., "/"
|
|
197
199
|
return path or cls.root_marker
|
|
198
200
|
|
|
199
|
-
def unstrip_protocol(self, name):
|
|
201
|
+
def unstrip_protocol(self, name: str) -> str:
|
|
200
202
|
"""Format FS-specific path to generic, including protocol"""
|
|
201
203
|
protos = (self.protocol,) if isinstance(self.protocol, str) else self.protocol
|
|
202
204
|
for protocol in protos:
|
|
@@ -235,20 +237,20 @@ class AbstractFileSystem(metaclass=_Cached):
|
|
|
235
237
|
for the normal and exception cases.
|
|
236
238
|
"""
|
|
237
239
|
if self._transaction is None:
|
|
238
|
-
self._transaction =
|
|
240
|
+
self._transaction = self.transaction_type(self)
|
|
239
241
|
return self._transaction
|
|
240
242
|
|
|
241
243
|
def start_transaction(self):
|
|
242
244
|
"""Begin write transaction for deferring files, non-context version"""
|
|
243
245
|
self._intrans = True
|
|
244
|
-
self._transaction =
|
|
246
|
+
self._transaction = self.transaction_type(self)
|
|
245
247
|
return self.transaction
|
|
246
248
|
|
|
247
249
|
def end_transaction(self):
|
|
248
250
|
"""Finish write transaction, non-context version"""
|
|
249
251
|
self.transaction.complete()
|
|
250
252
|
self._transaction = None
|
|
251
|
-
# The invalid cache must be cleared after the
|
|
253
|
+
# The invalid cache must be cleared after the transaction is completed.
|
|
252
254
|
for path in self._invalidated_caches_in_transaction:
|
|
253
255
|
self.invalidate_cache(path)
|
|
254
256
|
self._invalidated_caches_in_transaction.clear()
|
|
@@ -551,10 +553,6 @@ class AbstractFileSystem(metaclass=_Cached):
|
|
|
551
553
|
|
|
552
554
|
The `maxdepth` option is applied on the first `**` found in the path.
|
|
553
555
|
|
|
554
|
-
Search path names that contain embedded characters special to this
|
|
555
|
-
implementation of glob may not produce expected results;
|
|
556
|
-
e.g., 'foo/bar/*starredfilename*'.
|
|
557
|
-
|
|
558
556
|
kwargs are passed to ``ls``.
|
|
559
557
|
"""
|
|
560
558
|
if maxdepth is not None and maxdepth < 1:
|
|
@@ -562,8 +560,12 @@ class AbstractFileSystem(metaclass=_Cached):
|
|
|
562
560
|
|
|
563
561
|
import re
|
|
564
562
|
|
|
565
|
-
|
|
563
|
+
seps = (os.path.sep, os.path.altsep) if os.path.altsep else (os.path.sep,)
|
|
564
|
+
ends_with_sep = path.endswith(seps) # _strip_protocol strips trailing slash
|
|
566
565
|
path = self._strip_protocol(path)
|
|
566
|
+
append_slash_to_dirname = ends_with_sep or path.endswith(
|
|
567
|
+
tuple(sep + "**" for sep in seps)
|
|
568
|
+
)
|
|
567
569
|
idx_star = path.find("*") if path.find("*") >= 0 else len(path)
|
|
568
570
|
idx_qmark = path.find("?") if path.find("?") >= 0 else len(path)
|
|
569
571
|
idx_brace = path.find("[") if path.find("[") >= 0 else len(path)
|
|
@@ -573,11 +575,11 @@ class AbstractFileSystem(metaclass=_Cached):
|
|
|
573
575
|
detail = kwargs.pop("detail", False)
|
|
574
576
|
|
|
575
577
|
if not has_magic(path):
|
|
576
|
-
if self.exists(path):
|
|
578
|
+
if self.exists(path, **kwargs):
|
|
577
579
|
if not detail:
|
|
578
580
|
return [path]
|
|
579
581
|
else:
|
|
580
|
-
return {path: self.info(path)}
|
|
582
|
+
return {path: self.info(path, **kwargs)}
|
|
581
583
|
else:
|
|
582
584
|
if not detail:
|
|
583
585
|
return [] # glob of non-existent returns empty
|
|
@@ -600,47 +602,22 @@ class AbstractFileSystem(metaclass=_Cached):
|
|
|
600
602
|
depth = None
|
|
601
603
|
|
|
602
604
|
allpaths = self.find(root, maxdepth=depth, withdirs=True, detail=True, **kwargs)
|
|
603
|
-
|
|
604
|
-
|
|
605
|
-
# See https://www.gnu.org/software/bash/manual/html_node/Pattern-Matching.html
|
|
606
|
-
# for shell globbing details.
|
|
607
|
-
pattern = (
|
|
608
|
-
"^"
|
|
609
|
-
+ (
|
|
610
|
-
path.replace("\\", r"\\")
|
|
611
|
-
.replace(".", r"\.")
|
|
612
|
-
.replace("+", r"\+")
|
|
613
|
-
.replace("//", "/")
|
|
614
|
-
.replace("(", r"\(")
|
|
615
|
-
.replace(")", r"\)")
|
|
616
|
-
.replace("|", r"\|")
|
|
617
|
-
.replace("^", r"\^")
|
|
618
|
-
.replace("$", r"\$")
|
|
619
|
-
.replace("{", r"\{")
|
|
620
|
-
.replace("}", r"\}")
|
|
621
|
-
.rstrip("/")
|
|
622
|
-
.replace("?", ".")
|
|
623
|
-
)
|
|
624
|
-
+ "$"
|
|
625
|
-
)
|
|
626
|
-
pattern = re.sub("/[*]{2}", "=SLASH_DOUBLE_STARS=", pattern)
|
|
627
|
-
pattern = re.sub("[*]{2}/?", "=DOUBLE_STARS=", pattern)
|
|
628
|
-
pattern = re.sub("[*]", "[^/]*", pattern)
|
|
629
|
-
pattern = re.sub("=SLASH_DOUBLE_STARS=", "(|/.*)", pattern)
|
|
630
|
-
pattern = re.sub("=DOUBLE_STARS=", ".*", pattern)
|
|
605
|
+
|
|
606
|
+
pattern = glob_translate(path + ("/" if ends_with_sep else ""))
|
|
631
607
|
pattern = re.compile(pattern)
|
|
632
608
|
|
|
633
609
|
out = {
|
|
634
|
-
p:
|
|
635
|
-
for p in sorted(allpaths)
|
|
636
|
-
if pattern.match(
|
|
610
|
+
p: info
|
|
611
|
+
for p, info in sorted(allpaths.items())
|
|
612
|
+
if pattern.match(
|
|
613
|
+
(
|
|
614
|
+
p + "/"
|
|
615
|
+
if append_slash_to_dirname and info["type"] == "directory"
|
|
616
|
+
else p
|
|
617
|
+
)
|
|
618
|
+
)
|
|
637
619
|
}
|
|
638
620
|
|
|
639
|
-
# Return directories only when the glob end by a slash
|
|
640
|
-
# This is needed for posix glob compliance
|
|
641
|
-
if ends:
|
|
642
|
-
out = {k: v for k, v in out.items() if v["type"] == "directory"}
|
|
643
|
-
|
|
644
621
|
if detail:
|
|
645
622
|
return out
|
|
646
623
|
else:
|
|
@@ -828,6 +805,16 @@ class AbstractFileSystem(metaclass=_Cached):
|
|
|
828
805
|
def cat_ranges(
|
|
829
806
|
self, paths, starts, ends, max_gap=None, on_error="return", **kwargs
|
|
830
807
|
):
|
|
808
|
+
"""Get the contents of byte ranges from one or more files
|
|
809
|
+
|
|
810
|
+
Parameters
|
|
811
|
+
----------
|
|
812
|
+
paths: list
|
|
813
|
+
A list of filepaths on this filesystem
|
|
814
|
+
starts, ends: int or list
|
|
815
|
+
Bytes limits of the read. If using a single int, the same value will be
|
|
816
|
+
used to read all the specified files.
|
|
817
|
+
"""
|
|
831
818
|
if max_gap is not None:
|
|
832
819
|
raise NotImplementedError
|
|
833
820
|
if not isinstance(paths, list):
|
|
@@ -835,7 +822,7 @@ class AbstractFileSystem(metaclass=_Cached):
|
|
|
835
822
|
if not isinstance(starts, list):
|
|
836
823
|
starts = [starts] * len(paths)
|
|
837
824
|
if not isinstance(ends, list):
|
|
838
|
-
ends = [
|
|
825
|
+
ends = [ends] * len(paths)
|
|
839
826
|
if len(starts) != len(paths) or len(ends) != len(paths):
|
|
840
827
|
raise ValueError
|
|
841
828
|
out = []
|
|
@@ -901,7 +888,8 @@ class AbstractFileSystem(metaclass=_Cached):
|
|
|
901
888
|
os.makedirs(lpath, exist_ok=True)
|
|
902
889
|
return None
|
|
903
890
|
|
|
904
|
-
LocalFileSystem(auto_mkdir=True)
|
|
891
|
+
fs = LocalFileSystem(auto_mkdir=True)
|
|
892
|
+
fs.makedirs(fs._parent(lpath), exist_ok=True)
|
|
905
893
|
|
|
906
894
|
with self.open(rpath, "rb", **kwargs) as f1:
|
|
907
895
|
if outfile is None:
|
|
@@ -1187,9 +1175,7 @@ class AbstractFileSystem(metaclass=_Cached):
|
|
|
1187
1175
|
def mv(self, path1, path2, recursive=False, maxdepth=None, **kwargs):
|
|
1188
1176
|
"""Move file(s) from one location to another"""
|
|
1189
1177
|
if path1 == path2:
|
|
1190
|
-
logger.debug(
|
|
1191
|
-
"%s mv: The paths are the same, so no files were moved." % (self)
|
|
1192
|
-
)
|
|
1178
|
+
logger.debug("%s mv: The paths are the same, so no files were moved.", self)
|
|
1193
1179
|
else:
|
|
1194
1180
|
self.copy(path1, path2, recursive=recursive, maxdepth=maxdepth)
|
|
1195
1181
|
self.rm(path1, recursive=recursive)
|
|
@@ -1744,7 +1730,7 @@ class AbstractBufferedFile(io.IOBase):
|
|
|
1744
1730
|
elif whence == 2:
|
|
1745
1731
|
nloc = self.size + loc
|
|
1746
1732
|
else:
|
|
1747
|
-
raise ValueError("invalid whence (
|
|
1733
|
+
raise ValueError(f"invalid whence ({whence}, should be 0, 1 or 2)")
|
|
1748
1734
|
if nloc < 0:
|
|
1749
1735
|
raise ValueError("Seek before start of file")
|
|
1750
1736
|
self.loc = nloc
|
|
@@ -1851,7 +1837,7 @@ class AbstractBufferedFile(io.IOBase):
|
|
|
1851
1837
|
length = self.size - self.loc
|
|
1852
1838
|
if self.closed:
|
|
1853
1839
|
raise ValueError("I/O operation on closed file.")
|
|
1854
|
-
logger.debug("%s read: %i - %i"
|
|
1840
|
+
logger.debug("%s read: %i - %i", self, self.loc, self.loc + length)
|
|
1855
1841
|
if length == 0:
|
|
1856
1842
|
# don't even bother calling fetch
|
|
1857
1843
|
return b""
|
|
@@ -1966,7 +1952,7 @@ class AbstractBufferedFile(io.IOBase):
|
|
|
1966
1952
|
self.close()
|
|
1967
1953
|
|
|
1968
1954
|
def __str__(self):
|
|
1969
|
-
return "<File-like object
|
|
1955
|
+
return f"<File-like object {type(self.fs).__name__}, {self.path}>"
|
|
1970
1956
|
|
|
1971
1957
|
__repr__ = __str__
|
|
1972
1958
|
|
fsspec/tests/abstract/common.py
CHANGED
|
@@ -107,9 +107,9 @@ GLOB_EDGE_CASES_TESTS = {
|
|
|
107
107
|
"subdir1/subfile2",
|
|
108
108
|
],
|
|
109
109
|
),
|
|
110
|
-
("
|
|
110
|
+
("**/*1", False, None, ["file1", "subdir0/subfile1", "subdir1/subfile1"]),
|
|
111
111
|
(
|
|
112
|
-
"
|
|
112
|
+
"**/*1",
|
|
113
113
|
True,
|
|
114
114
|
None,
|
|
115
115
|
[
|
|
@@ -120,14 +120,14 @@ GLOB_EDGE_CASES_TESTS = {
|
|
|
120
120
|
"subdir1/nesteddir/nestedfile",
|
|
121
121
|
],
|
|
122
122
|
),
|
|
123
|
-
("
|
|
123
|
+
("**/*1", True, 1, ["file1"]),
|
|
124
124
|
(
|
|
125
|
-
"
|
|
125
|
+
"**/*1",
|
|
126
126
|
True,
|
|
127
127
|
2,
|
|
128
128
|
["file1", "subdir0/subfile1", "subdir1/subfile1", "subdir1/subfile2"],
|
|
129
129
|
),
|
|
130
|
-
("
|
|
130
|
+
("**/*1", False, 2, ["file1", "subdir0/subfile1", "subdir1/subfile1"]),
|
|
131
131
|
("**/subdir0", False, None, []),
|
|
132
132
|
("**/subdir0", True, None, ["subfile1", "subfile2", "nesteddir/nestedfile"]),
|
|
133
133
|
("**/subdir0/nested*", False, 2, []),
|
fsspec/transaction.py
CHANGED
|
@@ -1,3 +1,6 @@
|
|
|
1
|
+
from collections import deque
|
|
2
|
+
|
|
3
|
+
|
|
1
4
|
class Transaction:
|
|
2
5
|
"""Filesystem transaction write context
|
|
3
6
|
|
|
@@ -13,10 +16,11 @@ class Transaction:
|
|
|
13
16
|
fs: FileSystem instance
|
|
14
17
|
"""
|
|
15
18
|
self.fs = fs
|
|
16
|
-
self.files =
|
|
19
|
+
self.files = deque()
|
|
17
20
|
|
|
18
21
|
def __enter__(self):
|
|
19
22
|
self.start()
|
|
23
|
+
return self
|
|
20
24
|
|
|
21
25
|
def __exit__(self, exc_type, exc_val, exc_tb):
|
|
22
26
|
"""End transaction and commit, if exit is not due to exception"""
|
|
@@ -27,17 +31,17 @@ class Transaction:
|
|
|
27
31
|
|
|
28
32
|
def start(self):
|
|
29
33
|
"""Start a transaction on this FileSystem"""
|
|
30
|
-
self.files =
|
|
34
|
+
self.files = deque() # clean up after previous failed completions
|
|
31
35
|
self.fs._intrans = True
|
|
32
36
|
|
|
33
37
|
def complete(self, commit=True):
|
|
34
38
|
"""Finish transaction: commit or discard all deferred files"""
|
|
35
|
-
|
|
39
|
+
while self.files:
|
|
40
|
+
f = self.files.popleft()
|
|
36
41
|
if commit:
|
|
37
42
|
f.commit()
|
|
38
43
|
else:
|
|
39
44
|
f.discard()
|
|
40
|
-
self.files = []
|
|
41
45
|
self.fs._intrans = False
|
|
42
46
|
|
|
43
47
|
|